1
0
Fork 0
mirror of https://gitlab.com/bramw/baserow.git synced 2025-04-07 14:25:37 +00:00

Merge branch 'airtable-import-session-authentication' into 'develop'

Allow authentication session when importing Airtable base

See merge request 
This commit is contained in:
Bram Wiepjes 2025-03-24 10:25:14 +00:00
commit 945414d811
17 changed files with 355 additions and 49 deletions
backend
changelog/entries/unreleased/feature
web-frontend/modules/database

View file

@ -40,6 +40,8 @@ class ImportDatabaseFromAirtableActionType(ActionType):
workspace_id: int
workspace_name: str
skip_files: bool
session: Optional[str]
session_signature: Optional[str]
@classmethod
def do(
@ -48,6 +50,8 @@ class ImportDatabaseFromAirtableActionType(ActionType):
workspace: Workspace,
airtable_share_id: str,
skip_files: bool,
session: Optional[str] = None,
session_signature: Optional[str] = None,
progress_builder: Optional[ChildProgressBuilder] = None,
**kwargs,
) -> Database:
@ -59,7 +63,9 @@ class ImportDatabaseFromAirtableActionType(ActionType):
information.
"""
config = AirtableImportConfig(skip_files=skip_files)
config = AirtableImportConfig(
skip_files=skip_files, session=session, session_signature=session_signature
)
database = AirtableHandler.import_from_airtable_to_workspace(
workspace,
@ -76,6 +82,8 @@ class ImportDatabaseFromAirtableActionType(ActionType):
workspace.id,
workspace.name,
skip_files,
session,
session_signature,
)
cls.register_action(user, params, cls.scope(workspace.id), workspace)

View file

@ -1,4 +1,5 @@
import dataclasses
from typing import Optional
@dataclasses.dataclass
@ -8,3 +9,22 @@ class AirtableImportConfig:
Indicates whether the files should not be downloaded and included in the
config. This can significantly improve the import performance.
"""
session: Optional[str] = None
"""
A session cookie can optionally be provided if the publicly shared base can only be
accessed authenticated.
"""
session_signature: Optional[str] = None
"""
If a session is provided, then the matching signature must be provided as well.
"""
def get_session_cookies(self):
    """
    Builds the cookies that must be sent along with the requests to Airtable.

    :return: A dict mapping the Airtable cookie name to its value. A cookie is
        only included if the matching `session` or `session_signature`
        attribute holds a truthy value, so the dict is empty when neither is
        set.
    """

    candidates = (
        ("__Host-airtable-session", self.session),
        ("__Host-airtable-session.sig", self.session_signature),
    )
    return {name: value for name, value in candidates if value}

View file

@ -2,6 +2,10 @@ class AirtableBaseNotPublic(Exception):
"""Raised when the Airtable base is not publicly shared."""
class AirtableBaseRequiresAuthentication(Exception):
    """
    Raised when requesting the shared Airtable base results in a redirect to the
    login page, meaning that the base can only be accessed when authenticated.
    """
class AirtableShareIsNotABase(Exception):
"""Raised when shared Airtable link is not a base."""

View file

@ -43,6 +43,7 @@ from baserow.core.utils import (
from .config import AirtableImportConfig
from .exceptions import (
AirtableBaseNotPublic,
AirtableBaseRequiresAuthentication,
AirtableImportNotRespectingConfig,
AirtableShareIsNotABase,
AirtableSkipCellValue,
@ -76,21 +77,35 @@ BASE_HEADERS = {
class AirtableHandler:
@staticmethod
def fetch_publicly_shared_base(share_id: str) -> Tuple[str, dict, dict]:
def fetch_publicly_shared_base(
share_id: str, config: AirtableImportConfig
) -> Tuple[str, dict, dict]:
"""
Fetches the initial page of the publicly shared page. It will parse the content
and extract and return the initial data needed for future requests.
:param share_id: The Airtable share id of the page that must be fetched. Note
that the base must be shared publicly. The id starts with `shr`.
:param config: Additional configuration related to the import.
:raises AirtableShareIsNotABase: When the URL doesn't point to a shared base.
:return: The request ID, initial data and the cookies of the response.
"""
url = f"{AIRTABLE_BASE_URL}/{share_id}"
response = requests.get(url, headers=BASE_HEADERS) # nosec B113
response = requests.get(
url,
headers=BASE_HEADERS,
cookies=config.get_session_cookies(),
allow_redirects=False,
) # nosec B113
if not response.ok:
if response.status_code == 302 and response.headers.get(
"Location", ""
).startswith("/login"):
raise AirtableBaseRequiresAuthentication(
f"The base with share id {share_id} requires authentication."
)
elif not response.ok:
raise AirtableBaseNotPublic(
f"The base with share id {share_id} is not public."
)
@ -104,6 +119,7 @@ class AirtableHandler:
raw_init_data = re.search("window.initData = (.*);\n", decoded_content).group(1)
init_data = json.loads(raw_init_data)
cookies = response.cookies.get_dict()
cookies.update(**config.get_session_cookies())
if "sharedApplicationId" not in raw_init_data:
raise AirtableShareIsNotABase("The `shared_id` is not a base.")
@ -825,12 +841,12 @@ class AirtableHandler:
def fetch_and_combine_airtable_data(
cls,
share_id: str,
config: AirtableImportConfig,
progress_builder: Optional[ChildProgressBuilder] = None,
) -> Union[dict, dict, list]:
"""
@TODO docs
:param share_id: The shared Airtable ID of which the data must be fetched.
:param config: Additional configuration related to the import.
:param progress_builder: If provided will be used to build a child progress bar
and report on this methods progress to the parent of the progress_builder.
:return: The fetched init_data, schema, and list of tables enriched with all
@ -841,7 +857,9 @@ class AirtableHandler:
# Execute the initial request to obtain the initial data that's needed to
# make the request.
request_id, init_data, cookies = cls.fetch_publicly_shared_base(share_id)
request_id, init_data, cookies = cls.fetch_publicly_shared_base(
share_id, config
)
progress.increment(state=AIRTABLE_EXPORT_JOB_DOWNLOADING_BASE)
# Loop over all the tables and make a request for each table to obtain the raw
@ -946,6 +964,7 @@ class AirtableHandler:
init_data, schema, tables = AirtableHandler.fetch_and_combine_airtable_data(
share_id,
config,
progress.create_child_builder(represents_progress=100),
)

View file

@ -7,6 +7,7 @@ from baserow.api.applications.serializers import (
from baserow.api.errors import ERROR_GROUP_DOES_NOT_EXIST, ERROR_USER_NOT_IN_GROUP
from baserow.contrib.database.airtable.exceptions import (
AirtableBaseNotPublic,
AirtableBaseRequiresAuthentication,
AirtableShareIsNotABase,
)
from baserow.contrib.database.airtable.models import AirtableImportJob
@ -41,6 +42,7 @@ class AirtableImportJobType(JobType):
AirtableBaseNotPublic: "The Airtable base is not publicly shared.",
AirtableShareIsNotABase: "The shared link is not a base. It's probably a "
"view and the Airtable import tool only supports shared bases.",
AirtableBaseRequiresAuthentication: "The Airtable base requires authentication.",
}
request_serializer_field_names = [
@ -48,6 +50,8 @@ class AirtableImportJobType(JobType):
"database_id",
"airtable_share_url",
"skip_files",
"session",
"session_signature",
]
request_serializer_field_overrides = {
@ -64,6 +68,16 @@ class AirtableImportJobType(JobType):
default=False,
help_text="If true, then the files are not downloaded and imported.",
),
"session": serializers.CharField(
default=None,
allow_null=True,
help_text="Optionally provide a session object that's used as authentication.",
),
"session_signature": serializers.CharField(
default=None,
allow_null=True,
help_text="The matching session signature if a session is provided.",
),
}
serializer_field_names = [
@ -99,10 +113,21 @@ class AirtableImportJobType(JobType):
airtable_share_id = extract_share_id_from_url(values["airtable_share_url"])
session = values.get("session", None)
signature = values.get("session_signature", None)
if bool(session) != bool(signature):
raise serializers.ValidationError(
f"Both 'session' and 'session_signature' must either be provided "
f"together or omitted together."
)
return {
"airtable_share_id": airtable_share_id,
"workspace": workspace,
"skip_files": values.get("skip_files", False),
"session": session,
"session_signature": signature,
}
def run(self, job, progress):
@ -113,6 +138,8 @@ class AirtableImportJobType(JobType):
job.workspace,
job.airtable_share_id,
job.skip_files,
job.session,
job.session_signature,
progress_builder=progress.create_child_builder(
represents_progress=progress.total
),

View file

@ -27,3 +27,10 @@ class AirtableImportJob(JobWithUserIpAddress, Job):
db_default=False,
help_text="If true, then the files are not downloaded and imported.",
)
session = models.CharField(
null=True,
help_text="Optionally provide a session object that's used as authentication.",
)
session_signature = models.CharField(
null=True, help_text="The matching session signature if a session is provided."
)

View file

@ -41,12 +41,30 @@ class Command(BaseCommand):
action="store_true",
help="When provided, the files will not be downloaded and imported.",
)
parser.add_argument(
"--airtable-session",
type=str,
default="",
help="",
)
parser.add_argument("--airtable-signature", type=str, help="", default="")
@transaction.atomic
def handle(self, *args, **options):
workspace_id = options["workspace_id"]
public_base_url = options["public_base_url"]
skip_files = options["skip_files"]
airtable_session = options["airtable_session"]
airtable_signature = options["airtable_signature"]
if bool(airtable_session) != bool(airtable_signature):
self.stderr.write(
self.style.ERROR(
"Both --airtable-session and --airtable-signature must either be "
"provided together or omitted together."
)
)
sys.exit(1)
try:
workspace = Workspace.objects.get(pk=workspace_id)
@ -73,7 +91,11 @@ class Command(BaseCommand):
try:
with NamedTemporaryFile() as download_files_buffer:
config = AirtableImportConfig(skip_files=skip_files)
config = AirtableImportConfig(
skip_files=skip_files,
session=airtable_session,
session_signature=airtable_signature,
)
AirtableHandler.import_from_airtable_to_workspace(
workspace,
share_id,

View file

@ -0,0 +1,28 @@
# Generated by Django 5.0.9 on 2025-03-20 20:16
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Adds the nullable `session` and `session_signature` fields to the
    `airtableimportjob` model.
    """

    # Must be applied after migration 0183 of the `database` app.
    dependencies = [
        ("database", "0183_viewgroupby_type_viewsort_type"),
    ]

    # NOTE(review): neither CharField declares a `max_length`. Django only
    # accepts that on some database backends — confirm this matches every
    # backend the project supports.
    operations = [
        migrations.AddField(
            model_name="airtableimportjob",
            name="session",
            field=models.CharField(
                help_text="Optionally provide a session object that's used as authentication.",
                null=True,
            ),
        ),
        migrations.AddField(
            model_name="airtableimportjob",
            name="session_signature",
            field=models.CharField(
                help_text="The matching session signature if a session is provided.",
                null=True,
            ),
        ),
    ]

View file

@ -9,9 +9,13 @@ from django.core.files.storage import FileSystemStorage
import pytest
import responses
from rest_framework import serializers
from baserow.contrib.database.airtable.config import AirtableImportConfig
from baserow.contrib.database.airtable.exceptions import AirtableShareIsNotABase
from baserow.contrib.database.airtable.exceptions import (
AirtableBaseRequiresAuthentication,
AirtableShareIsNotABase,
)
from baserow.contrib.database.airtable.handler import AirtableHandler
from baserow.contrib.database.airtable.job_types import AirtableImportJobType
from baserow.contrib.database.airtable.models import AirtableImportJob
@ -42,7 +46,8 @@ def test_fetch_publicly_shared_base():
)
request_id, init_data, cookies = AirtableHandler.fetch_publicly_shared_base(
"appZkaH3aWX3ZjT3b"
"appZkaH3aWX3ZjT3b",
AirtableImportConfig(),
)
assert request_id == "req8wbZoh7Be65osz"
assert init_data["pageLoadId"] == "pglUrFAGTNpbxUymM"
@ -62,7 +67,27 @@ def test_fetch_publicly_shared_base_not_base_request_id_missing():
)
with pytest.raises(AirtableShareIsNotABase):
AirtableHandler.fetch_publicly_shared_base(share_id)
AirtableHandler.fetch_publicly_shared_base(
share_id,
AirtableImportConfig(),
)
@pytest.mark.django_db
@responses.activate
def test_fetch_publicly_shared_base_with_authentication():
    """
    A 302 response that redirects to the login page must be reported as a base
    that requires authentication.
    """

    share_id = "appZkaH3aWX3ZjT3b"
    # Mimic Airtable redirecting an unauthenticated visitor to the login page.
    responses.add(
        responses.GET,
        f"https://airtable.com/{share_id}",
        status=302,
        body="Sign in",
        headers={"Location": "/login?test"},
    )

    config_without_session = AirtableImportConfig()
    with pytest.raises(AirtableBaseRequiresAuthentication):
        AirtableHandler.fetch_publicly_shared_base(share_id, config_without_session)
@pytest.mark.django_db
@ -84,7 +109,8 @@ def test_fetch_table():
headers={"Set-Cookie": "brw=test;"},
)
request_id, init_data, cookies = AirtableHandler.fetch_publicly_shared_base(
"appZkaH3aWX3ZjT3b"
"appZkaH3aWX3ZjT3b",
AirtableImportConfig(),
)
cookies = {
@ -236,7 +262,7 @@ def test_to_baserow_database_export():
)
init_data, schema, tables = AirtableHandler.fetch_and_combine_airtable_data(
"appZkaH3aWX3ZjT3b"
"appZkaH3aWX3ZjT3b", AirtableImportConfig()
)
baserow_database_export, files_buffer = AirtableHandler.to_baserow_database_export(
init_data, schema, tables, AirtableImportConfig()
@ -479,7 +505,7 @@ def test_config_skip_files(tmpdir, data_fixture):
)
init_data, schema, tables = AirtableHandler.fetch_and_combine_airtable_data(
"appZkaH3aWX3ZjT3b"
"appZkaH3aWX3ZjT3b", AirtableImportConfig()
)
baserow_database_export, files_buffer = AirtableHandler.to_baserow_database_export(
init_data, schema, tables, AirtableImportConfig(skip_files=True)
@ -570,7 +596,7 @@ def test_to_baserow_database_export_without_primary_value():
)
init_data, schema, tables = AirtableHandler.fetch_and_combine_airtable_data(
"appZkaH3aWX3ZjT3b"
"appZkaH3aWX3ZjT3b", AirtableImportConfig()
)
# Rename the primary column so that we depend on the fallback in the migrations.
@ -1053,6 +1079,33 @@ def test_create_and_start_airtable_import_job_while_other_job_is_running(data_fi
)
@pytest.mark.django_db
@responses.activate
def test_create_and_start_airtable_import_job_without_both_session_and_signature(
    data_fixture,
):
    """
    Providing only the session, or only the session signature, must be rejected
    with a validation error when creating the import job.
    """

    user = data_fixture.create_user()
    workspace = data_fixture.create_workspace(user=user)

    # Each entry provides just one half of the session/signature pair.
    incomplete_credentials = (
        {"session": "test"},
        {"session_signature": "test"},
    )

    for credentials in incomplete_credentials:
        with pytest.raises(serializers.ValidationError):
            JobHandler().create_and_start_job(
                user,
                AirtableImportJobType.type,
                workspace_id=workspace.id,
                airtable_share_url="https://airtable.com/shrXxmp0WmqsTkFWTz",
                **credentials,
            )
@pytest.mark.django_db
def test_get_airtable_import_job(data_fixture):
user = data_fixture.create_user()

View file

@ -228,6 +228,49 @@ def test_create_airtable_import_job_skip_files(
assert args[1]["config"].skip_files is True
@pytest.mark.django_db(transaction=True)
@patch(
    "baserow.contrib.database.airtable.actions.AirtableHandler"
    ".import_from_airtable_to_workspace"
)
def test_create_airtable_import_job_with_session(
    mock_import_from_airtable_to_workspace, data_fixture, api_client
):
    """
    Creating an Airtable import job with a session and signature must store both
    on the job, keep them out of the API response, and pass them to the import
    handler through the config.
    """

    mock_import_from_airtable_to_workspace.return_value = (
        data_fixture.create_database_application()
    )
    user, token = data_fixture.create_user_and_token()
    workspace = data_fixture.create_workspace(user=user)

    share_path = (
        "shr22aXe5Hj32sPJB/tblU0bav59SSEyOkU/"
        "viwyUDJYyQPYuFj1F?blocks=bipEYER8Qq7fLoPbr"
    )
    payload = {
        "type": "airtable",
        "workspace_id": workspace.id,
        "airtable_share_url": f"https://airtable.com/{share_path}",
        "session": "ses",
        "session_signature": "sig",
    }
    response = api_client.post(
        reverse("api:jobs:list"),
        payload,
        HTTP_AUTHORIZATION=f"JWT {token}",
    )
    body = response.json()
    assert response.status_code == HTTP_200_OK

    # The credentials are persisted on the job itself.
    job = AirtableImportJob.objects.all().first()
    assert job.session == "ses"
    assert job.session_signature == "sig"

    # The credentials must not be exposed in the response.
    for secret_field in ("session", "session_signature"):
        assert secret_field not in body

    # The handler receives the credentials via the `config` keyword argument.
    _, call_kwargs = mock_import_from_airtable_to_workspace.call_args
    passed_config = call_kwargs["config"]
    assert passed_config.session == "ses"
    assert passed_config.session_signature == "sig"
@pytest.mark.django_db
def test_get_airtable_import_job(data_fixture, api_client):
user, token = data_fixture.create_user_and_token()

View file

@ -0,0 +1,8 @@
{
"type": "feature",
"message": "Optionally provide session authentication for Airtable import.",
"domain": "database",
"issue_number": null,
"bullet_points": [],
"created_at": "2025-03-20"
}

View file

@ -13,20 +13,11 @@
class="margin-bottom-2"
>
<FormInput
v-model="values.airtableUrl"
v-model="v$.values.airtableUrl.$model"
:error="v$.values.airtableUrl.$error"
:placeholder="$t('importFromAirtable.airtableShareLinkPaste')"
size="large"
@blur="v$.values.airtableUrl.$touch"
@input="
;[
$emit(
'input',
v$.values.airtableUrl.$invalid ? '' : v$.values.airtableUrl.$model
),
v$.values.airtableUrl.$touch(),
]
"
></FormInput>
<template #error>
{{ $t('importFromAirtable.linkError') }}
@ -46,6 +37,55 @@
/>
</Checkbox>
</div>
<div class="margin-bottom-2">
<Checkbox v-model="values.useSession">
{{ $t('importFromAirtable.useSession') }}
<HelpIcon
:tooltip="$t('importFromAirtable.useSessionHelper')"
:tooltip-content-type="'plain'"
:tooltip-content-classes="[
'tooltip__content--expandable',
'tooltip__content--expandable-plain-text',
]"
:icon="'info-empty'"
/>
</Checkbox>
</div>
<div v-if="values.useSession" class="margin-bottom-2">
<p class="margin-bottom-2">
{{ $t('importFromAirtable.sessionDescription') }}
</p>
<FormGroup
:label="$t('importFromAirtable.sessionLabel')"
:error="v$.values.session.$error"
small-label
required
class="margin-bottom-2"
>
<FormInput
v-model="v$.values.session.$model"
:error="v$.values.session.$error"
placeholder="eyJzZXNz..."
size="large"
@blur="v$.values.session.$touch"
></FormInput>
</FormGroup>
<FormGroup
  :label="$t('importFromAirtable.sessionSignatureLabel')"
  :error="v$.values.sessionSignature.$error"
  small-label
  required
  class="margin-bottom-2"
>
  <!-- The input's error state follows the signature's own validation result. -->
  <FormInput
    v-model="v$.values.sessionSignature.$model"
    :error="v$.values.sessionSignature.$error"
    placeholder="OYncZ-Nz..."
    size="large"
    @blur="v$.values.sessionSignature.$touch"
  ></FormInput>
</FormGroup>
</div>
<slot></slot>
</form>
</template>
@ -53,6 +93,8 @@
<script>
import form from '@baserow/modules/core/mixins/form'
import { useVuelidate } from '@vuelidate/core'
import { required } from '@vuelidate/validators'
export default {
name: 'AirtableImportForm',
mixins: [form],
@ -64,13 +106,25 @@ export default {
values: {
airtableUrl: '',
skipFiles: false,
useSession: false,
session: '',
sessionSignature: '',
},
}
},
watch: {
values: {
handler(values) {
this.$emit('input', values)
},
deep: true,
},
},
validations() {
return {
const rules = {
values: {
airtableUrl: {
required,
valid(value) {
const regex = /https:\/\/airtable.com\/[shr|app](.*)$/g
return !!value.match(regex)
@ -78,6 +132,13 @@ export default {
},
},
}
if (this.values.useSession) {
rules.values.session = { required }
rules.values.sessionSignature = { required }
}
return rules
},
}
</script>

View file

@ -72,7 +72,9 @@ export default {
const { data } = await AirtableService(this.$client).create(
this.workspace.id,
values.airtableUrl,
values.skipFiles
values.skipFiles,
values.useSession ? values.session : null,
values.useSession ? values.sessionSignature : null
)
this.startJobPoller(data)
} catch (error) {

View file

@ -29,7 +29,7 @@
<AirtableImportForm
v-if="selectedType === 'airtable'"
ref="airtable"
@input="handleAirtableInput"
@input="updateValue($event)"
></AirtableImportForm>
</div>
</template>
@ -63,7 +63,6 @@ export default {
],
selectedTypeIndex: 0,
name: '',
airtableUrl: '',
}
},
@ -75,30 +74,25 @@ export default {
return ['scratch', 'import'].includes(this.selectedType)
},
},
watch: {
selectedTypeIndex() {
this.airtableUrl = ''
},
},
mounted() {
this.updateValue()
},
methods: {
isValid() {
return !this.v$.$invalid && this.v$.$dirty
if (this.selectedType === 'airtable') {
const airtable = this.$refs.airtable
return !!airtable && !airtable.v$.$invalid && airtable.v$.$dirty
} else {
return !this.v$.$invalid && this.v$.$dirty
}
},
updateValue() {
updateValue(airtable = {}) {
this.$emit('update-data', {
name: this.name,
type: this.selectedType,
airtableUrl: this.airtableUrl,
...airtable,
})
},
handleAirtableInput(event) {
this.v$.airtableUrl.$model = event
this.v$.airtableUrl.$touch()
this.updateValue()
},
},
validations() {
const rules = {}
@ -106,10 +100,6 @@ export default {
rules.name = {
required: helpers.withMessage(this.$t('error.requiredField'), required),
}
} else if (this.selectedType === 'airtable') {
rules.airtableUrl = {
required: helpers.withMessage(this.$t('error.requiredField'), required),
}
}
return rules
},

View file

@ -781,7 +781,12 @@
"errorJobAlreadyRunningDescription": "Another import job is already running. You need to wait for that one to finish before starting another.",
"linkError": "The link should look like: https://airtable.com/shrxxxxxxxxxxxxxx",
"skipFiles": "Skip importing files",
"skipFilesHelper": "An Airtable base with many files can slow down the import. Enabling this skips the import of the files."
"skipFilesHelper": "An Airtable base with many files can slow down the import. Enabling this skips the import of the files.",
"useSession": "Session authentication",
"useSessionHelper": "Use this if the publicly shared base requires authentication.",
"sessionDescription": "If the import responds with \"The Airtable base requires authentication.\" then it could be that the organizational settings in Airtable prevent accessing the Airtable base without authenticating first. To do this, the session and signature must be manually extracted. Visit the URL of the publicly shared base in your browser and sign in, if needed. Click on the application menu by clicking in the top right corner -> \"More tools\" -> \"Developer tools\". Open the \"Application\" (in Firefox \"Storage\") tab and click on \"https://airtable.com\". Now find the \"__Host-airtable-session\" and \"__Host-airtable-session.sig\" cookie values, and paste them in the inputs below.",
"sessionLabel": "Session cookie (__Host-airtable-session)",
"sessionSignatureLabel": "Session signature cookie (__Host-airtable-session.sig)"
},
"chooseSingleSelectField": {
"addSelectField": "Add single select field",

View file

@ -65,9 +65,16 @@ export class DatabaseOnboardingType extends OnboardingType {
if (type === 'airtable') {
const workspace = responses[WorkspaceOnboardingType.getType()]
const airtableUrl = data[this.getType()].airtableUrl
const skipFiles = data[this.getType()].skipFiles
const useSession = data[this.getType()].useSession
const session = data[this.getType()].session
const sessionSignature = data[this.getType()].sessionSignature
const { data: job } = await AirtableService(this.app.$client).create(
workspace.id,
airtableUrl
airtableUrl,
skipFiles,
useSession ? session : null,
useSession ? sessionSignature : null
)
// Responds with the newly created job, so that the `getJobForPolling` can use

View file

@ -1,11 +1,13 @@
export default (client) => {
return {
create(workspaceId, shareURL, skipFiles) {
create(workspaceId, shareURL, skipFiles, session, sessionSignature) {
return client.post(`/jobs/`, {
type: 'airtable',
workspace_id: workspaceId,
airtable_share_url: shareURL,
skip_files: skipFiles,
session,
session_signature: sessionSignature,
})
},
}