mirror of https://gitlab.com/bramw/baserow.git synced 2025-04-14 00:59:06 +00:00

Airtable import report

Bram Wiepjes 2025-02-13 19:32:24 +00:00
parent 5c2511da13
commit 1adb520499
11 changed files with 913 additions and 69 deletions
backend
changelog/entries/unreleased/feature
web-frontend/modules/core/assets/scss

View file

@@ -1,12 +1,9 @@
-import traceback
 from datetime import datetime, timezone
 from decimal import Decimal
 from typing import Any, Dict, Optional
 from django.core.exceptions import ValidationError
-from loguru import logger
 from baserow.contrib.database.export_serialized import DatabaseExportSerializedStructure
 from baserow.contrib.database.fields.models import (
     NUMBER_MAX_DECIMAL_PLACES,
@@ -32,13 +29,23 @@ from baserow.contrib.database.fields.registries import field_type_registry
 from .config import AirtableImportConfig
 from .helpers import import_airtable_date_type_options, set_select_options_on_field
+from .import_report import (
+    ERROR_TYPE_DATA_TYPE_MISMATCH,
+    ERROR_TYPE_UNSUPPORTED_FEATURE,
+    SCOPE_CELL,
+    SCOPE_FIELD,
+    AirtableImportReport,
+)
 from .registry import AirtableColumnType
+from .utils import get_airtable_row_primary_value
 class TextAirtableColumnType(AirtableColumnType):
     type = "text"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         validator_name = raw_airtable_column.get("typeOptions", {}).get("validatorName")
         if validator_name == "url":
             return URLField()
@@ -50,17 +57,30 @@ class TextAirtableColumnType(AirtableColumnType):
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         if isinstance(baserow_field, (EmailField, URLField)):
             try:
                 field_type = field_type_registry.get_by_model(baserow_field)
                 field_type.validator(value)
             except ValidationError:
+                row_name = get_airtable_row_primary_value(
+                    raw_airtable_table, raw_airtable_row
+                )
+                import_report.add_failed(
+                    f"Row: \"{row_name}\", field: \"{raw_airtable_column['name']}\"",
+                    SCOPE_CELL,
+                    raw_airtable_table["name"],
+                    ERROR_TYPE_DATA_TYPE_MISMATCH,
+                    f'Cell value "{value}" was left empty because it didn\'t pass the email or URL validation.',
+                )
                 return ""
         return value
@@ -69,24 +89,31 @@ class TextAirtableColumnType(AirtableColumnType):
 class MultilineTextAirtableColumnType(AirtableColumnType):
     type = "multilineText"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         return LongTextField()
 class RichTextTextAirtableColumnType(AirtableColumnType):
     type = "richText"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         return LongTextField()
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         # We don't support rich text formatting yet, so this converts the value to
         # plain text.
@@ -124,7 +151,9 @@ class RichTextTextAirtableColumnType(AirtableColumnType):
 class NumberAirtableColumnType(AirtableColumnType):
     type = "number"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         type_options = raw_airtable_column.get("typeOptions", {})
         decimal_places = 0
@@ -142,11 +171,14 @@ class NumberAirtableColumnType(AirtableColumnType):
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         if value is not None:
             value = Decimal(value)
@@ -160,7 +192,9 @@ class NumberAirtableColumnType(AirtableColumnType):
 class RatingAirtableColumnType(AirtableColumnType):
     type = "rating"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         return RatingField(
             max_value=raw_airtable_column.get("typeOptions", {}).get("max", 5)
         )
@@ -169,17 +203,22 @@ class RatingAirtableColumnType(AirtableColumnType):
 class CheckboxAirtableColumnType(AirtableColumnType):
     type = "checkbox"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         return BooleanField()
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         return "true" if value else "false"
@@ -187,7 +226,9 @@ class CheckboxAirtableColumnType(AirtableColumnType):
 class DateAirtableColumnType(AirtableColumnType):
     type = "date"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         type_options = raw_airtable_column.get("typeOptions", {})
         # Check if a timezone is provided in the type options, if so, we might want
         # to use that timezone for the conversion later on.
@@ -196,6 +237,13 @@ class DateAirtableColumnType(AirtableColumnType):
         # date_force_timezone=None it the equivalent of airtable_timezone="client".
         if airtable_timezone == "client":
+            import_report.add_failed(
+                raw_airtable_column["name"],
+                SCOPE_FIELD,
+                raw_airtable_table.get("name", ""),
+                ERROR_TYPE_UNSUPPORTED_FEATURE,
+                "The date field was imported, but the client timezone setting was dropped.",
+            )
             airtable_timezone = None
         return DateField(
@@ -207,11 +255,14 @@
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         if value is None:
             return value
@@ -220,10 +271,17 @@
             value = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
                 tzinfo=timezone.utc
             )
-        except ValueError:
-            tb = traceback.format_exc()
-            print(f"Importing Airtable datetime cell failed because of: \n{tb}")
-            logger.error(f"Importing Airtable datetime cell failed because of: \n{tb}")
+        except ValueError as e:
+            row_name = get_airtable_row_primary_value(
+                raw_airtable_table, raw_airtable_row
+            )
+            import_report.add_failed(
+                f"Row: \"{row_name}\", field: \"{raw_airtable_column['name']}\"",
+                SCOPE_CELL,
+                raw_airtable_table["name"],
+                ERROR_TYPE_DATA_TYPE_MISMATCH,
+                f'Cell value was left empty because it didn\'t pass the datetime validation with error: "{str(e)}"',
+            )
             return None
         if baserow_field.date_include_time:
@@ -243,25 +301,39 @@
 class FormulaAirtableColumnType(AirtableColumnType):
     type = "formula"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         type_options = raw_airtable_column.get("typeOptions", {})
         display_type = type_options.get("displayType", "")
         airtable_timezone = type_options.get("timeZone", None)
         date_show_tzinfo = type_options.get("shouldDisplayTimeZone", False)
+        is_last_modified = display_type == "lastModifiedTime"
+        is_created = display_type == "createdTime"
+        if is_last_modified or is_created and airtable_timezone == "client":
+            import_report.add_failed(
+                raw_airtable_column["name"],
+                SCOPE_FIELD,
+                raw_airtable_table.get("name", ""),
+                ERROR_TYPE_UNSUPPORTED_FEATURE,
+                "The field was imported, but the client timezone setting was dropped.",
+            )
         # date_force_timezone=None it the equivalent of airtable_timezone="client".
         if airtable_timezone == "client":
             airtable_timezone = None
         # The formula conversion isn't support yet, but because the Created on and
         # Last modified fields work as a formula, we can convert those.
-        if display_type == "lastModifiedTime":
+        if is_last_modified:
             return LastModifiedField(
                 date_show_tzinfo=date_show_tzinfo,
                 date_force_timezone=airtable_timezone,
                 **import_airtable_date_type_options(type_options),
             )
-        elif display_type == "createdTime":
+        elif is_created:
             return CreatedOnField(
                 date_show_tzinfo=date_show_tzinfo,
                 date_force_timezone=airtable_timezone,
@@ -271,11 +343,14 @@ class FormulaAirtableColumnType(AirtableColumnType):
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         if isinstance(baserow_field, CreatedOnField):
             # If `None`, the value will automatically be populated from the
@@ -295,7 +370,9 @@ class FormulaAirtableColumnType(AirtableColumnType):
 class ForeignKeyAirtableColumnType(AirtableColumnType):
     type = "foreignKey"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         type_options = raw_airtable_column.get("typeOptions", {})
         foreign_table_id = type_options.get("foreignTableId")
@@ -307,38 +384,64 @@ class ForeignKeyAirtableColumnType(AirtableColumnType):
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         foreign_table_id = raw_airtable_column["typeOptions"]["foreignTableId"]
         # Airtable doesn't always provide an object with a `foreignRowId`. This can
         # happen with a synced table for example. Because we don't have access to the
         # source in that case, we need to skip them.
-        return [
-            row_id_mapping[foreign_table_id][v["foreignRowId"]]
-            for v in value
-            if "foreignRowId" in v
-        ]
+        foreign_row_ids = [v["foreignRowId"] for v in value if "foreignRowId" in v]
+        value = []
+        for foreign_row_id in foreign_row_ids:
+            try:
+                value.append(row_id_mapping[foreign_table_id][foreign_row_id])
+            except KeyError:
+                # If a key error is raised, then we don't have the foreign row id in
+                # the mapping. This can happen if the data integrity is compromised in
+                # the Airtable base. We don't want to fail the import, so we're
+                # reporting instead.
+                row_name = get_airtable_row_primary_value(
+                    raw_airtable_table, raw_airtable_row
+                )
+                import_report.add_failed(
+                    f"Row: \"{row_name}\", field: \"{raw_airtable_column['name']}\"",
+                    SCOPE_CELL,
+                    raw_airtable_table["name"],
+                    ERROR_TYPE_DATA_TYPE_MISMATCH,
+                    f'Foreign row id "{foreign_row_id}" was not added as relationship in the cell value was because it was not found in the mapping.',
+                )
+        return value
 class MultipleAttachmentAirtableColumnType(AirtableColumnType):
     type = "multipleAttachment"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         return FileField()
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         new_value = []
@@ -367,16 +470,21 @@ class SelectAirtableColumnType(AirtableColumnType):
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping: Dict[str, Dict[str, int]],
+        table: dict,
+        raw_airtable_row: dict,
         raw_airtable_column: dict,
         baserow_field: Field,
         value: Any,
         files_to_download: Dict[str, str],
         config: AirtableImportConfig,
+        import_report: AirtableImportReport,
     ):
         # use field id and option id for uniqueness
         return f"{raw_airtable_column.get('id')}_{value}"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         field = SingleSelectField()
         field = set_select_options_on_field(
             field,
@@ -392,17 +500,22 @@ class MultiSelectAirtableColumnType(AirtableColumnType):
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping: Dict[str, Dict[str, int]],
+        table: dict,
+        raw_airtable_row: dict,
         raw_airtable_column: dict,
         baserow_field: Field,
         value: Any,
         files_to_download: Dict[str, str],
         config: AirtableImportConfig,
+        import_report: AirtableImportReport,
     ):
         # use field id and option id for uniqueness
         column_id = raw_airtable_column.get("id")
         return [f"{column_id}_{val}" for val in value]
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         field = MultipleSelectField()
         field = set_select_options_on_field(
             field,
@@ -415,40 +528,60 @@ class MultiSelectAirtableColumnType(AirtableColumnType):
 class PhoneAirtableColumnType(AirtableColumnType):
     type = "phone"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         return PhoneNumberField()
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         try:
             field_type = field_type_registry.get_by_model(baserow_field)
             field_type.validator(value)
             return value
         except ValidationError:
+            row_name = get_airtable_row_primary_value(
+                raw_airtable_table, raw_airtable_row
+            )
+            import_report.add_failed(
+                f"Row: \"{row_name}\", field: \"{raw_airtable_column['name']}\"",
+                SCOPE_CELL,
+                raw_airtable_table["name"],
+                ERROR_TYPE_DATA_TYPE_MISMATCH,
+                f'Cell value "{value}" was left empty because it didn\'t pass the phone number validation.',
+            )
             return ""
 class CountAirtableColumnType(AirtableColumnType):
     type = "count"
-    def to_baserow_field(self, raw_airtable_table, raw_airtable_column, config):
+    def to_baserow_field(
+        self, raw_airtable_table, raw_airtable_column, config, import_report
+    ):
         type_options = raw_airtable_column.get("typeOptions", {})
         return CountField(through_field_id=type_options.get("relationColumnId"))
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping,
+        raw_airtable_table,
+        raw_airtable_row,
         raw_airtable_column,
         baserow_field,
         value,
         files_to_download,
         config,
+        import_report,
     ):
         return None
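
Taken together, the column type changes above follow one pattern: `to_baserow_field` and `to_baserow_export_serialized_value` now receive the raw Airtable table and row plus an `import_report`, and anything that cannot be converted is recorded with `add_failed` instead of being logged or silently dropped. Below is a minimal sketch of that pattern for a hypothetical column type; the class name and the validation rule are illustrative and not part of this commit.

# Hypothetical column type, only to illustrate the reporting pattern above.
class ExampleAirtableColumnType(AirtableColumnType):
    type = "example"

    def to_baserow_field(
        self, raw_airtable_table, raw_airtable_column, config, import_report
    ):
        # Returning None would mean "unsupported"; the handler then adds a
        # SCOPE_FIELD entry to the report and skips the column.
        return LongTextField()

    def to_baserow_export_serialized_value(
        self,
        row_id_mapping,
        raw_airtable_table,
        raw_airtable_row,
        raw_airtable_column,
        baserow_field,
        value,
        files_to_download,
        config,
        import_report,
    ):
        if not isinstance(value, str):
            # Report the dropped cell under the row's primary value so the user
            # can find it back in the original base.
            row_name = get_airtable_row_primary_value(
                raw_airtable_table, raw_airtable_row
            )
            import_report.add_failed(
                f"Row: \"{row_name}\", field: \"{raw_airtable_column['name']}\"",
                SCOPE_CELL,
                raw_airtable_table["name"],
                ERROR_TYPE_DATA_TYPE_MISMATCH,
                "Cell value was left empty because it is not text.",
            )
            return ""
        return value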

View file

@@ -40,6 +40,14 @@ from .exceptions import (
     AirtableImportNotRespectingConfig,
     AirtableShareIsNotABase,
 )
+from .import_report import (
+    ERROR_TYPE_UNSUPPORTED_FEATURE,
+    SCOPE_AUTOMATIONS,
+    SCOPE_FIELD,
+    SCOPE_INTERFACES,
+    SCOPE_VIEW,
+    AirtableImportReport,
+)
 User = get_user_model()
@@ -199,6 +207,7 @@ class AirtableHandler:
         table: dict,
         column: dict,
         config: AirtableImportConfig,
+        import_report: AirtableImportReport,
     ) -> Union[Tuple[None, None, None], Tuple[Field, FieldType, AirtableColumnType]]:
         """
         Converts the provided Airtable column dict to the right Baserow field object.
@@ -208,6 +217,8 @@
         :param column: The Airtable column dict. These values will be converted to
             Baserow format.
         :param config: Additional configuration related to the import.
+        :param import_report: Used to collect what wasn't imported to report to the
+            user.
         :return: The converted Baserow field, field type and the Airtable column type.
         """
@@ -215,9 +226,7 @@
             baserow_field,
             airtable_column_type,
         ) = airtable_column_type_registry.from_airtable_column_to_serialized(
-            table,
-            column,
-            config,
+            table, column, config, import_report
         )
         if baserow_field is None:
@@ -247,17 +256,20 @@
     @staticmethod
     def to_baserow_row_export(
+        table: dict,
         row_id_mapping: Dict[str, Dict[str, int]],
         column_mapping: Dict[str, dict],
         row: dict,
         index: int,
         files_to_download: Dict[str, str],
         config: AirtableImportConfig,
+        import_report: AirtableImportReport,
     ) -> dict:
         """
         Converts the provided Airtable record to a Baserow row by looping over the field
         types and executing the `from_airtable_column_value_to_serialized` method.
+        :param table: The Airtable table dict.
         :param row_id_mapping: A mapping containing the table as key as the value is
             another mapping where the Airtable row id maps the Baserow row id.
         :param column_mapping: A mapping where the Airtable column id is the value and
@@ -269,6 +281,8 @@
             be downloaded. The key is the file name and the value the URL. Additional
             files can be added to this dict.
         :param config: Additional configuration related to the import.
+        :param import_report: Used to collect what wasn't imported to report to the
+            user.
         :return: The converted row in Baserow export format.
         """
@@ -300,11 +314,14 @@
                 "airtable_column_type"
             ].to_baserow_export_serialized_value(
                 row_id_mapping,
+                table,
+                row,
                 mapping_values["raw_airtable_column"],
                 mapping_values["baserow_field"],
                 column_value,
                 files_to_download,
                 config,
+                import_report,
             )
             exported_row[f"field_{column_id}"] = baserow_serialized_value
@@ -380,6 +397,8 @@
         :param schema: An object containing the schema of the Airtable base.
         :param tables: a list containing the table data.
         :param config: Additional configuration related to the import.
+        :param import_report: Used to collect what wasn't imported to report to the
+            user.
         :param progress_builder: If provided will be used to build a child progress bar
             and report on this methods progress to the parent of the progress_builder.
         :param download_files_buffer: Optionally a file buffer can be provided to store
@@ -388,6 +407,11 @@
             containing the user files.
         """
+        # This instance allows collecting what we weren't able to import, like
+        # incompatible fields, filters, etc. This will later be used to create a table
+        # with an overview of what wasn't imported.
+        import_report = AirtableImportReport()
         progress = ChildProgressBuilder.build(progress_builder, child_total=1000)
         converting_progress = progress.create_child(
             represents_progress=500,
@@ -440,12 +464,19 @@
                     baserow_field,
                     baserow_field_type,
                     airtable_column_type,
-                ) = cls.to_baserow_field(table, column, config)
+                ) = cls.to_baserow_field(table, column, config, import_report)
                 converting_progress.increment(state=AIRTABLE_EXPORT_JOB_CONVERTING)
                 # None means that none of the field types know how to parse this field,
                 # so we must ignore it.
                 if baserow_field is None:
+                    import_report.add_failed(
+                        column["name"],
+                        SCOPE_FIELD,
+                        table["name"],
+                        ERROR_TYPE_UNSUPPORTED_FEATURE,
+                        f"""Field "{column['name']}" with field type {column["type"]} was not imported because it is not supported.""",
+                    )
                     continue
                 # Construct a mapping where the Airtable column id is the key and the
@@ -483,7 +514,9 @@
                     baserow_field,
                     baserow_field_type,
                     airtable_column_type,
-                ) = cls.to_baserow_field(table, airtable_column, config)
+                ) = cls.to_baserow_field(
+                    table, airtable_column, config, import_report
+                )
                 baserow_field.primary = True
                 field_mapping["primary_id"] = {
                     "baserow_field": baserow_field,
@@ -507,12 +540,14 @@
             for row_index, row in enumerate(tables[table["id"]]["rows"]):
                 exported_rows.append(
                     cls.to_baserow_row_export(
+                        table,
                         row_id_mapping,
                         field_mapping,
                         row,
                         row_index,
                         files_to_download_for_table,
                         config,
+                        import_report,
                     )
                 )
                 converting_progress.increment(state=AIRTABLE_EXPORT_JOB_CONVERTING)
@@ -529,6 +564,18 @@
             empty_serialized_grid_view["id"] = view_id
             exported_views = [empty_serialized_grid_view]
+            # Loop over all views to add them to them as failed to the import report
+            # because the views are not yet supported.
+            for view in table["views"]:
+                import_report.add_failed(
+                    view["name"],
+                    SCOPE_VIEW,
+                    table["name"],
+                    ERROR_TYPE_UNSUPPORTED_FEATURE,
+                    f"View \"{view['name']}\" was not imported because views are not "
+                    f"yet supported during import.",
+                )
             exported_table = DatabaseExportSerializedStructure.table(
                 id=table["id"],
                 name=table["name"],
@@ -550,6 +597,29 @@
                 url = signed_user_content_urls[url]
             files_to_download[file_name] = url
+        # Just to be really clear that the automations and interfaces are not included.
+        import_report.add_failed(
+            "All automations",
+            SCOPE_AUTOMATIONS,
+            "",
+            ERROR_TYPE_UNSUPPORTED_FEATURE,
+            "Baserow doesn't support automations.",
+        )
+        import_report.add_failed(
+            "All interfaces",
+            SCOPE_INTERFACES,
+            "",
+            ERROR_TYPE_UNSUPPORTED_FEATURE,
+            "Baserow doesn't support interfaces.",
+        )
+        # Convert the import report to the serialized export format of a Baserow table,
+        # so that a new table is created with the import report result for the user to
+        # see.
+        exported_tables.append(
+            import_report.get_baserow_export_table(len(schema["tableSchemas"]) + 1)
+        )
         exported_database = CoreExportSerializedStructure.application(
             id=1,
             name=init_data["rawApplications"][init_data["sharedApplicationId"]]["name"],
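
For orientation, the flow the handler now follows can be condensed as below; variable names are simplified and this is a sketch, not the literal method body.

# Condensed sketch (not the literal code) of the report flow added to the handler.
import_report = AirtableImportReport()

for table in airtable_tables:
    for column in table["columns"]:
        field, field_type, column_type = AirtableHandler.to_baserow_field(
            table, column, config, import_report
        )
        if field is None:
            # Unsupported column type: recorded as a failed field, then skipped.
            continue
    for view in table["views"]:
        # Views are not imported yet, so every view becomes a report entry.
        import_report.add_failed(
            view["name"],
            SCOPE_VIEW,
            table["name"],
            ERROR_TYPE_UNSUPPORTED_FEATURE,
            "Views are not yet supported during import.",
        )

# The collected report is exported as one extra table at the end of the database.
exported_tables.append(
    import_report.get_baserow_export_table(len(schema["tableSchemas"]) + 1)
)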

View file

@@ -0,0 +1,145 @@
import dataclasses
import random
from baserow.contrib.database.export_serialized import DatabaseExportSerializedStructure
from baserow.contrib.database.fields.models import (
LongTextField,
SelectOption,
SingleSelectField,
TextField,
)
from baserow.contrib.database.fields.registries import field_type_registry
from baserow.contrib.database.views.models import GridView
from baserow.contrib.database.views.registries import view_type_registry
from baserow.core.constants import BASEROW_COLORS
SCOPE_FIELD = SelectOption(id="scope_field", value="Field", color="light-blue", order=1)
SCOPE_CELL = SelectOption(id="scope_cell", value="Cell", color="light-green", order=2)
SCOPE_VIEW = SelectOption(id="scope_view", value="View", color="light-cyan", order=3)
SCOPE_AUTOMATIONS = SelectOption(
id="scope_automations", value="Automations", color="light-orange", order=4
)
SCOPE_INTERFACES = SelectOption(
id="scope_interfaces", value="Interfaces", color="light-yellow", order=5
)
ALL_SCOPES = [SCOPE_FIELD, SCOPE_CELL, SCOPE_VIEW, SCOPE_AUTOMATIONS, SCOPE_INTERFACES]
ERROR_TYPE_UNSUPPORTED_FEATURE = SelectOption(
id="error_type_unsupported_feature",
value="Unsupported feature",
color="yellow",
order=1,
)
ERROR_TYPE_DATA_TYPE_MISMATCH = SelectOption(
id="error_type_data_type_mismatch", value="Data type mismatch", color="red", order=2
)
ERROR_TYPE_OTHER = SelectOption(
id="error_type_other", value="Other", color="brown", order=3
)
ALL_ERROR_TYPES = [
ERROR_TYPE_UNSUPPORTED_FEATURE,
ERROR_TYPE_DATA_TYPE_MISMATCH,
ERROR_TYPE_OTHER,
]
@dataclasses.dataclass
class ImportReportFailedItem:
object_name: str
scope: str
table: str
error_type: str
message: str
class AirtableImportReport:
def __init__(self):
self.items = []
def add_failed(self, object_name, scope, table, error_type, message):
self.items.append(
ImportReportFailedItem(object_name, scope, table, error_type, message)
)
def get_baserow_export_table(self, order: int) -> dict:
# Create an empty grid view because the importing of views doesn't work
# yet. It's a bit quick and dirty, but it will be replaced soon.
grid_view = GridView(pk=0, id=None, name="Grid", order=1)
grid_view.get_field_options = lambda *args, **kwargs: []
grid_view_type = view_type_registry.get_by_model(grid_view)
empty_serialized_grid_view = grid_view_type.export_serialized(
grid_view, None, None, None
)
empty_serialized_grid_view["id"] = 0
exported_views = [empty_serialized_grid_view]
unique_table_names = {item.table for item in self.items if item.table}
unique_table_select_options = {
name: SelectOption(
id=f"table_{name}",
value=name,
color=random.choice(BASEROW_COLORS), # nosec
order=index + 1,
)
for index, name in enumerate(unique_table_names)
}
object_name_field = TextField(
id="object_name",
name="Object name",
order=0,
primary=True,
)
scope_field = SingleSelectField(id="scope", pk="scope", name="Scope", order=1)
scope_field._prefetched_objects_cache = {"select_options": ALL_SCOPES}
table_field = SingleSelectField(
id="table", pk="error_type", name="Table", order=2
)
table_field._prefetched_objects_cache = {
"select_options": unique_table_select_options.values()
}
error_field_type = SingleSelectField(
id="error_type", pk="error_type", name="Error type", order=3
)
error_field_type._prefetched_objects_cache = {"select_options": ALL_ERROR_TYPES}
message_field = LongTextField(id="message", name="Message", order=4)
fields = [
object_name_field,
scope_field,
table_field,
error_field_type,
message_field,
]
exported_fields = [
field_type_registry.get_by_model(field).export_serialized(field)
for field in fields
]
exported_rows = []
for index, item in enumerate(self.items):
table_select_option = unique_table_select_options.get(item.table, None)
row = DatabaseExportSerializedStructure.row(
id=index + 1,
order=f"{index + 1}.00000000000000000000",
created_on=None,
updated_on=None,
)
row["field_object_name"] = item.object_name
row["field_scope"] = item.scope.id
row["field_table"] = table_select_option.id if table_select_option else None
row["field_error_type"] = item.error_type.id
row["field_message"] = item.message
exported_rows.append(row)
exported_table = DatabaseExportSerializedStructure.table(
id="report",
name="Airtable import report",
order=order,
fields=exported_fields,
views=exported_views,
rows=exported_rows,
data_sync=None,
)
return exported_table
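
To make the report module concrete, here is a small usage sketch, assuming the names defined in the module above are imported; the failure entry is invented purely for illustration and does not come from this commit's tests.

# Illustrative usage of AirtableImportReport; the view and table names are made up.
report = AirtableImportReport()
report.add_failed(
    "Kanban",
    SCOPE_VIEW,
    "Projects",
    ERROR_TYPE_UNSUPPORTED_FEATURE,
    'View "Kanban" was not imported because views are not yet supported during import.',
)

# Serializes the collected items into a Baserow table (fields, one grid view and
# one row per failed item) that the handler appends to the exported database.
serialized_table = report.get_baserow_export_table(order=3)
assert serialized_table["name"] == "Airtable import report"
assert serialized_table["rows"][0]["field_scope"] == "scope_view"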

View file

@@ -2,6 +2,7 @@ from datetime import tzinfo
 from typing import Any, Dict, Tuple, Union
 from baserow.contrib.database.airtable.config import AirtableImportConfig
+from baserow.contrib.database.airtable.import_report import AirtableImportReport
 from baserow.contrib.database.fields.models import Field
 from baserow.core.registry import Instance, Registry
@@ -13,6 +14,7 @@ class AirtableColumnType(Instance):
         raw_airtable_column: dict,
         timezone: tzinfo,
         config: AirtableImportConfig,
+        import_report: AirtableImportReport,
     ) -> Union[Field, None]:
         """
         Converts the raw Airtable column to a Baserow field object. It should be
@@ -24,6 +26,8 @@ class AirtableColumnType(Instance):
             converted.
         :param timezone: The main timezone used for date conversions if needed.
         :param config: Additional configuration related to the import.
+        :param import_report: Used to collect what wasn't imported to report to the
+            user.
         :return: The Baserow field type related to the Airtable column. If None is
             provided, then the column is ignored in the conversion.
         """
@@ -33,11 +37,14 @@ class AirtableColumnType(Instance):
     def to_baserow_export_serialized_value(
         self,
         row_id_mapping: Dict[str, Dict[str, int]],
+        raw_airtable_table: dict,
+        raw_airtable_row: dict,
         raw_airtable_column: dict,
         baserow_field: Field,
         value: Any,
         files_to_download: Dict[str, str],
         config: AirtableImportConfig,
+        import_report: AirtableImportReport,
     ):
         """
         This method should convert a raw Airtable row value to a Baserow export row
@@ -47,6 +54,8 @@ class AirtableColumnType(Instance):
         :param row_id_mapping: A mapping containing the table as key as the value is
             another mapping where the Airtable row id maps the Baserow row id.
+        :param raw_airtable_table: The original Airtable table object.
+        :param raw_airtable_row: The original row object.
         :param raw_airtable_column: A dict containing the raw Airtable column values.
         :param baserow_field: The Baserow field that the column has been converted to.
         :param value: The raw Airtable value that must be converted.
@@ -54,6 +63,8 @@ class AirtableColumnType(Instance):
             be downloaded. The key is the file name and the value the URL. Additional
             files can be added to this dict.
         :param config: Additional configuration related to the import.
+        :param import_report: Used to collect what wasn't imported to report to the
+            user.
         :return: The converted value is Baserow export format.
         """
@@ -68,6 +79,7 @@ class AirtableColumnTypeRegistry(Registry):
         raw_airtable_table: dict,
         raw_airtable_column: dict,
         config: AirtableImportConfig,
+        import_report: AirtableImportReport,
     ) -> Union[Tuple[Field, AirtableColumnType], Tuple[None, None]]:
         """
         Tries to find a Baserow field that matches that raw Airtable column data. If
@@ -76,6 +88,8 @@ class AirtableColumnTypeRegistry(Registry):
         :param raw_airtable_table: The raw Airtable table data related to the column.
         :param raw_airtable_column: The raw Airtable column data that must be imported.
         :param config: Additional configuration related to the import.
+        :param import_report: Used to collect what wasn't imported to report to the
+            user.
         :return: The related Baserow field and AirtableColumnType that should be used
             for the conversion.
         """
@@ -84,7 +98,7 @@ class AirtableColumnTypeRegistry(Registry):
             type_name = raw_airtable_column.get("type", "")
             airtable_column_type = self.get(type_name)
             baserow_field = airtable_column_type.to_baserow_field(
-                raw_airtable_table, raw_airtable_column, config
+                raw_airtable_table, raw_airtable_column, config, import_report
             )
             if baserow_field is None:

View file

@@ -20,3 +20,22 @@ def extract_share_id_from_url(public_base_url: str) -> str:
     )
     return f"{result.group(1)}{result.group(2)}"
+def get_airtable_row_primary_value(table, row):
+    """
+    Tries to extract the name of a row using the primary value. If empty or not
+    available, then it falls back on the row ID.
+    :param table: The table where to extract primary column ID from.
+    :param row: The row to get the value name for.
+    :return: The primary value or ID of the row.
+    """
+    primary_column_id = table.get("primaryColumnId", "")
+    primary_value = row.get("cellValuesByColumnId", {}).get(primary_column_id, None)
+    if not primary_value or not isinstance(primary_value, str):
+        primary_value = row["id"]
+    return primary_value

View file

@@ -18,3 +18,35 @@ DATE_TIME_FORMAT = {
 # Django's choices to use with models.TextField
 DATE_TIME_FORMAT_CHOICES = [(k, v["name"]) for k, v in DATE_TIME_FORMAT.items()]
+# Should stay in sync with `light-`, (non-prefixed), and 'dark-' in
+# `modules/core/assets/scss/colors.scss::$colors`.
+BASEROW_COLORS = [
+    "light-blue",
+    "light-cyan",
+    "light-orange",
+    "light-yellow",
+    "light-red",
+    "light-brown",
+    "light-purple",
+    "light-pink",
+    "light-gray",
+    "blue",
+    "cyan",
+    "orange",
+    "yellow",
+    "red",
+    "brown",
+    "purple",
+    "pink",
+    "gray",
+    "dark-blue",
+    "dark-cyan",
+    "dark-orange",
+    "dark-yellow",
+    "dark-red",
+    "dark-brown",
+    "dark-purple",
+    "dark-pink",
+    "dark-gray",
+]

View file

@@ -222,7 +222,7 @@ def test_to_baserow_database_export():
     assert baserow_database_export["name"] == "Test"
     assert baserow_database_export["order"] == 1
     assert baserow_database_export["type"] == "database"
-    assert len(baserow_database_export["tables"]) == 2
+    assert len(baserow_database_export["tables"]) == 3  # 2 + import report table
     assert baserow_database_export["tables"][0]["id"] == "tblRpq315qnnIcg5IjI"
     assert baserow_database_export["tables"][0]["name"] == "Users"
@@ -315,6 +315,29 @@ def test_to_baserow_database_export():
         }
     ]
+    assert baserow_database_export["tables"][2]["rows"][0] == {
+        "id": 1,
+        "order": "1.00000000000000000000",
+        "created_on": None,
+        "updated_on": None,
+        "field_object_name": "All",
+        "field_scope": "scope_view",
+        "field_table": "table_Users",
+        "field_error_type": "error_type_unsupported_feature",
+        "field_message": 'View "All" was not imported because views are not yet supported during import.',
+    }
+    assert baserow_database_export["tables"][2]["rows"][1] == {
+        "id": 2,
+        "order": "2.00000000000000000000",
+        "created_on": None,
+        "updated_on": None,
+        "field_object_name": "Name lookup (from Users)",
+        "field_scope": "scope_field",
+        "field_table": "table_Data",
+        "field_error_type": "error_type_unsupported_feature",
+        "field_message": 'Field "Name lookup (from Users)" with field type lookup was not imported because it is not supported.',
+    }
 @pytest.mark.django_db
 @responses.activate
@@ -511,10 +534,11 @@ def test_import_from_airtable_to_workspace(
     assert database.name == "Test"
     all_tables = database.table_set.all()
-    assert len(all_tables) == 2
+    assert len(all_tables) == 3  # 2 + import report
     assert all_tables[0].name == "Users"
     assert all_tables[1].name == "Data"
+    assert all_tables[2].name == "Airtable import report"
     user_fields = all_tables[0].field_set.all()
     assert len(user_fields) == 4
@@ -537,6 +561,85 @@
     assert row_1.checkbox is False
+@pytest.mark.django_db
+@responses.activate
+def test_import_from_airtable_to_workspace_with_report_table(data_fixture, tmpdir):
+    workspace = data_fixture.create_workspace()
+    base_path = os.path.join(
+        settings.BASE_DIR, "../../../tests/airtable_responses/basic"
+    )
+    storage = FileSystemStorage(location=(str(tmpdir)), base_url="http://localhost")
+    with open(os.path.join(base_path, "file-sample.txt"), "rb") as file:
+        responses.add(
+            responses.GET,
+            "https://dl.airtable.com/.signed/file-sample.txt",
+            status=200,
+            body=file.read(),
+        )
+    with open(os.path.join(base_path, "file-sample_500kB.doc"), "rb") as file:
+        responses.add(
+            responses.GET,
+            "https://dl.airtable.com/.attachments/e93dc201ce27080d9ad9df5775527d09/93e85b28/file-sample_500kB.doc",
+            status=200,
+            body=file.read(),
+        )
+    with open(os.path.join(base_path, "file_example_JPG_100kB.jpg"), "rb") as file:
+        responses.add(
+            responses.GET,
+            "https://dl.airtable.com/.attachments/025730a04991a764bb3ace6d524b45e5/bd61798a/file_example_JPG_100kB.jpg",
+            status=200,
+            body=file.read(),
+        )
+    with open(os.path.join(base_path, "airtable_base.html"), "rb") as file:
+        responses.add(
+            responses.GET,
+            "https://airtable.com/appZkaH3aWX3ZjT3b",
+            status=200,
+            body=file.read(),
+            headers={"Set-Cookie": "brw=test;"},
+        )
+    with open(os.path.join(base_path, "airtable_application.json"), "rb") as file:
+        responses.add(
+            responses.GET,
+            "https://airtable.com/v0.3/application/appZkaH3aWX3ZjT3b/read",
+            status=200,
+            body=file.read(),
+        )
+    with open(os.path.join(base_path, "airtable_table.json"), "rb") as file:
+        responses.add(
+            responses.GET,
+            "https://airtable.com/v0.3/table/tbl7glLIGtH8C8zGCzb/readData",
+            status=200,
+            body=file.read(),
+        )
+    progress = Progress(1000)
+    database = AirtableHandler.import_from_airtable_to_workspace(
+        workspace,
+        "appZkaH3aWX3ZjT3b",
+        storage=storage,
+        progress_builder=progress.create_child_builder(represents_progress=1000),
+    )
+    report_table = database.table_set.last()
+    assert report_table.name == "Airtable import report"
+    model = report_table.get_model(attribute_names=True)
+    row = model.objects.last()
+    assert row.object_name == "All interfaces"
+    assert row.scope.value == "Interfaces"
+    assert row.table is None
+    assert row.error_type.value == "Unsupported feature"
+    assert row.message == "Baserow doesn't support interfaces."
 @pytest.mark.django_db
 @responses.activate
 def test_import_from_airtable_to_workspace_duplicated_single_select(

View file

@@ -1,6 +1,9 @@
 import pytest
-from baserow.contrib.database.airtable.utils import extract_share_id_from_url
+from baserow.contrib.database.airtable.utils import (
+    extract_share_id_from_url,
+    get_airtable_row_primary_value,
+)
 def test_extract_share_id_from_url():
@@ -28,3 +31,33 @@ def test_extract_share_id_from_url():
         extract_share_id_from_url(f"https://airtable.com/{long_share_id}")
         == long_share_id
     )
+def test_get_airtable_row_primary_value_with_primary_field():
+    airtable_table = {
+        "name": "Test",
+        "primaryColumnId": "fldG9y88Zw7q7u4Z7i4",
+    }
+    airtable_row = {
+        "id": "id1",
+        "cellValuesByColumnId": {"fldG9y88Zw7q7u4Z7i4": "name1"},
+    }
+    assert get_airtable_row_primary_value(airtable_table, airtable_row) == "name1"
+def test_get_airtable_row_primary_value_without_primary_field():
+    airtable_table = {
+        "name": "Test",
+        "primaryColumnId": "fldG9y88Zw7q7u4Z7i4",
+    }
+    airtable_row = {"id": "id1"}
+    assert get_airtable_row_primary_value(airtable_table, airtable_row) == "id1"
+def test_get_airtable_row_primary_value_without_primary_column_id_in_table():
+    airtable_table = {
+        "name": "Test",
+        "primaryColumnId": "test",
+    }
+    airtable_row = {"id": "id1"}
+    assert get_airtable_row_primary_value(airtable_table, airtable_row) == "id1"

View file

@@ -0,0 +1,7 @@
{
"type": "feature",
"message": "Airtable import report.",
"issue_number": 3263,
"bullet_points": [],
"created_at": "2025-02-09"
}

View file

@@ -191,6 +191,7 @@ $color-cyan-200: $palette-cyan-200 !default;
 $color-cyan-300: $palette-cyan-300 !default;
 $color-cyan-400: $palette-cyan-400 !default;
+// Should stay in sync with 'backend/src/baserow/core/constants.py::BASEROW_COLORS'.
 $colors: (
   'light-blue': $color-primary-100,
   'light-green': $color-success-100,