mirror of
https://gitlab.com/bramw/baserow.git
synced 2025-04-15 09:34:13 +00:00
Resolve "Disable number type guessing on import"
This commit is contained in:
parent
b3c8c6fda1
commit
8ca6829346
6 changed files with 54 additions and 129 deletions
backend
src/baserow/contrib/database
tests/baserow/contrib/database
web-frontend/modules/database/components/table
|
@ -1,5 +1,5 @@
|
|||
import sys
|
||||
import random
|
||||
import sys
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
|
@ -64,7 +64,14 @@ def fill_table_fields(limit, table):
|
|||
# This is a helper cli command, randomness is not being used for any security
|
||||
# or crypto related reasons.
|
||||
field_type_name, all_kwargs = random.choice(allowed_field_list) # nosec
|
||||
kwargs = random.choice(all_kwargs) # nosec
|
||||
# These two kwarg types depend on another field existing, which it might
|
||||
# not as we are picking randomly.
|
||||
allowed_kwargs_list = [
|
||||
kwargs
|
||||
for kwargs in all_kwargs
|
||||
if kwargs["name"] not in ["formula_singleselect", "formula_email"]
|
||||
]
|
||||
kwargs = random.choice(allowed_kwargs_list) # nosec
|
||||
kwargs.pop("primary", None)
|
||||
kwargs["name"] = field_handler.find_next_unused_field_name(
|
||||
table, [kwargs["name"]]
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
from typing import Any, cast, NewType, List, Tuple, Optional, Dict
|
||||
from collections import defaultdict
|
||||
from decimal import Decimal
|
||||
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.models import AbstractUser
|
||||
|
@ -9,8 +7,7 @@ from django.utils import timezone
|
|||
from django.utils import translation
|
||||
from django.utils.translation import gettext as _
|
||||
|
||||
from baserow.core.jobs.handler import JobHandler
|
||||
from baserow.core.jobs.models import Job
|
||||
from baserow.contrib.database.db.schema import safe_django_schema_editor
|
||||
from baserow.contrib.database.fields.constants import RESERVED_BASEROW_FIELD_NAMES
|
||||
from baserow.contrib.database.fields.exceptions import (
|
||||
MaxFieldLimitExceeded,
|
||||
|
@ -18,13 +15,14 @@ from baserow.contrib.database.fields.exceptions import (
|
|||
ReservedBaserowFieldNameException,
|
||||
InvalidBaserowFieldName,
|
||||
)
|
||||
from baserow.contrib.database.fields.registries import field_type_registry
|
||||
from baserow.contrib.database.fields.models import Field
|
||||
from baserow.contrib.database.fields.registries import field_type_registry
|
||||
from baserow.contrib.database.models import Database
|
||||
from baserow.contrib.database.views.handler import ViewHandler
|
||||
from baserow.contrib.database.views.view_types import GridViewType
|
||||
from baserow.core.jobs.handler import JobHandler
|
||||
from baserow.core.jobs.models import Job
|
||||
from baserow.core.trash.handler import TrashHandler
|
||||
from baserow.contrib.database.db.schema import safe_django_schema_editor
|
||||
from .exceptions import (
|
||||
TableDoesNotExist,
|
||||
TableNotInDatabase,
|
||||
|
@ -36,7 +34,6 @@ from .exceptions import (
|
|||
from .models import Table
|
||||
from .signals import table_updated, table_deleted, tables_reordered
|
||||
|
||||
|
||||
TableForUpdate = NewType("TableForUpdate", Table)
|
||||
|
||||
|
||||
|
@ -353,31 +350,8 @@ class TableHandler:
|
|||
|
||||
normalized_data = []
|
||||
|
||||
# Fill missing rows and computes value type frequencies
|
||||
value_type_frequencies = defaultdict(lambda: defaultdict(lambda: 0))
|
||||
for row in data:
|
||||
new_row = []
|
||||
for index, value in enumerate(row):
|
||||
if value is not None:
|
||||
if isinstance(value, int):
|
||||
if len(str(value)) <= 50:
|
||||
value_type_frequencies[index]["number_int"] += 1
|
||||
else:
|
||||
value_type_frequencies[index]["text"] += 1
|
||||
elif isinstance(value, float):
|
||||
sign, digits, exponent = Decimal(str(value)).as_tuple()
|
||||
if abs(exponent) <= 10 and len(digits) <= 50 + 10:
|
||||
value_type_frequencies[index]["number_float"] += 1
|
||||
else:
|
||||
value_type_frequencies[index]["text"] += 1
|
||||
else:
|
||||
value = "" if value is None else str(value)
|
||||
if len(value) > 255:
|
||||
value_type_frequencies[index]["long_text"] += 1
|
||||
else:
|
||||
value_type_frequencies[index]["text"] += 1
|
||||
|
||||
new_row.append(value)
|
||||
new_row = list(row)
|
||||
|
||||
# Fill incomplete rows with empty values
|
||||
for i in range(len(new_row), largest_column_count):
|
||||
|
@ -386,38 +360,8 @@ class TableHandler:
|
|||
normalized_data.append(new_row)
|
||||
|
||||
field_with_type = []
|
||||
tolerated_error_threshold = (
|
||||
len(normalized_data)
|
||||
/ 100
|
||||
* settings.BASEROW_IMPORT_TOLERATED_TYPE_ERROR_THRESHOLD
|
||||
)
|
||||
# Try to guess field type from type frequency
|
||||
for index, field_name in enumerate(fields):
|
||||
frequencies = value_type_frequencies[index]
|
||||
if frequencies["long_text"] > tolerated_error_threshold:
|
||||
field_with_type.append(
|
||||
(field_name, "long_text", {"field_options": {"width": 400}})
|
||||
)
|
||||
elif frequencies["text"] > tolerated_error_threshold:
|
||||
field_with_type.append((field_name, "text", {}))
|
||||
elif frequencies["number_float"] > tolerated_error_threshold:
|
||||
field_with_type.append(
|
||||
(
|
||||
field_name,
|
||||
"number",
|
||||
{"number_negative": True, "number_decimal_places": 10},
|
||||
)
|
||||
)
|
||||
elif frequencies["number_int"] > tolerated_error_threshold:
|
||||
field_with_type.append(
|
||||
(
|
||||
field_name,
|
||||
"number",
|
||||
{"number_negative": True, "field_options": {"width": 150}},
|
||||
)
|
||||
)
|
||||
else:
|
||||
field_with_type.append((field_name, "text", {}))
|
||||
field_with_type.append((field_name, "text", {}))
|
||||
|
||||
return field_with_type, normalized_data
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import pytest
|
||||
from django.conf import settings
|
||||
from pytest_unordered import unordered
|
||||
|
||||
from baserow.contrib.database.fields.dependencies.circular_reference_checker import (
|
||||
will_cause_circular_dep,
|
||||
|
@ -294,7 +295,7 @@ def test_get_dependant_fields_with_type(data_fixture):
|
|||
(text_field_1_dependency_2, text_field_type, None),
|
||||
(text_field_1_and_2_dependency_1, text_field_type, None),
|
||||
]
|
||||
assert results == expected_text_field_1_dependants
|
||||
assert results == unordered(expected_text_field_1_dependants)
|
||||
|
||||
results = FieldDependencyHandler.get_dependant_fields_with_type(
|
||||
table.id,
|
||||
|
@ -312,7 +313,7 @@ def test_get_dependant_fields_with_type(data_fixture):
|
|||
),
|
||||
(text_field_1_and_2_dependency_1, text_field_type, None),
|
||||
]
|
||||
assert results == expected_text_field_2_dependants
|
||||
assert results == unordered(expected_text_field_2_dependants)
|
||||
|
||||
results = FieldDependencyHandler.get_dependant_fields_with_type(
|
||||
table.id,
|
||||
|
@ -328,6 +329,6 @@ def test_get_dependant_fields_with_type(data_fixture):
|
|||
associated_relations_changed=True,
|
||||
field_cache=field_cache,
|
||||
)
|
||||
assert (
|
||||
results == expected_text_field_1_dependants + expected_text_field_2_dependants
|
||||
assert results == unordered(
|
||||
expected_text_field_1_dependants + expected_text_field_2_dependants
|
||||
)
|
||||
|
|
|
@ -1,39 +1,37 @@
|
|||
import pytest
|
||||
from pyinstrument import Profiler
|
||||
from baserow.core.exceptions import UserNotInGroup
|
||||
from freezegun import freeze_time
|
||||
from decimal import Decimal
|
||||
|
||||
from django.utils import timezone
|
||||
from django.conf import settings
|
||||
from django.test.utils import override_settings
|
||||
from django.utils import timezone
|
||||
from freezegun import freeze_time
|
||||
from pyinstrument import Profiler
|
||||
|
||||
from baserow.contrib.database.fields.models import SelectOption
|
||||
from baserow.contrib.database.fields.field_cache import FieldCache
|
||||
from baserow.contrib.database.fields.dependencies.handler import FieldDependencyHandler
|
||||
from baserow.contrib.database.fields.models import TextField, LongTextField, NumberField
|
||||
from baserow.core.jobs.tasks import run_async_job, clean_up_jobs
|
||||
from baserow.core.jobs.models import Job
|
||||
from baserow.core.jobs.constants import (
|
||||
JOB_FAILED,
|
||||
JOB_FINISHED,
|
||||
JOB_STARTED,
|
||||
JOB_PENDING,
|
||||
)
|
||||
from baserow.contrib.database.file_import.exceptions import (
|
||||
FileImportMaxErrorCountExceeded,
|
||||
)
|
||||
from baserow.contrib.database.fields.exceptions import (
|
||||
MaxFieldLimitExceeded,
|
||||
MaxFieldNameLengthExceeded,
|
||||
ReservedBaserowFieldNameException,
|
||||
InvalidBaserowFieldName,
|
||||
)
|
||||
from baserow.contrib.database.fields.field_cache import FieldCache
|
||||
from baserow.contrib.database.fields.models import SelectOption
|
||||
from baserow.contrib.database.fields.models import TextField
|
||||
from baserow.contrib.database.file_import.exceptions import (
|
||||
FileImportMaxErrorCountExceeded,
|
||||
)
|
||||
from baserow.contrib.database.table.exceptions import (
|
||||
InvalidInitialTableData,
|
||||
InitialTableDataLimitExceeded,
|
||||
InitialTableDataDuplicateName,
|
||||
)
|
||||
from baserow.core.exceptions import UserNotInGroup
|
||||
from baserow.core.jobs.constants import (
|
||||
JOB_FAILED,
|
||||
JOB_FINISHED,
|
||||
JOB_STARTED,
|
||||
JOB_PENDING,
|
||||
)
|
||||
from baserow.core.jobs.models import Job
|
||||
from baserow.core.jobs.tasks import run_async_job, clean_up_jobs
|
||||
|
||||
|
||||
@pytest.mark.django_db(transaction=True)
|
||||
|
@ -225,14 +223,12 @@ def test_run_file_import_task(data_fixture, patch_filefield_storage):
|
|||
field1, field2, field3, field4, field5 = fields
|
||||
|
||||
assert isinstance(field1.specific, TextField)
|
||||
assert isinstance(field2.specific, NumberField)
|
||||
assert field2.specific.number_decimal_places == 10
|
||||
assert isinstance(field3.specific, NumberField)
|
||||
assert field3.specific.number_decimal_places == 10
|
||||
assert isinstance(field4.specific, LongTextField)
|
||||
assert isinstance(field5.specific, LongTextField)
|
||||
assert isinstance(field2.specific, TextField)
|
||||
assert isinstance(field3.specific, TextField)
|
||||
assert isinstance(field4.specific, TextField)
|
||||
assert isinstance(field5.specific, TextField)
|
||||
|
||||
assert getattr(rows[0], field3.db_column) == Decimal("1.55")
|
||||
assert getattr(rows[0], field3.db_column) == "1.55"
|
||||
|
||||
# Test type guessing with error threshold
|
||||
with override_settings(
|
||||
|
@ -246,7 +242,7 @@ def test_run_file_import_task(data_fixture, patch_filefield_storage):
|
|||
table = job.table
|
||||
model = table.get_model()
|
||||
rows = model.objects.all()
|
||||
assert len(rows) == 19
|
||||
assert len(rows) == 20
|
||||
|
||||
fields = table.field_set.all()
|
||||
|
||||
|
@ -255,11 +251,9 @@ def test_run_file_import_task(data_fixture, patch_filefield_storage):
|
|||
field1, field2, field3, field4, field5 = fields
|
||||
|
||||
assert isinstance(field1.specific, TextField)
|
||||
assert isinstance(field2.specific, NumberField)
|
||||
assert field2.specific.number_decimal_places == 0
|
||||
assert isinstance(field3.specific, NumberField)
|
||||
assert field3.specific.number_decimal_places == 10
|
||||
assert isinstance(field4.specific, LongTextField)
|
||||
assert isinstance(field2.specific, TextField)
|
||||
assert isinstance(field3.specific, TextField)
|
||||
assert isinstance(field4.specific, TextField)
|
||||
assert isinstance(field5.specific, TextField)
|
||||
|
||||
# Import data to an existing table
|
||||
|
@ -272,7 +266,7 @@ def test_run_file_import_task(data_fixture, patch_filefield_storage):
|
|||
run_async_job(job.id)
|
||||
|
||||
rows = model.objects.all()
|
||||
assert len(rows) == 21
|
||||
assert len(rows) == 22
|
||||
|
||||
# Import data with error
|
||||
data = [
|
||||
|
@ -291,27 +285,9 @@ def test_run_file_import_task(data_fixture, patch_filefield_storage):
|
|||
job.refresh_from_db()
|
||||
|
||||
rows = model.objects.all()
|
||||
assert len(rows) == 23
|
||||
assert len(rows) == 26
|
||||
|
||||
assert sorted(job.report["failing_rows"].keys()) == ["0", "3"]
|
||||
|
||||
assert job.report["failing_rows"]["0"] == {
|
||||
f"field_{field2.id}": [
|
||||
{"code": "invalid", "error": "A valid number is required."}
|
||||
],
|
||||
f"field_{field3.id}": [
|
||||
{"code": "invalid", "error": "A valid number is required."}
|
||||
],
|
||||
}
|
||||
|
||||
assert job.report["failing_rows"]["3"] == {
|
||||
f"field_{field2.id}": [
|
||||
{
|
||||
"code": "max_decimal_places",
|
||||
"error": "Ensure that there are no more than 0 decimal places.",
|
||||
}
|
||||
]
|
||||
}
|
||||
assert sorted(job.report["failing_rows"].keys()) == []
|
||||
|
||||
# Change user language to test message i18n
|
||||
job.user.profile.language = "fr"
|
||||
|
@ -327,12 +303,7 @@ def test_run_file_import_task(data_fixture, patch_filefield_storage):
|
|||
|
||||
job.refresh_from_db()
|
||||
|
||||
assert job.report["failing_rows"]["0"][f"field_{field2.id}"] == [
|
||||
{
|
||||
"code": "invalid",
|
||||
"error": "Un nombre valide est requis.",
|
||||
}
|
||||
]
|
||||
assert job.report["failing_rows"] == {}
|
||||
|
||||
data = [
|
||||
[1.3],
|
||||
|
|
|
@ -13,6 +13,8 @@ For example:
|
|||
|
||||
### Bug Fixes
|
||||
|
||||
* Disable table import field type guessing and instead always import as text fields. [#1050](https://gitlab.com/bramw/baserow/-/issues/1050)
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
|
||||
|
|
|
@ -224,7 +224,7 @@ export default {
|
|||
|
||||
header.forEach((key) => {
|
||||
const exists = Object.prototype.hasOwnProperty.call(entry, key)
|
||||
const value = exists ? entry[key].toString() : ''
|
||||
const value = exists ? JSON.stringify(entry[key]) : ''
|
||||
row.push(value)
|
||||
})
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue