
Merge branch '413-allow-searching-in-link-to-table-field-cells' into 'develop'

Resolve "Allow searching in Link to table field cells"

Closes 

See merge request 
Nigel Gott 2023-07-10 09:13:30 +00:00
commit 9f6759414c
29 changed files with 683 additions and 405 deletions

View file

@ -1095,23 +1095,18 @@ MIGRATION_LOCK_ID = os.getenv("BASEROW_MIGRATION_LOCK_ID", 123456)
# Search specific configuration settings.
#
# How long the Postgres full-text search Celery tasks
# can run for before being killed. By default, 15 minutes.
BASEROW_CELERY_TSV_MODIFICATION_HARD_LIMIT = 60 * 15
CELERY_SEARCH_UPDATE_HARD_TIME_LIMIT = int(
os.getenv("BASEROW_CELERY_SEARCH_UPDATE_HARD_TIME_LIMIT", 60 * 30)
)
# By default, Baserow will use Postgres full-text as its
# search backend. If the product is installed on a system
# with limited disk space, and less accurate results / degraded
# search performance is acceptable, then switch this setting off.
BASEROW_USE_PG_FULLTEXT_SEARCH = (
os.getenv("BASEROW_USE_PG_FULLTEXT_SEARCH", "true") == "true"
)
BASEROW_USE_PG_FULLTEXT_SEARCH_CONFIG = os.getenv(
"BASEROW_USE_PG_FULLTEXT_SEARCH_CONFIG", "simple"
)
BASEROW_AUTO_VACUUM_AFTER_SEARCH_UPDATE = str_to_bool(
os.getenv("BASEROW_AUTO_VACUUM_AFTER_SEARCH_UPDATE", "true")
USE_PG_FULLTEXT_SEARCH = str_to_bool(
(os.getenv("BASEROW_USE_PG_FULLTEXT_SEARCH", "true"))
)
PG_SEARCH_CONFIG = os.getenv("BASEROW_PG_SEARCH_CONFIG", "simple")
AUTO_VACUUM_AFTER_SEARCH_UPDATE = str_to_bool(os.getenv("BASEROW_AUTO_VACUUM", "true"))
# Indicates whether we are running the tests or not. Set to True in the test.py settings
# file used by pytest.ini
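
A minimal sketch of how the three new search settings above resolve from the environment. The setting and env var names match the diff; the `str_to_bool` body shown here is an assumption, since the real helper lives in `baserow.core.utils`.

import os


def str_to_bool(value: str) -> bool:
    # Assumed helper body: treat common truthy strings as True.
    return str(value).lower() in ("true", "1", "t", "yes", "y")


# Mirrors the settings above: full-text search on by default, the "simple"
# Postgres search configuration, and auto-vacuum after search updates.
USE_PG_FULLTEXT_SEARCH = str_to_bool(os.getenv("BASEROW_USE_PG_FULLTEXT_SEARCH", "true"))
PG_SEARCH_CONFIG = os.getenv("BASEROW_PG_SEARCH_CONFIG", "simple")
AUTO_VACUUM_AFTER_SEARCH_UPDATE = str_to_bool(os.getenv("BASEROW_AUTO_VACUUM", "true"))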

View file

@ -1,9 +1,8 @@
from collections import defaultdict
from typing import Dict, List, Optional, Set, Tuple, cast
from typing import Dict, List, NamedTuple, Optional, Set, Tuple, cast
from django.db.models import Expression, Q, Value
from baserow.contrib.database.fields.dependencies.exceptions import InvalidViaPath
from baserow.contrib.database.fields.field_cache import FieldCache
from baserow.contrib.database.fields.models import Field, LinkRowField
from baserow.contrib.database.fields.signals import field_updated
@ -45,28 +44,35 @@ class PathBasedUpdateStatementCollector:
field: Field,
update_statement: Expression,
path_from_starting_table: Optional[List[LinkRowField]] = None,
):
self._add_update_statement_or_mark_as_changed_for_field(
field, update_statement, path_from_starting_table
)
def mark_field_as_changed(
self,
field: Field,
path_from_starting_table: Optional[List[LinkRowField]] = None,
):
self._add_update_statement_or_mark_as_changed_for_field(
field, None, path_from_starting_table
)
def _add_update_statement_or_mark_as_changed_for_field(
self,
field: Field,
update_statement: Optional[Expression],
path_from_starting_table: Optional[List[LinkRowField]] = None,
):
if not path_from_starting_table:
if self.table != field.table:
# We have been given an update statement for a different table, but
# we don't have a path back to the starting table. This only occurs
# when a link row field has been converted to another type, which will
# have deleted the m2m connection entirely. In this situation we just
# want to update all the cells of the dependant fields because they will
# have all been affected by the deleted connection.
broken_name = f"broken_connection_to_table_{field.table_id}"
if broken_name not in self.sub_paths:
collector = PathBasedUpdateStatementCollector(
field.table, None, connection_is_broken=True
)
self.sub_paths[broken_name] = collector
else:
collector = self.sub_paths[broken_name]
collector.add_update_statement(
collector = self._get_collector_for_broken_connection(field)
collector._add_update_statement_or_mark_as_changed_for_field(
field, update_statement, path_from_starting_table
)
else:
self.update_statements[field.db_column] = update_statement
if update_statement is not None:
self.update_statements[field.db_column] = update_statement
if self.table.needs_background_update_column_added:
self.update_statements[
ROW_NEEDS_BACKGROUND_UPDATE_COLUMN_NAME
@ -74,7 +80,10 @@ class PathBasedUpdateStatementCollector:
else:
next_via_field_link = path_from_starting_table[0]
if next_via_field_link.link_row_table != self.table:
raise InvalidViaPath()
# A link row field has been edited and this has been triggered by the
# related link field that is being deleted; nothing to do, as a separate
# update will fix this column.
return
next_link_db_column = next_via_field_link.db_column
if next_link_db_column not in self.sub_paths:
self.sub_paths[next_link_db_column] = PathBasedUpdateStatementCollector(
@ -82,10 +91,29 @@ class PathBasedUpdateStatementCollector:
next_via_field_link,
connection_is_broken=self.connection_is_broken,
)
self.sub_paths[next_link_db_column].add_update_statement(
self.sub_paths[
next_link_db_column
]._add_update_statement_or_mark_as_changed_for_field(
field, update_statement, path_from_starting_table[1:]
)
def _get_collector_for_broken_connection(self, field):
# We have been given an update statement for a different table, but
# we don't have a path back to the starting table. This only occurs
# when a link row field has been converted to another type, which will
# have deleted the m2m connection entirely. In this situation we just
# want to update all the cells of the dependant fields because they will
# have all been affected by the deleted connection.
broken_name = f"broken_connection_to_table_{field.table_id}"
if broken_name not in self.sub_paths:
collector = PathBasedUpdateStatementCollector(
field.table, None, connection_is_broken=True
)
self.sub_paths[broken_name] = collector
else:
collector = self.sub_paths[broken_name]
return collector
def execute_all(
self,
field_cache: FieldCache,
@ -185,6 +213,11 @@ class PathBasedUpdateStatementCollector:
return filters
class UpdatedField(NamedTuple):
field: Field
send_field_update_signal: bool = True
class FieldUpdateCollector:
"""
From a starting table this class collects updated fields and an update
@ -207,7 +240,9 @@ class FieldUpdateCollector:
will only update rows which join back to these starting rows.
"""
self._updated_fields_per_table: Dict[int, Dict[int, Field]] = defaultdict(dict)
self._updated_fields_per_table: Dict[
int, Dict[int, UpdatedField]
] = defaultdict(dict)
self._updated_tables = {}
self._starting_row_ids = starting_row_ids
self._starting_table = starting_table
@ -239,13 +274,45 @@ class FieldUpdateCollector:
"""
# noinspection PyTypeChecker
self._updated_fields_per_table[field.table_id][field.id] = field
self._updated_fields_per_table[field.table_id][field.id] = UpdatedField(field)
if field.table_id not in self._updated_tables:
self._updated_tables[field.table_id] = field.table
self._update_statement_collector.add_update_statement(
field, update_statement, via_path_to_starting_table
)
def add_field_which_has_changed(
self,
field: Field,
via_path_to_starting_table: Optional[List[LinkRowField]] = None,
send_field_updated_signal: bool = True,
):
"""
Stores the provided field as an updated one to send in field updated signals
when triggered to do so. Call this when you have no update statement to run
for the field's cells, but they have still changed and so other cascading
updates or background row tasks still need to be run for them.
:param field: The field which has had cell values changed.
:param via_path_to_starting_table: A list of link row fields which lead from
the self.starting_table to the table containing field. Used to properly
order the update statements so the graph is updated in sequence and also
used if self.starting_row_ids is set so only rows which join back to the
starting rows via this path are updated.
:param send_field_updated_signal: Whether to send a field_updated signal
for this field at the end.
"""
# noinspection PyTypeChecker
self._updated_fields_per_table[field.table_id][field.id] = UpdatedField(
field, send_field_updated_signal
)
if field.table_id not in self._updated_tables:
self._updated_tables[field.table_id] = field.table
self._update_statement_collector.mark_field_as_changed(
field, via_path_to_starting_table
)
def apply_updates_and_get_updated_fields(
self, field_cache: FieldCache, skip_search_updates=False
) -> List[Field]:
@ -279,7 +346,10 @@ class FieldUpdateCollector:
will be all the other updated fields in that table.
"""
for field, related_fields in self._get_updated_fields_per_table():
for (
field,
related_fields,
) in self._get_updated_fields_to_send_signals_for_per_table():
if field.table != self._starting_table:
field_updated.send(
self,
@ -292,12 +362,19 @@ class FieldUpdateCollector:
for table in self._updated_tables.values():
table_updated.send(self, table=table, user=None, force_table_refresh=True)
def _get_updated_fields_per_table(self) -> List[Tuple[Field, List[Field]]]:
def _get_updated_fields_to_send_signals_for_per_table(
self,
) -> List[Tuple[Field, List[Field]]]:
result = []
for fields_dict in self._updated_fields_per_table.values():
fields = list(fields_dict.values())
result.append((fields[0], fields[1:]))
fields = [
f.field for f in fields_dict.values() if f.send_field_update_signal
]
if fields:
result.append((fields[0], fields[1:]))
return result
def _for_table(self, table) -> List[Field]:
return list(self._updated_fields_per_table.get(table.id, {}).values())
return [
f.field for f in self._updated_fields_per_table.get(table.id, {}).values()
]
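
A hedged usage sketch of the reworked collector, assuming `table`, `row`, `link_field`, `dependant_field` and `field_cache` already exist; it only illustrates the call order of the methods changed above.

# Hypothetical call site; the objects passed in are assumptions.
collector = FieldUpdateCollector(table, starting_row_ids=[row.id])

# A field whose cells changed but which needs no SQL update statement of its
# own is only marked as changed, and can opt out of the field_updated signal.
collector.add_field_which_has_changed(
    dependant_field,
    via_path_to_starting_table=[link_field],
    send_field_updated_signal=False,
)

updated_fields = collector.apply_updates_and_get_updated_fields(field_cache)
collector.send_additional_field_updated_signals()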

View file

@ -108,7 +108,7 @@ def extract_jsonb_array_values_to_single_string(
path_to_value_in_jsonb_list: Optional[List[Expression]] = None,
transform_value_to_text_func: Optional[Callable[[Expression], Expression]] = None,
extract_as_text: bool = True,
delimiter=" ",
delimiter: str = " ",
):
"""
For a field whose cells contain a JSONB list of objects (File and Lookup fields)

View file

@ -19,6 +19,7 @@ from django.db import OperationalError, models
from django.db.models import (
CharField,
DateTimeField,
Expression,
F,
Func,
OuterRef,
@ -88,7 +89,6 @@ from baserow.core.utils import list_to_comma_separated_string
from baserow.formula import BaserowFormulaException
from baserow.formula.exceptions import FormulaFunctionTypeDoesNotExist
from ..search.expressions import LocalisedSearchVector
from .constants import UPSERT_OPTION_DICT_KEY
from .deferred_field_fk_updater import DeferredFieldFkUpdater
from .dependencies.exceptions import (
@ -786,11 +786,11 @@ class DateFieldType(FieldType):
}
)
def prepare_value_for_search(
def get_search_expression(
self,
field: Union[DateField, LastModifiedField, CreatedOnField],
queryset: QuerySet,
) -> Optional[LocalisedSearchVector]:
) -> Expression:
"""
Prepares a `DateField`, `LastModifiedField` or `CreatedOnField`
for search, by converting the value to its timezone (if the field's
@ -798,19 +798,17 @@ class DateFieldType(FieldType):
`to_char` so that it's formatted properly.
"""
return LocalisedSearchVector(
return Func(
Func(
Func(
# FIXME: what if date_force_timezone is None(user timezone)?
Value(field.date_force_timezone or "UTC", output_field=CharField()),
F(field.db_column),
function="timezone",
output_field=DateTimeField(),
),
Value(field.get_psql_format()),
function="to_char",
output_field=CharField(),
)
# FIXME: what if date_force_timezone is None(user timezone)?
Value(field.date_force_timezone or "UTC", output_field=CharField()),
F(field.db_column),
function="timezone",
output_field=DateTimeField(),
),
Value(field.get_psql_format()),
function="to_char",
output_field=CharField(),
)
def prepare_value_for_db(self, instance, value):
@ -1222,13 +1220,35 @@ class LinkRowFieldType(FieldType):
can_be_primary_field = False
can_get_unique_values = False
def prepare_value_for_search(
self, field: Field, queryset: QuerySet
) -> Optional[LocalisedSearchVector]:
return None
def get_search_expression(self, field: Field, queryset: QuerySet) -> Expression:
remote_field = queryset.model._meta.get_field(field.db_column).remote_field
remote_model = remote_field.model
def is_searchable(self, field: Field) -> bool:
return False
primary_field_object = next(
object
for object in remote_model._field_objects.values()
if object["field"].primary
)
primary_field = primary_field_object["field"]
primary_field_type = primary_field_object["type"]
qs = remote_model.objects.filter(
**{f"{remote_field.related_name}__id": OuterRef("pk")}
).order_by()
# noinspection PyTypeChecker
return Subquery(
# This first values call forces django to group by the ID of the outer
# table we are updating rows in.
qs.values(f"{remote_field.related_name}__id")
.annotate(
value=StringAgg(
primary_field_type.get_search_expression(
primary_field, remote_model.objects
),
" ",
)
)
.values("value")[:1]
)
def enhance_queryset(self, queryset, field, name):
"""
@ -2127,6 +2147,46 @@ class LinkRowFieldType(FieldType):
# ourself.
return FieldDependencyHandler.get_via_dependants_of_link_field(field)
def row_of_dependency_updated(
self,
field: Field,
starting_row: "StartingRowType",
update_collector: "FieldUpdateCollector",
field_cache: "FieldCache",
via_path_to_starting_table: List["LinkRowField"],
):
update_collector.add_field_which_has_changed(
field, via_path_to_starting_table, send_field_updated_signal=False
)
super().row_of_dependency_updated(
field,
starting_row,
update_collector,
field_cache,
via_path_to_starting_table,
)
def field_dependency_updated(
self,
field: Field,
updated_field: Field,
updated_old_field: Field,
update_collector: "FieldUpdateCollector",
field_cache: "FieldCache",
via_path_to_starting_table: Optional[List[LinkRowField]],
):
update_collector.add_field_which_has_changed(
field, via_path_to_starting_table, send_field_updated_signal=False
)
super().field_dependency_updated(
field,
updated_field,
updated_old_field,
update_collector,
field_cache,
via_path_to_starting_table,
)
class EmailFieldType(CharFieldMatchingRegexFieldType):
type = "email"
@ -2163,21 +2223,17 @@ class FileFieldType(FieldType):
can_be_in_form_view = True
can_get_unique_values = False
def prepare_value_for_search(
self, field: FileField, queryset: QuerySet
) -> Optional[LocalisedSearchVector]:
def get_search_expression(self, field: FileField, queryset: QuerySet) -> Expression:
"""
Prepares a `FileField`.
"""
return LocalisedSearchVector(
extract_jsonb_array_values_to_single_string(
field,
queryset,
path_to_value_in_jsonb_list=[
Value("visible_name", output_field=CharField())
],
)
return extract_jsonb_array_values_to_single_string(
field,
queryset,
path_to_value_in_jsonb_list=[
Value("visible_name", output_field=CharField())
],
)
def _extract_file_names(self, value):
@ -2532,15 +2588,11 @@ class SingleSelectFieldType(SelectOptionBaseFieldType):
) -> int:
return getattr(row, f"{field_name}_id")
def prepare_value_for_search(
def get_search_expression(
self, field: SingleSelectField, queryset: QuerySet
) -> Optional[LocalisedSearchVector]:
return LocalisedSearchVector(
Subquery(
queryset.filter(pk=OuterRef("pk")).values(f"{field.db_column}__value")[
:1
]
)
) -> Expression:
return Subquery(
queryset.filter(pk=OuterRef("pk")).values(f"{field.db_column}__value")[:1]
)
def prepare_value_for_db(self, instance, value):
@ -2827,15 +2879,11 @@ class MultipleSelectFieldType(SelectOptionBaseFieldType):
def enhance_queryset(self, queryset, field, name):
return queryset.prefetch_related(name)
def prepare_value_for_search(
self, field: MultipleSelectField, queryset
) -> Optional[LocalisedSearchVector]:
return LocalisedSearchVector(
Subquery(
queryset.filter(pk=OuterRef("pk")).values(
aggregated=StringAgg(f"{field.db_column}__value", " ")
)[:1]
)
def get_search_expression(self, field: MultipleSelectField, queryset) -> Expression:
return Subquery(
queryset.filter(pk=OuterRef("pk")).values(
aggregated=StringAgg(f"{field.db_column}__value", " ")
)[:1]
)
def prepare_value_for_db(self, instance, value):
@ -3213,10 +3261,10 @@ class FormulaFieldType(ReadOnlyFieldType):
OperationalError: _stack_error_mapper,
}
def prepare_value_for_search(
def get_search_expression(
self, field: FormulaField, queryset: QuerySet
) -> Optional[LocalisedSearchVector]:
return self.to_baserow_formula_type(field.specific).prepare_value_for_search(
) -> Expression:
return self.to_baserow_formula_type(field.specific).get_search_expression(
field, queryset
)
@ -4184,15 +4232,13 @@ class MultipleCollaboratorsFieldType(FieldType):
child=field_serializer, required=required, **kwargs
)
def prepare_value_for_search(
def get_search_expression(
self, field: MultipleCollaboratorsField, queryset: QuerySet
) -> Optional[LocalisedSearchVector]:
return LocalisedSearchVector(
Subquery(
queryset.filter(pk=OuterRef("pk")).values(
aggregated=StringAgg(f"{field.db_column}__first_name", " ")
)[:1]
)
) -> Expression:
return Subquery(
queryset.filter(pk=OuterRef("pk")).values(
aggregated=StringAgg(f"{field.db_column}__first_name", " ")
)[:1]
)
def get_internal_value_from_db(

View file

@ -5,13 +5,19 @@ from django.contrib.postgres.fields import ArrayField, JSONField
from django.core.exceptions import ValidationError
from django.core.files.storage import Storage
from django.db import models as django_models
from django.db.models import BooleanField, CharField, DurationField, Q, QuerySet
from django.db.models import (
BooleanField,
CharField,
DurationField,
Expression,
Q,
QuerySet,
)
from django.db.models.fields.related import ForeignKey, ManyToManyField
from django.db.models.functions import Cast
from baserow.contrib.database.fields.constants import UPSERT_OPTION_DICT_KEY
from baserow.contrib.database.fields.field_sortings import OptionallyAnnotatedOrderBy
from baserow.contrib.database.search.expressions import LocalisedSearchVector
from baserow.core.registry import (
APIUrlsInstanceMixin,
APIUrlsRegistryMixin,
@ -129,22 +135,19 @@ class FieldType(
return value
def prepare_value_for_search(
self, field: Field, queryset: QuerySet
) -> Optional[LocalisedSearchVector]:
def get_search_expression(self, field: Field, queryset: QuerySet) -> Expression:
"""
When a row is created, updated or deleted, this `FieldType` method
must return a `SearchVector` that informs Postgres full-text search
how the column should be prepared so that the table's `tsvector`
column can be UPDATEd with it.
When a field/row is created, updated or restored, this `FieldType` method
must return a Django expression that can be cast to a string, which will be
used to create this field's search index column.
"""
return LocalisedSearchVector(Cast(field.db_column, output_field=CharField()))
return Cast(field.db_column, output_field=CharField())
def is_searchable(self, field: Field) -> bool:
"""
If this field needs a tsv search index column made for it then this should
return True. If True is returned then prepare_value_for_search should also
return True. If True is returned then get_search_expression should also
be implemented.
"""

View file

@ -11,7 +11,6 @@ from baserow.contrib.database.fields.field_sortings import OptionallyAnnotatedOr
from baserow.contrib.database.formula.ast import tree
from baserow.contrib.database.formula.registries import formula_function_registry
from baserow.contrib.database.formula.types.exceptions import InvalidFormulaType
from baserow.contrib.database.search.expressions import LocalisedSearchVector
T = TypeVar("T", bound="BaserowFormulaType")
@ -406,19 +405,17 @@ class BaserowFormulaType(abc.ABC):
def __init__(self, nullable=False):
self.nullable = nullable
def prepare_value_for_search(self, field, queryset):
def get_search_expression(self, field, queryset):
(
field_instance,
field_type,
) = self.get_baserow_field_instance_and_type()
# Ensure the fake field_instance can have db_column called on it
field_instance.id = field.id
return field_type.prepare_value_for_search(field_instance, queryset)
return field_type.get_search_expression(field_instance, queryset)
def prepare_value_for_search_in_array(self, field, queryset):
return LocalisedSearchVector(
extract_jsonb_array_values_to_single_string(field, queryset)
)
def get_search_expression_in_array(self, field, queryset) -> Expression:
return extract_jsonb_array_values_to_single_string(field, queryset)
def is_searchable(self, field):
(
@ -447,11 +444,11 @@ class BaserowFormulaInvalidType(BaserowFormulaType):
def should_recreate_when_old_type_was(self, old_type: "BaserowFormulaType") -> bool:
return False
def prepare_value_for_search(self, field, queryset):
return None
def get_search_expression(self, field, queryset) -> Expression:
return Value(None)
def prepare_value_for_search_in_array(self, field, queryset):
return None
def get_search_expression_in_array(self, field, queryset) -> Expression:
return Value(None)
def is_searchable(self, field) -> bool:
return False

View file

@ -36,7 +36,6 @@ from baserow.contrib.database.formula.types.formula_type import (
UnTyped,
)
from baserow.contrib.database.formula.types.serializers import LinkSerializer
from baserow.contrib.database.search.expressions import LocalisedSearchVector
from baserow.core.utils import list_to_comma_separated_string
@ -210,18 +209,16 @@ class BaserowFormulaLinkType(BaserowFormulaTextType):
) -> "BaserowExpression[BaserowFormulaValidType]":
return formula_function_registry.get("link")(literal(""))
def prepare_value_for_search(self, field, queryset):
return LocalisedSearchVector(
Concat(
json_extract_path(F(field.db_column), [Value("label")]),
Value(" ("),
json_extract_path(F(field.db_column), [Value("url")]),
Value(")"),
output_field=models.TextField(),
)
def get_search_expression(self, field, queryset):
return Concat(
json_extract_path(F(field.db_column), [Value("label")]),
Value(" ("),
json_extract_path(F(field.db_column), [Value("url")]),
Value(")"),
output_field=models.TextField(),
)
def prepare_value_for_search_in_array(self, field, queryset):
def get_search_expression_in_array(self, field, queryset):
def transform_value_to_text_func(x):
# Make sure we don't send the keys of the jsonb to ts_vector by extracting
# and re-ordering the label/url parameters to match the correct format
@ -234,12 +231,10 @@ class BaserowFormulaLinkType(BaserowFormulaTextType):
output_field=models.TextField(),
)
return LocalisedSearchVector(
extract_jsonb_array_values_to_single_string(
field,
queryset,
transform_value_to_text_func=transform_value_to_text_func,
)
return extract_jsonb_array_values_to_single_string(
field,
queryset,
transform_value_to_text_func=transform_value_to_text_func,
)
def is_searchable(self, field):
@ -499,12 +494,8 @@ class BaserowFormulaDateIntervalType(
def is_searchable(self, field):
return True
def prepare_value_for_search(
self, field: Field, queryset: QuerySet
) -> Optional[LocalisedSearchVector]:
return LocalisedSearchVector(
Cast(field.db_column, output_field=models.CharField())
)
def get_search_expression(self, field: Field, queryset: QuerySet) -> Expression:
return Cast(field.db_column, output_field=models.CharField())
class BaserowFormulaDateType(BaserowFormulaValidType):
@ -621,7 +612,7 @@ class BaserowFormulaDateType(BaserowFormulaValidType):
return Value(timezone.now(), output_field=field)
def prepare_value_for_search_in_array(self, field, queryset):
def get_search_expression_in_array(self, field, queryset):
def transform_value_to_text_func(x):
return Func(
Func(
@ -636,12 +627,10 @@ class BaserowFormulaDateType(BaserowFormulaValidType):
output_field=models.CharField(),
)
return LocalisedSearchVector(
extract_jsonb_array_values_to_single_string(
field,
queryset,
transform_value_to_text_func=transform_value_to_text_func,
)
return extract_jsonb_array_values_to_single_string(
field,
queryset,
transform_value_to_text_func=transform_value_to_text_func,
)
def get_order_by_in_array_expr(self, field, field_name, order_direction):
@ -668,8 +657,8 @@ class BaserowFormulaArrayType(BaserowFormulaValidType):
self.array_formula_type = sub_type.type
self.sub_type = sub_type
def prepare_value_for_search(self, field, queryset):
return self.sub_type.prepare_value_for_search_in_array(field, queryset)
def get_search_expression(self, field, queryset):
return self.sub_type.get_search_expression_in_array(field, queryset)
def is_searchable(self, field):
return True
@ -938,21 +927,17 @@ class BaserowFormulaSingleSelectType(BaserowFormulaValidType):
single_select_value, literal("")
)
def prepare_value_for_search(self, field, queryset):
return LocalisedSearchVector(
Cast(F(field.db_column + "__value"), output_field=models.CharField())
)
def get_search_expression(self, field, queryset):
return Cast(F(field.db_column + "__value"), output_field=models.CharField())
def prepare_value_for_search_in_array(self, field, queryset):
return LocalisedSearchVector(
extract_jsonb_array_values_to_single_string(
field,
queryset,
path_to_value_in_jsonb_list=[
Value("value", output_field=models.CharField()),
Value("value", output_field=models.CharField()),
],
)
def get_search_expression_in_array(self, field, queryset):
return extract_jsonb_array_values_to_single_string(
field,
queryset,
path_to_value_in_jsonb_list=[
Value("value", output_field=models.CharField()),
Value("value", output_field=models.CharField()),
],
)
def is_searchable(self, field):

View file

@ -1,5 +1,5 @@
class PostgresFullTextSearchDisabledException(Exception):
"""
Raised when the Postgres full-text specific search handler methods
are called, and `BASEROW_USE_PG_FULLTEXT_SEARCH` is disabled.
are called, and `USE_PG_FULLTEXT_SEARCH` is disabled.
"""

View file

@ -7,13 +7,13 @@ class LocalisedSearchVector(SearchVector):
A `SearchVector` which is responsible for two additional requirements:
1. The `SearchVector.config` is always set to what the value of
`BASEROW_USE_PG_FULLTEXT_SEARCH_CONFIG` is set to.
`PG_SEARCH_CONFIG` is set to.
2. The `Expression` given to it is always wrapped in `special_char_tokenizer`,
a Django `Func` which converts specific characters in the text into spaces.
See `special_char_tokenizer`'s docstring for more detailed information.
Any new `FieldType` which should be searchable should have its
`prepare_value_for_search` return a `LocalisedSearchVector`, in only very
`get_search_expression` return a `LocalisedSearchVector`, in only very
specific cases would a `SearchVector` be used outside `FieldType`.
"""

View file

@ -17,6 +17,7 @@ from baserow.contrib.database.db.schema import safe_django_schema_editor
from baserow.contrib.database.search.exceptions import (
PostgresFullTextSearchDisabledException,
)
from baserow.contrib.database.search.expressions import LocalisedSearchVector
from baserow.contrib.database.search.regexes import (
RE_ONE_OR_MORE_WHITESPACE,
RE_REMOVE_ALL_PUNCTUATION_ALREADY_REMOVED_FROM_TSVS_FOR_QUERY,
@ -68,11 +69,11 @@ class SearchHandler(
):
@classmethod
def full_text_enabled(cls):
return settings.BASEROW_USE_PG_FULLTEXT_SEARCH
return settings.USE_PG_FULLTEXT_SEARCH
@classmethod
def search_config(cls):
return settings.BASEROW_USE_PG_FULLTEXT_SEARCH_CONFIG
return settings.PG_SEARCH_CONFIG
@classmethod
def special_char_tokenizer(cls, expression: Expression) -> Func:
@ -100,9 +101,9 @@ class SearchHandler(
in the beginning, middle or end of the string. This is to match
Postgres' removal of hyphens in the simple dictionary.
:param expression: The Expression which a `FieldType.prepare_value_for_search`
has returned to `LocalisedSearchVector`, which in turn has called this
classmethod so that we convert the Expression's text into specific tokens.
:param expression: The Expression which a `FieldType.get_search_expression`
has returned, which is then passed to this classmethod so that we convert
the Expression's text into specific tokens.
:return: Func
"""
@ -234,7 +235,7 @@ class SearchHandler(
) -> List[FieldWithSearchVector]:
"""
Responsible for finding all specific fields in a table, then per `FieldType`,
calling `prepare_value_for_search` to get its `SearchVector` object, if
calling `get_search_expression` to get its `SearchVector` object, if
the field type is searchable.
"""
@ -256,7 +257,9 @@ class SearchHandler(
field_type = field_type_registry.get_by_model(field)
if field_type.is_searchable(field):
search_vector = field_type.prepare_value_for_search(field, queryset)
search_vector = LocalisedSearchVector(
field_type.get_search_expression(field, queryset)
)
else:
search_vector = Value(None)
return FieldWithSearchVector(field, search_vector)
@ -366,7 +369,7 @@ class SearchHandler(
was_full_column_update = not update_tsvectors_for_changed_rows_only
if (
was_full_column_update
and settings.BASEROW_AUTO_VACUUM_AFTER_SEARCH_UPDATE
and settings.AUTO_VACUUM_AFTER_SEARCH_UPDATE
and not settings.TESTS
):
cls.vacuum_table(table)

View file

@ -12,7 +12,7 @@ from baserow.contrib.database.search.exceptions import (
@app.task(
queue="export",
time_limit=settings.BASEROW_CELERY_TSV_MODIFICATION_HARD_LIMIT,
time_limit=settings.CELERY_SEARCH_UPDATE_HARD_TIME_LIMIT,
)
def async_update_tsvector_columns(
table_id: int,
@ -20,15 +20,14 @@ def async_update_tsvector_columns(
field_ids_to_restrict_update_to: Optional[List[int]] = None,
):
"""
Responsible for asynchronously updating a `tsvector` column on a table.
Responsible for asynchronously updating the `tsvector` columns on a table.
:param table_id: The ID of the table we'd like to update a tsvector on.
:param table_id: The ID of the table we'd like to update the tsvectors for.
:param update_tsvs_for_changed_rows_only: By default we will only update rows on
the table which have changed since the last re-index. If set to `False`, we
will fully re-index the table.
the table which have changed since the last search update.
If set to `False`, we will index all cells that match the other parameters.
:param field_ids_to_restrict_update_to: If provided only the fields matching the
provided ids will have their tsv columns updated.
:return: None
"""
from baserow.contrib.database.search.handler import SearchHandler
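
A hedged sketch of queuing the task from application code; the table id is a placeholder and the keyword defaults are assumptions.

# Refresh the tsvector columns of table 42 in the background, limited to rows
# flagged as changed since the last search update.
async_update_tsvector_columns.delay(
    table_id=42,
    update_tsvs_for_changed_rows_only=True,
)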

View file

@ -169,8 +169,8 @@ class TableModelQuerySet(models.QuerySet):
def search_all_fields(
self,
search,
only_search_by_field_ids=None,
search: str,
only_search_by_field_ids: Optional[Iterable[int]] = None,
search_mode: Optional[SearchModes] = None,
):
"""
@ -180,11 +180,9 @@ class TableModelQuerySet(models.QuerySet):
searched.
:param search: The search query.
:type search: str
:param only_search_by_field_ids: Only field ids in this iterable will be
filtered by the search term. Other fields not in the iterable will be
ignored and not be filtered.
:type only_search_by_field_ids: Optional[Iterable[int]]
:param search_mode: In `MODE_COMPAT` we will use the old search method, using
the LIKE operator on each column. In `MODE_FT_WITH_COUNT` we will switch
to using Postgres full-text search.
@ -198,7 +196,7 @@ class TableModelQuerySet(models.QuerySet):
# If we are searching with Postgres full text search (whether with
# or without a COUNT)...
if search_mode == SearchModes.MODE_FT_WITH_COUNT:
# If `BASEROW_USE_PG_FULLTEXT_SEARCH` is enabled, then use
# If `USE_PG_FULLTEXT_SEARCH` is enabled, then use
# the Postgres full-text search functionality instead.
if self.model.baserow_table.tsvectors_are_supported:
return self.pg_search(search, only_search_by_field_ids)
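
A hedged usage sketch of the two search modes described in the docstring above; the table model and field id are assumptions.

model = table.get_model()
# Full-text mode: word-based matching backed by the tsvector columns.
rows = model.objects.all().search_all_fields(
    "jeff", search_mode=SearchModes.MODE_FT_WITH_COUNT
)
# Compat mode: the older LIKE-based matching, restricted here to one field.
legacy_rows = model.objects.all().search_all_fields(
    "jeff",
    only_search_by_field_ids=[field.id],
    search_mode=SearchModes.MODE_COMPAT,
)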
@ -764,7 +762,7 @@ class Table(
fields = []
# By default, we create an index on the `order` and `id`
# columns. If `BASEROW_USE_PG_FULLTEXT_SEARCH` is enabled, which
# columns. If `USE_PG_FULLTEXT_SEARCH` is enabled, which
# it is by default, we'll include a GIN index on the table's
# `tsvector` column.
indexes = [

View file

@ -1966,6 +1966,8 @@ class ViewHandler(metaclass=baserow_trace_methods(tracer)):
fields provide those field ids in this optional iterable. Other fields
not present in the iterable will not be searched and filtered down by the
search term.
:param apply_filters: Whether to apply view filters to the resulting queryset.
:param search_mode: The type of search to perform if a search term is provided.
:return: The appropriate queryset for the provided view.
"""

View file

@ -513,7 +513,7 @@ def migrator(second_separate_database_for_migrations, reset_schema):
@pytest.fixture
def disable_full_text_search(settings):
settings.BASEROW_USE_PG_FULLTEXT_SEARCH = False
settings.USE_PG_FULLTEXT_SEARCH = False
@pytest.fixture

View file

@ -73,9 +73,8 @@ def test_can_add_fields_in_same_starting_table_with_row_filter(
@pytest.mark.django_db
@patch("baserow.contrib.database.fields.signals.field_updated.send")
def test_can_only_trigger_update_for_rows_joined_to_a_starting_row_across_a_m2m(
send_mock, api_client, data_fixture, django_assert_num_queries
api_client, data_fixture, django_assert_num_queries
):
user = data_fixture.create_user()
database = data_fixture.create_database_application(user=user)
@ -110,42 +109,44 @@ def test_can_only_trigger_update_for_rows_joined_to_a_starting_row_across_a_m2m(
first_table_2_row.save()
field_cache = FieldCache()
update_collector = FieldUpdateCollector(
second_table, starting_row_ids=[second_table_a_row.id]
)
update_collector.add_field_with_pending_update_statement(
first_table_primary_field,
Value("other"),
via_path_to_starting_table=[link_row_field],
)
# Cache the models so we are only asserting about the update queries
field_cache.cache_model(first_table.get_model())
field_cache.cache_model(second_table.get_model())
# Only one field was updated so only one update statement is expected
with django_assert_num_queries(1):
updated_fields = update_collector.apply_updates_and_get_updated_fields(
field_cache
with patch(
"baserow.contrib.database.fields.signals.field_updated.send"
) as send_mock:
update_collector = FieldUpdateCollector(
second_table, starting_row_ids=[second_table_a_row.id]
)
update_collector.add_field_with_pending_update_statement(
first_table_primary_field,
Value("other"),
via_path_to_starting_table=[link_row_field],
)
# Cache the models so we are only asserting about the update queries
field_cache.cache_model(first_table.get_model())
field_cache.cache_model(second_table.get_model())
# Only one field was updated so only one update statement is expected
with django_assert_num_queries(1):
updated_fields = update_collector.apply_updates_and_get_updated_fields(
field_cache
)
# No field in the starting table (second_table) was updated
assert updated_fields == []
first_table_1_row.refresh_from_db()
first_table_2_row.refresh_from_db()
assert first_table_1_row.primary == "other"
assert first_table_2_row.primary == "2"
# No field in the starting table (second_table) was updated
assert updated_fields == []
first_table_1_row.refresh_from_db()
first_table_2_row.refresh_from_db()
assert first_table_1_row.primary == "other"
assert first_table_2_row.primary == "2"
send_mock.assert_not_called()
update_collector.send_additional_field_updated_signals()
send_mock.assert_called_once()
assert send_mock.call_args[1]["field"].id == first_table_primary_field.id
assert send_mock.call_args[1]["user"] is None
assert send_mock.call_args[1]["related_fields"] == []
send_mock.assert_not_called()
update_collector.send_additional_field_updated_signals()
send_mock.assert_called_once()
assert send_mock.call_args[1]["field"].id == first_table_primary_field.id
assert send_mock.call_args[1]["user"] is None
assert send_mock.call_args[1]["related_fields"] == []
@pytest.mark.django_db
@patch("baserow.contrib.database.fields.signals.field_updated.send")
def test_can_trigger_update_for_rows_joined_to_a_starting_row_across_a_m2m_and_back(
send_mock, api_client, data_fixture, django_assert_num_queries
api_client, data_fixture, django_assert_num_queries
):
user = data_fixture.create_user()
database = data_fixture.create_database_application(user=user)
@ -182,56 +183,58 @@ def test_can_trigger_update_for_rows_joined_to_a_starting_row_across_a_m2m_and_b
first_table_2_row.link.add(second_table_b_row.id)
first_table_2_row.save()
field_cache = FieldCache()
update_collector = FieldUpdateCollector(
second_table, starting_row_ids=[second_table_a_row.id]
)
update_collector.add_field_with_pending_update_statement(
first_table_primary_field,
Value("other"),
via_path_to_starting_table=[link_row_field],
)
update_collector.add_field_with_pending_update_statement(
second_table_primary_field,
Value("other"),
via_path_to_starting_table=[
link_row_field,
link_row_field.link_row_related_field,
],
)
# Cache the models so we are only asserting about the update queries
field_cache.cache_model(first_table.get_model())
field_cache.cache_model(second_table.get_model())
# Two fields were updated with an update statement for each table
with django_assert_num_queries(2):
updated_fields = update_collector.apply_updates_and_get_updated_fields(
field_cache
with patch(
"baserow.contrib.database.fields.signals.field_updated.send"
) as send_mock:
field_cache = FieldCache()
update_collector = FieldUpdateCollector(
second_table, starting_row_ids=[second_table_a_row.id]
)
update_collector.add_field_with_pending_update_statement(
first_table_primary_field,
Value("other"),
via_path_to_starting_table=[link_row_field],
)
update_collector.add_field_with_pending_update_statement(
second_table_primary_field,
Value("other"),
via_path_to_starting_table=[
link_row_field,
link_row_field.link_row_related_field,
],
)
# Cache the models so we are only asserting about the update queries
field_cache.cache_model(first_table.get_model())
field_cache.cache_model(second_table.get_model())
# Two fields were updated with an update statement for each table
with django_assert_num_queries(2):
updated_fields = update_collector.apply_updates_and_get_updated_fields(
field_cache
)
assert updated_fields == [second_table_primary_field]
first_table_1_row.refresh_from_db()
first_table_2_row.refresh_from_db()
second_table_a_row.refresh_from_db()
second_table_b_row.refresh_from_db()
second_table_unlinked_row.refresh_from_db()
assert first_table_1_row.primary == "other"
assert first_table_2_row.primary == "2"
assert second_table_a_row.primary == "other"
assert second_table_b_row.primary == "other"
assert second_table_unlinked_row.primary == "unlinked"
assert updated_fields == [second_table_primary_field]
first_table_1_row.refresh_from_db()
first_table_2_row.refresh_from_db()
second_table_a_row.refresh_from_db()
second_table_b_row.refresh_from_db()
second_table_unlinked_row.refresh_from_db()
assert first_table_1_row.primary == "other"
assert first_table_2_row.primary == "2"
assert second_table_a_row.primary == "other"
assert second_table_b_row.primary == "other"
assert second_table_unlinked_row.primary == "unlinked"
send_mock.assert_not_called()
update_collector.send_additional_field_updated_signals()
send_mock.assert_called_once()
assert send_mock.call_args[1]["field"].id == first_table_primary_field.id
assert send_mock.call_args[1]["user"] is None
assert send_mock.call_args[1]["related_fields"] == []
send_mock.assert_not_called()
update_collector.send_additional_field_updated_signals()
send_mock.assert_called_once()
assert send_mock.call_args[1]["field"].id == first_table_primary_field.id
assert send_mock.call_args[1]["user"] is None
assert send_mock.call_args[1]["related_fields"] == []
@pytest.mark.django_db
@patch("baserow.contrib.database.fields.signals.field_updated.send")
def test_update_statements_at_the_same_path_node_are_grouped_into_one(
send_mock, api_client, data_fixture, django_assert_num_queries
api_client, data_fixture, django_assert_num_queries
):
user = data_fixture.create_user()
database = data_fixture.create_database_application(user=user)
@ -271,55 +274,58 @@ def test_update_statements_at_the_same_path_node_are_grouped_into_one(
first_table_2_row.link.add(second_table_b_row.id)
first_table_2_row.save()
field_cache = FieldCache()
update_collector = FieldUpdateCollector(
second_table, starting_row_ids=[second_table_a_row.id]
)
update_collector.add_field_with_pending_update_statement(
first_table_primary_field,
Value("other"),
via_path_to_starting_table=[link_row_field],
)
update_collector.add_field_with_pending_update_statement(
first_table_other_field,
Value("updated"),
via_path_to_starting_table=[link_row_field],
)
update_collector.add_field_with_pending_update_statement(
second_table_primary_field,
Value("other"),
via_path_to_starting_table=[
link_row_field,
link_row_field.link_row_related_field,
],
)
# Cache the models so we are only asserting about the update queries
field_cache.cache_model(first_table.get_model())
field_cache.cache_model(second_table.get_model())
# Three fields were updated but two are in the same path node (same table) and so
# only one update per table expected
with django_assert_num_queries(2):
updated_fields = update_collector.apply_updates_and_get_updated_fields(
field_cache
with patch(
"baserow.contrib.database.fields.signals.field_updated.send"
) as send_mock:
field_cache = FieldCache()
update_collector = FieldUpdateCollector(
second_table, starting_row_ids=[second_table_a_row.id]
)
update_collector.add_field_with_pending_update_statement(
first_table_primary_field,
Value("other"),
via_path_to_starting_table=[link_row_field],
)
update_collector.add_field_with_pending_update_statement(
first_table_other_field,
Value("updated"),
via_path_to_starting_table=[link_row_field],
)
update_collector.add_field_with_pending_update_statement(
second_table_primary_field,
Value("other"),
via_path_to_starting_table=[
link_row_field,
link_row_field.link_row_related_field,
],
)
# Cache the models so we are only asserting about the update queries
field_cache.cache_model(first_table.get_model())
field_cache.cache_model(second_table.get_model())
# Three fields were updated but two are in the same path node (same table) and
# so only one update per table expected
with django_assert_num_queries(2):
updated_fields = update_collector.apply_updates_and_get_updated_fields(
field_cache
)
assert updated_fields == [second_table_primary_field]
first_table_1_row.refresh_from_db()
first_table_2_row.refresh_from_db()
second_table_a_row.refresh_from_db()
second_table_b_row.refresh_from_db()
second_table_unlinked_row.refresh_from_db()
assert first_table_1_row.primary == "other"
assert first_table_1_row.other == "updated"
assert first_table_2_row.primary == "2"
assert first_table_2_row.other == "y"
assert second_table_a_row.primary == "other"
assert second_table_b_row.primary == "other"
assert second_table_unlinked_row.primary == "unlinked"
assert updated_fields == [second_table_primary_field]
first_table_1_row.refresh_from_db()
first_table_2_row.refresh_from_db()
second_table_a_row.refresh_from_db()
second_table_b_row.refresh_from_db()
second_table_unlinked_row.refresh_from_db()
assert first_table_1_row.primary == "other"
assert first_table_1_row.other == "updated"
assert first_table_2_row.primary == "2"
assert first_table_2_row.other == "y"
assert second_table_a_row.primary == "other"
assert second_table_b_row.primary == "other"
assert second_table_unlinked_row.primary == "unlinked"
send_mock.assert_not_called()
update_collector.send_additional_field_updated_signals()
send_mock.assert_called_once()
assert send_mock.call_args[1]["field"].id == first_table_primary_field.id
assert send_mock.call_args[1]["user"] is None
assert send_mock.call_args[1]["related_fields"] == [first_table_other_field]
send_mock.assert_not_called()
update_collector.send_additional_field_updated_signals()
send_mock.assert_called_once()
assert send_mock.call_args[1]["field"].id == first_table_primary_field.id
assert send_mock.call_args[1]["user"] is None
assert send_mock.call_args[1]["related_fields"] == [first_table_other_field]

View file

@ -14,7 +14,7 @@ from baserow.core.user_files.handler import UserFileHandler
@pytest.mark.django_db(transaction=True)
def test_textfield_prepare_value_for_search(data_fixture, enable_singleton_testing):
def test_textfield_get_search_expression(data_fixture, enable_singleton_testing):
with transaction.atomic():
user = data_fixture.create_user()
database = data_fixture.create_database_application(user=user)
@ -38,7 +38,7 @@ def test_textfield_prepare_value_for_search(data_fixture, enable_singleton_testi
@pytest.mark.django_db(transaction=True)
def test_longtextfield_prepare_value_for_search(data_fixture, enable_singleton_testing):
def test_longtextfield_get_search_expression(data_fixture, enable_singleton_testing):
with transaction.atomic():
user = data_fixture.create_user()
database = data_fixture.create_database_application(user=user)
@ -62,7 +62,7 @@ def test_longtextfield_prepare_value_for_search(data_fixture, enable_singleton_t
@pytest.mark.django_db(transaction=True)
def test_numberfield_prepare_value_for_search(data_fixture, enable_singleton_testing):
def test_numberfield_get_search_expression(data_fixture, enable_singleton_testing):
with transaction.atomic():
user = data_fixture.create_user()
database = data_fixture.create_database_application(user=user)
@ -86,7 +86,7 @@ def test_numberfield_prepare_value_for_search(data_fixture, enable_singleton_tes
@pytest.mark.django_db(transaction=True)
def test_filefield_prepare_value_for_search(
def test_filefield_get_search_expression(
data_fixture, tmpdir, enable_singleton_testing
):
with transaction.atomic():
@ -136,7 +136,7 @@ def test_filefield_prepare_value_for_search(
@pytest.mark.django_db(transaction=True)
def test_urlfield_prepare_value_for_search(data_fixture, enable_singleton_testing):
def test_urlfield_get_search_expression(data_fixture, enable_singleton_testing):
with transaction.atomic():
user = data_fixture.create_user()
database = data_fixture.create_database_application(user=user)
@ -160,7 +160,7 @@ def test_urlfield_prepare_value_for_search(data_fixture, enable_singleton_testin
@pytest.mark.django_db(transaction=True)
def test_emailfield_prepare_value_for_search(data_fixture, enable_singleton_testing):
def test_emailfield_get_search_expression(data_fixture, enable_singleton_testing):
with transaction.atomic():
user = data_fixture.create_user()
database = data_fixture.create_database_application(user=user)
@ -184,7 +184,7 @@ def test_emailfield_prepare_value_for_search(data_fixture, enable_singleton_test
@pytest.mark.django_db(transaction=True)
def test_datefield_without_time_prepare_value_for_search(
def test_datefield_without_time_get_search_expression(
data_fixture, enable_singleton_testing
):
with transaction.atomic():
@ -210,7 +210,7 @@ def test_datefield_without_time_prepare_value_for_search(
@pytest.mark.django_db(transaction=True)
def test_datefield_with_time_prepare_value_for_search(
def test_datefield_with_time_get_search_expression(
data_fixture, enable_singleton_testing
):
with transaction.atomic():
@ -238,7 +238,7 @@ def test_datefield_with_time_prepare_value_for_search(
@pytest.mark.django_db(transaction=True)
def test_singleselectfield_prepare_value_for_search(
def test_singleselectfield_get_search_expression(
data_fixture, enable_singleton_testing
):
with transaction.atomic():
@ -271,9 +271,7 @@ def test_singleselectfield_prepare_value_for_search(
@pytest.mark.django_db(transaction=True)
def test_multiselectfield_prepare_value_for_search(
data_fixture, enable_singleton_testing
):
def test_multiselectfield_get_search_expression(data_fixture, enable_singleton_testing):
with transaction.atomic():
user = data_fixture.create_user()
@ -326,7 +324,7 @@ def test_multiselectfield_prepare_value_for_search(
@pytest.mark.django_db(transaction=True)
def test_collaboratorfield_prepare_value_for_search(
def test_collaboratorfield_get_search_expression(
data_fixture, enable_singleton_testing
):
with transaction.atomic():
@ -367,7 +365,7 @@ def test_collaboratorfield_prepare_value_for_search(
@pytest.mark.django_db(transaction=True)
def test_lookupfield_prepare_value_for_search(
def test_lookupfield_get_search_expression(
data_fixture,
enable_singleton_testing,
django_assert_num_queries,
@ -435,3 +433,65 @@ def test_lookupfield_prepare_value_for_search(
assert qs.get().id == table_b_looking_up_jeff_and_clive.id
assert not model.objects.all().pg_search("steve").exists()
@pytest.mark.django_db(transaction=True)
def test_linkrowfield_get_search_expression(
data_fixture,
enable_singleton_testing,
django_assert_num_queries,
):
with transaction.atomic():
workspace = data_fixture.create_workspace()
creator = data_fixture.create_user(workspace=workspace)
table_a, table_b, link_field = data_fixture.create_two_linked_tables(
user=creator, table_kwargs={"force_add_tsvectors": True}
)
table_a_primary = table_a.field_set.get(primary=True)
table_a_row_1 = RowHandler().create_row(
user=creator,
table=table_a,
values={
f"{link_field.db_column}": [],
f"{table_a_primary.db_column}": "jeff",
},
)
table_a_row_2 = RowHandler().create_row(
user=creator,
table=table_a,
values={
f"{link_field.db_column}": [],
f"{table_a_primary.db_column}": "clive",
},
)
table_b_row_linking_to_jeff = RowHandler().create_row(
user=creator,
table=table_b,
values={
f"field_{link_field.link_row_related_field_id}": [table_a_row_1.id]
},
)
table_b_linking_to_jeff_and_clive = RowHandler().create_row(
user=creator,
table=table_b,
values={
f"field_{link_field.link_row_related_field_id}": [
table_a_row_1.id,
table_a_row_2.id,
]
},
)
model = table_b.get_model()
qs = list(model.objects.all().pg_search("jeff").values_list("id", flat=True))
assert qs == [
table_b_row_linking_to_jeff.id,
table_b_linking_to_jeff_and_clive.id,
]
qs = model.objects.all().pg_search("clive")
assert qs.get().id == table_b_linking_to_jeff_and_clive.id
assert not model.objects.all().pg_search("steve").exists()

View file

@ -0,0 +1,7 @@
{
"type": "breaking_change",
"message": "Baserows default max per table field limit now defaults to 600 due to full text search and undo/redo needing to use the rest of the postgres 1600 column limit. This can be reverted using the new BASEROW_MAX_FIELD_LIMIT env var. If you want to have more than 600 fields we also recommend you turn off full text search as it needs an extra column per field to work, this can be done by setting BASEROW_USE_PG_FULLTEXT_SEARCH to false.",
"issue_number": 1706,
"bullet_points": [],
"created_at": "2023-07-08"
}

View file

@ -0,0 +1,7 @@
{
"type": "breaking_change",
"message": "Before when searching for a number say 1, it would match the row with id 1, 10, 11, 12 etc. Now it will only match rows with that exact id, so searching for 1 will match the row with id 1 and not the row with id 10 etc.",
"issue_number": 1706,
"bullet_points": [],
"created_at": "2023-07-08"
}

View file

@ -0,0 +1,7 @@
{
"type": "breaking_change",
"message": "By default in the UI search now uses full text mode which ignores punctuation and behaves differently than the previous exact matching. For now the API defaults to search_mode=compat, however in the coming months we will switch the API default to the new mode instead.",
"issue_number": 1706,
"bullet_points": [],
"created_at": "2023-07-08"
}

View file

@ -0,0 +1,7 @@
{
"type": "feature",
"message": "After field updates, deletions and creations Baserow now automatically vacuums the table in a background task to improve performance and reduce table disk size. This can be disabled by setting the new env var BASEROW_AUTO_VACUUM=false.",
"issue_number": 1706,
"bullet_points": [],
"created_at": "2023-07-08"
}

View file

@ -1,7 +1,7 @@
{
"type": "feature",
"message": "Rework Baserow row search to be much faster and instead search for words instead of exact matches including punctuation.",
"message": "Rework Baserow row search to be much faster, work for all field types and instead search for words instead of exact matches including punctuation. Please note this new full text search mode can increase the disk space used by your Baserow tables upto 3x, to prevent this you can disable the new search and stick with the legacy slower but lower disk space usage search by setting the new env var BASEROW_USE_PG_FULLTEXT_SEARCH=false.",
"issue_number": 1706,
"bullet_points": [],
"created_at": "2023-07-03"
}

View file

@ -134,6 +134,7 @@ x-backend-variables: &backend-variables
BASEROW_PERSONAL_VIEW_LOWEST_ROLE_ALLOWED:
BASEROW_DISABLE_LOCKED_MIGRATIONS:
BASEROW_USE_PG_FULLTEXT_SEARCH:
BASEROW_AUTO_VACUUM:
services:
# A caddy reverse proxy sitting in-front of all the services. Responsible for routing

View file

@ -153,6 +153,7 @@ x-backend-variables: &backend-variables
BASEROW_PERSONAL_VIEW_LOWEST_ROLE_ALLOWED:
BASEROW_DISABLE_LOCKED_MIGRATIONS:
BASEROW_USE_PG_FULLTEXT_SEARCH:
BASEROW_AUTO_VACUUM:
services:
backend:

View file

@ -152,6 +152,7 @@ x-backend-variables: &backend-variables
BASEROW_PERSONAL_VIEW_LOWEST_ROLE_ALLOWED:
BASEROW_DISABLE_LOCKED_MIGRATIONS:
BASEROW_USE_PG_FULLTEXT_SEARCH:
BASEROW_AUTO_VACUUM:
services:
# A caddy reverse proxy sitting in-front of all the services. Responsible for routing

View file

@ -56,10 +56,9 @@ The installation methods referred to in the variable descriptions are:
| BATCH\_ROWS\_SIZE\_LIMIT | Controls how many rows can be created, deleted or updated at once using the batch endpoints. | 200 |
| BASEROW\_MAX\_SNAPSHOTS\_PER\_GROUP | Controls how many application snapshots can be created per group. | -1 (unlimited) |
| BASEROW\_SNAPSHOT\_EXPIRATION\_TIME\_DAYS | Controls when snapshots expire, set in number of days. Expired snapshots will be automatically deleted. | 360 |
| BASEROW\_CELERY\_TSV\_MODIFICATION\_HARD\_LIMIT | How long the Postgres full-text search Celery tasks can run for before being killed. | 900 |
| BASEROW\_USE\_PG\_FULLTEXT\_SEARCH | By default, Baserow will use Postgres full-text as its search backend. If the product is installed on a system with limited disk space, and less accurate results / degraded search performance is acceptable, then switch this setting off. | true |
| BASEROW\_USE\_PG\_FULLTEXT\_SEARCH\_CONFIG | The Postgres search configuration to use in `to_tsvector` and `to_tsquery` calls. | simple |
| BASEROW\_AUTO\_VACUUM\_AFTER\_SEARCH_UPDATE | Whether Baserow should perform a `VACUUM` on a table after one or more fields required a `tsvector` update. | true |
| BASEROW\_CELERY\_SEARCH\_UPDATE\_HARD\_TIME\_LIMIT | How long the Postgres full-text search Celery tasks can run for before being killed. | 1800 |
| BASEROW\_USE\_PG\_FULLTEXT\_SEARCH | By default, Baserow will use Postgres full-text as its search backend. If the product is installed on a system with limited disk space, and less accurate results / degraded search performance is acceptable, then switch this setting off by setting it to false. | true |
| BASEROW\_AUTO\_VACUUM | Whether Baserow should perform a `VACUUM` on a table in a background task after one or more fields changed in the table when full text search is enabled. | true |
### Backend Database Configuration
| Name | Description | Defaults |
@ -240,7 +239,7 @@ domain than your Baserow, you need to make sure CORS is configured correctly.
| ADDITIONAL\_MODULES | **Internal** A list of file paths to Nuxt module.js files to load as additional Nuxt modules into Baserow on startup. | |
| BASEROW\_DISABLE\_GOOGLE\_DOCS\_FILE\_PREVIEW | Set to \`true\` or \`1\` to disable Google docs file preview. | |
| BASEROW_MAX_SNAPSHOTS_PER_GROUP | Controls how many application snapshots can be created per workspace. | -1 (unlimited) |
| BASEROW\_USE\_PG\_FULLTEXT\_SEARCH | By default, Baserow will use Postgres full-text as its search backend. If the product is installed on a system with limited disk space, and less accurate results / degraded search performance is acceptable, then switch this setting off. | true |
| BASEROW\_USE\_PG\_FULLTEXT\_SEARCH | By default, Baserow will use Postgres full-text as its search backend. If the product is installed on a system with limited disk space, and less accurate results / degraded search performance is acceptable, then switch this setting off by setting it to false. | true |
### SSO Configuration
| Name | Description | Defaults |

View file

@ -1,6 +1,4 @@
import { getClient } from '../client'
import { faker } from '@faker-js/faker'
import {Database} from "./database";
import {User} from "./user";
import {Table} from "./table";
@ -25,31 +23,37 @@ export async function createField(user: User, fieldName: string, type: string, f
type: type,
...fieldSettings
})
return new Field(
const field1 = new Field(
response.data.id,
response.data.name,
response.data.type,
table,
response.data
)
);
console.log(`created field ${field1.name} in ${field1.table.name}`)
return field1
}
export async function updateField(user: User, fieldName: string, type: string, fieldSettings: any, field: Field): Promise<Field> {
const response: any = await getClient(user).patch(`database/fields/${field.id}/`, {
const data = {
name: fieldName,
type: type,
...fieldSettings
})
return new Field(
};
const response: any = await getClient(user).patch(`database/fields/${field.id}/`, data)
const f = new Field(
response.data.id,
response.data.name,
response.data.type,
field.table,
response.data
)
console.log(`updated field ${field.name} to ${f.name} in ${f.table.name}`)
return f
}
export async function deleteField(user: User, field: Field): Promise<void> {
console.log(`deleting field ${field.name} in ${field.table.name}`)
await getClient(user).delete(`database/fields/${field.id}/`)
}
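
Taken together, `createField`, `updateField`, and `deleteField` give the end-to-end tests a small field lifecycle API over the REST endpoints. A minimal usage sketch, assuming a `user` and `table` obtained from the existing user and table fixtures (the field name and number settings below are purely illustrative):

```typescript
// Hypothetical helper built on top of the fixture functions above.
async function exerciseFieldLifecycle(user: User, table: Table): Promise<void> {
  // Create a plain text field, convert it to a 2-decimal number field, then clean up.
  const textField = await createField(user, 'example field', 'text', {}, table)
  const numberField = await updateField(
    user,
    'example field',
    'number',
    { number_decimal_places: 2, number_negative: true },
    textField
  )
  await deleteField(user, numberField)
}
```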

View file

@ -1,5 +1,4 @@
import { getClient } from '../client'
import { faker } from '@faker-js/faker'
import {getClient} from '../client'
import {Database} from "./database";
import {User} from "./user";

View file

@ -165,6 +165,6 @@ export class TablePage extends BaserowPage {
}
async waitForFirstCellToBeBlank() {
await expect(this.firstNonPrimaryCell.locator('*')).toHaveCount(0)
await expect(this.firstNonPrimaryCell.locator('div *')).toHaveCount(0)
}
}

View file

@ -11,9 +11,11 @@ import {
deleteAllNonPrimaryFieldsFromTable,
deleteField,
Field,
getFieldsForTable,
updateField
} from "../../fixtures/field";
import {updateRows} from "../../fixtures/rows";
import ro from "@faker-js/faker/locales/ro";
let user = null;
let sharedPageTestData: SharedTestData = null;
@ -91,7 +93,7 @@ class TestCase {
if (this.subFieldSetup.setCellFunc) {
await this.subFieldSetup.setCellFunc(tablePage, this.cellValue)
} else {
const rowValue = {id:1}
const rowValue = {id: 1}
rowValue[this.subFieldSetup.field.name] = null
await updateRows(user, sharedPageTestData.tableA, [rowValue])
rowValue[this.subFieldSetup.field.name] = this.cellValue
@ -111,7 +113,7 @@ class SubFieldSetup {
field: Field
otherFields: Field[]
constructor(public name: string, public fieldType: FieldType, public fieldSettings: any, public otherFieldsToMake: () => any[], public setCellFunc: Function | null, public testCaseInputs: TestCaseInput[]) {
constructor(public name: string, public fieldType: FieldType, public fieldSettings: () => any, public otherFieldsToMakeOrUpdate: () => any[], public setCellFunc: Function | null, public testCaseInputs: TestCaseInput[]) {
this.setup = false
this.testCases = []
for (let i = 0; i < testCaseInputs.length; i++) {
@ -133,16 +135,26 @@ class SubFieldSetup {
// Premake the field so it is always the first field in the list
this.field = await createField(user, this.name, 'text', {}, sharedPageTestData.tableA)
this.otherFields = []
for (const otherField of this.otherFieldsToMake()) {
this.otherFields.push(await createField(user, otherField.name, otherField.type, otherField.settings, otherField.table))
for (const otherField of this.otherFieldsToMakeOrUpdate()) {
if (otherField.updatePrimary) {
const primary = await this._getPrimary(otherField);
this.otherFields.push(await updateField(user, otherField.name, otherField.type, otherField.settings, primary))
} else {
this.otherFields.push(await createField(user, otherField.name, otherField.type, otherField.settings, otherField.table))
}
}
this.field = await updateField(user, this.name, this.fieldType.type, this.fieldSettings, this.field)
this.field = await updateField(user, this.name, this.fieldType.type, this.fieldSettings(), this.field)
// Double check page is as expected
await tablePage.waitForLoadingOverlayToDisappear()
await expect(tablePage.fields()).toHaveCount(this.expectedNumFields(), {timeout: 30000})
this.setup = true
}
private async _getPrimary(otherField) {
const fields = await getFieldsForTable(user, otherField.table)
return fields.filter((f) => f.fieldSettings.primary)[0];
}
private expectedNumFields() {
// Primary + Test + Other test fields
return 2 + this.otherFields.filter((f) => f.table.id === sharedPageTestData.tableA.id).length;
@ -151,7 +163,21 @@ class SubFieldSetup {
async tearDown(tablePage: TablePage) {
await deleteField(user, this.field)
for (const otherField of this.otherFields.reverse()) {
await deleteField(user, otherField)
if (otherField.fieldSettings.primary) {
const primary = await this._getPrimary(otherField);
await updateField(user, primary.name, 'text', {}, primary)
const blankRows = []
for (let i = 1; i < 3; i++) {
const o = {
id: i
}
o[primary.name] = null
blankRows.push(o)
}
await updateRows(user, primary.table, blankRows)
} else {
await deleteField(user, otherField)
}
}
await expect(tablePage.fields()).toHaveCount(1)
}
@ -175,7 +201,7 @@ type SubFieldSetupInput = {
name?: string,
testCases?: TestCaseInput[]
fieldSettings?: Record<string, any>,
otherFieldsToMake?: () => FieldInput[],
otherFieldsToMakeOrUpdate?: () => FieldInput[],
setCellValueFunc?: (TablePage, any) => void
}
@ -185,14 +211,24 @@ class FieldType {
constructor(public type: string, public subFieldSetupInputs: SubFieldSetupInput[], public defaultSubFieldSetupValue: SubFieldSetupInput) {
this.subFieldSetups = subFieldSetupInputs.map((i) => new SubFieldSetup(i.name,
this,
{...defaultSubFieldSetupValue.fieldSettings, ...i.fieldSettings},
() => {
let defaultFieldSettings = defaultSubFieldSetupValue.fieldSettings;
if (typeof defaultFieldSettings === 'function') {
defaultFieldSettings = defaultFieldSettings()
}
return {...defaultFieldSettings, ...i.fieldSettings}
},
() => {
let result = []
if(defaultSubFieldSetupValue.otherFieldsToMake){
result = result.concat(defaultSubFieldSetupValue.otherFieldsToMake())
if (defaultSubFieldSetupValue.otherFieldsToMakeOrUpdate) {
result = result.concat(defaultSubFieldSetupValue.otherFieldsToMakeOrUpdate())
}
if(i.otherFieldsToMake){
result = result.concat(i.otherFieldsToMake())
if (i.otherFieldsToMakeOrUpdate) {
result = result.concat(i.otherFieldsToMakeOrUpdate())
}
return result
},
@ -246,31 +282,31 @@ function doesNotMatchRowId(...searches: string[]): TestCaseInput {
}
}
const setTargetFieldAndLinkCellValuesFunc = (targetName) => {
return async function (tablePage: TablePage, cellValue: any[]) {
await updateRows(user, sharedPageTestData.tableA, [{
id: 1,
'link_to_b': []
}])
await tablePage.waitForFirstCellToBeBlank()
const rowUpdates = []
const linkIds = []
for (let i = 0; i < cellValue.length; i++) {
const rowId = i + 1;
const row = {id: rowId};
row[targetName] = cellValue[i]
rowUpdates.push(row)
linkIds.push(rowId)
}
await updateRows(user, sharedPageTestData.tableB, rowUpdates)
await updateRows(user, sharedPageTestData.tableA, [{
id: 1,
'link_to_b': linkIds
}])
await tablePage.waitForFirstCellNotBeBlank()
};
};
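
The factory above produces the cell setter shared by the lookup and link_row setups: it clears the link cell on row 1 of table A, writes the test values into the named field on table B's rows, and then re-links those rows so the values become searchable through the link. A minimal stand-alone sketch of calling it, assuming the shared test data and a `tablePage` are already set up (the real wiring appears in the `lookup` and `link_row` field type entries below):

```typescript
// Illustrative only; mirrors what the link_row setup does via setCellValueFunc.
const setPrimaryAndLink = setTargetFieldAndLinkCellValuesFunc('primary')
await setPrimaryAndLink(tablePage, ['2023-01-10T00:00:00Z', '4023-01-10T12:00:00Z'])
// Row 1 of table A now links to two rows in table B whose primary date field
// holds the values above, so searching '10/01/4023' should match the link cell.
```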
const fieldTypes = [
new FieldType(
'number',
[
{
name: 'number field with 4 DP',
testCases: [
matches(5.234, '5', '5.2', '+5.23'),
matches(-5.234, '-5', '-5.2', '-5.23'),
doesNotMatch(-5.234, '5', '5.2')
],
fieldSettings: {number_decimal_places:4, number_negative:true}
},
{
name: 'number field with 0 DP',
testCases: [
matches(5, '5'),
matches(-5, '-5'),
doesNotMatch(-5, '5', '5.2', 'a')
],
fieldSettings: {number_decimal_places:0, number_negative: true}
},
],
{}
),
new FieldType(
'lookup',
[
@ -279,7 +315,7 @@ const fieldTypes = [
testCases: [
matchesWithoutSelf(['2023-01-10T00:00:00Z', '4023-01-10T12:00:00Z'], '10/01/4023'),
],
otherFieldsToMake: () => [
otherFieldsToMakeOrUpdate: () => [
{
"type": "date",
"name": "target",
@ -297,7 +333,7 @@ const fieldTypes = [
testCases: [
matches(['test', 'other'], 't', 'te', 'tes', 'test'),
],
otherFieldsToMake: () => [
otherFieldsToMakeOrUpdate: () => [
{
"type": "text",
"name": "target",
@ -307,31 +343,12 @@ const fieldTypes = [
},
],
{
setCellValueFunc: async function (tablePage: TablePage, cellValue: any[]) {
await updateRows(user, sharedPageTestData.tableA, [{
id: 1,
'link_to_b': []
}])
await tablePage.waitForFirstCellToBeBlank()
const rowUpdates = []
const linkIds = []
for (let i = 0; i < cellValue.length; i++) {
const rowId = i+1;
rowUpdates.push({'target': cellValue[i], id: rowId})
linkIds.push(rowId)
}
await updateRows(user, sharedPageTestData.tableB, rowUpdates)
await updateRows(user, sharedPageTestData.tableA, [{
id: 1,
'link_to_b':linkIds
}])
await tablePage.waitForFirstCellNotBeBlank()
},
setCellValueFunc: setTargetFieldAndLinkCellValuesFunc('target'),
fieldSettings: {
'target_field_name': 'target',
'through_field_name': 'link_to_b'
},
otherFieldsToMake: () => [
otherFieldsToMakeOrUpdate: () => [
{
"type": "link_row",
"name": "link_to_b",
@ -343,6 +360,63 @@ const fieldTypes = [
],
},
),
new FieldType(
'link_row',
[
{
name: 'link of date field',
testCases: [
matchesWithoutSelf(['2023-01-10T00:00:00Z', '4023-01-10T12:00:00Z'], '10/01/4023'),
],
otherFieldsToMakeOrUpdate: () => [
{
"updatePrimary": true,
"type": "date",
"name": "primary",
"table": sharedPageTestData.tableB,
settings: {
"date_format": "EU",
"date_include_time": true,
"date_force_timezone": "UTC"
}
},
],
},
],
{
setCellValueFunc: setTargetFieldAndLinkCellValuesFunc('primary'),
fieldSettings: () => {
return {
name: 'link_to_b',
"link_row_table_id": sharedPageTestData.tableB.id,
}
},
},
),
new FieldType(
'number',
[
{
name: 'number field with 4 DP',
testCases: [
matches(5.234, '5', '5.2', '+5.23'),
matches(-5.234, '-5', '-5.2', '-5.23'),
doesNotMatch(-5.234, '5', '5.2')
],
fieldSettings: {number_decimal_places: 4, number_negative: true}
},
{
name: 'number field with 0 DP',
testCases: [
matches(5, '5'),
matches(-5, '-5'),
doesNotMatch(-5, '5', '5.2', 'a')
],
fieldSettings: {number_decimal_places: 0, number_negative: true}
},
],
{}
),
new FieldType(
'text',
[