Merge branch 'clear_only_baserow_models_cache' into 'develop'

Don't clear Django global apps cache. See merge request baserow/baserow!2105
2025-04-18 03:13:47 +00:00 · 2024-02-21 09:20:59 +00:00 · 2024-02-21 09:20:59 +00:00 · f7b75654e6
commit f7b75654e6
parent 0c94f35714 a5307a392b
5 changed files with 101 additions and 114 deletions
--- a/backend/src/baserow/contrib/database/apps.py
+++ b/backend/src/baserow/contrib/database/apps.py
@ -20,43 +20,7 @@ from baserow.ws.registries import page_registry
 class DatabaseConfig(AppConfig):
    name = "baserow.contrib.database"

-    def prevent_generated_model_for_registering(self):
-        """
-        A nasty hack that prevents a generated table model and related auto created
-        models from being registered to the apps. When a model class is defined it
-        will be registered to the apps, but we do not always want that to happen
-        because models with the same class name can differ. They are also meant to be
-        temporary. Removing the model from the cache does not work because if there
-        are multiple requests at the same, it is not removed from the cache on time
-        which could result in hard failures. It is also hard to extend the
-        django.apps.registry.apps so this hack extends the original `register_model`
-        method and it will only call the original `register_model` method if the
-        model is not a generated table model.
-
-        If anyone has a better way to prevent the models from being registered then I
-        am happy to hear about it! :)
-        """
-
-        original_register_model = self.apps.register_model
-
-        def register_model(app_label, model):
-            if not hasattr(model, "_generated_table_model") and not hasattr(
-                model._meta.auto_created, "_generated_table_model"
-            ):
-                original_register_model(app_label, model)
-            else:
-                # Trigger the pending operations because the original register_model
-                # method also triggers them. Not triggering them can cause a memory
-                # leak because everytime a table model is generated, it will register
-                # new pending operations.
-                self.apps.do_pending_operations(model)
-                self.apps.clear_cache()
-
-        self.apps.register_model = register_model
-
    def ready(self):
-        self.prevent_generated_model_for_registering()
-
        from baserow.core.action.registries import (
            action_scope_registry,
            action_type_registry,
--- a/backend/src/baserow/contrib/database/fields/field_types.py
+++ b/backend/src/baserow/contrib/database/fields/field_types.py
@ -2264,19 +2264,23 @@ class LinkRowFieldType(ManyToManyFieldTypeSerializeToInputValueMixin, FieldType)
        # Store the current table's model into the manytomany_models object so that the
        # related ManyToMany field can use that one. Otherwise we end up in a recursive
        # loop.
-        model.baserow_m2m_models[instance.table_id] = model
+        model_name = model._meta.model_name
+        model.baserow_models[model_name] = model

-        # Check if the related table model is already in the model.baserow_m2m_model.
+        # Check if the related table model is already in the model.baserow_models.
        if instance.is_self_referencing:
            related_model = model
        else:
-            related_model = model.baserow_m2m_models.get(instance.link_row_table_id)
+            related_model_name = Table.get_table_model_name(
+                instance.link_row_table_id
+            ).lower()
+            related_model = model.baserow_models.get(related_model_name)
            # If we do not have a related table model already we can generate a new one.
            if related_model is None:
                related_model = instance.link_row_table.get_model(
-                    manytomany_models=model.baserow_m2m_models
+                    manytomany_models=model.baserow_models
                )
-                model.baserow_m2m_models[instance.link_row_table_id] = related_model
+                model.baserow_models[related_model_name] = related_model

        instance._related_model = related_model
        related_name = f"reversed_field_{instance.id}"
@ -2297,8 +2301,6 @@ class LinkRowFieldType(ManyToManyFieldTypeSerializeToInputValueMixin, FieldType)
                    related_name = related_field["name"]
                    break

-        # Note that the through model will not be registered with the apps because
-        # of the `DatabaseConfig.prevent_generated_model_for_registering` hack.
        models.ManyToManyField(
            to=related_model,
            related_name=related_name,
@ -2308,20 +2310,6 @@ class LinkRowFieldType(ManyToManyFieldTypeSerializeToInputValueMixin, FieldType)
            db_constraint=False,
        ).contribute_to_class(model, field_name)

-        model_field = model._meta.get_field(field_name)
-        through_model = model_field.remote_field.through
-
-        # Trigger the newly created pending operations of all the models related to the
-        # created ManyToManyField. They need to be called manually because normally
-        # they are triggered when a new model is registered. Not triggering them
-        # can cause a memory leak because every time a table model is generated, it will
-        # register new pending operations.
-        apps = model._meta.apps
-        apps.do_pending_operations(model)
-        apps.do_pending_operations(related_model)
-        apps.do_pending_operations(through_model)
-        apps.clear_cache()
-
    def prepare_values(self, values, user):
        """
        This method checks if the provided link row table is an int because then it
@ -3930,21 +3918,6 @@ class MultipleSelectFieldType(
            to=model, related_name=field_name, **shared_kwargs
        ).contribute_to_class(select_option_model, related_name)

-        # Trigger the newly created pending operations of all the models related to the
-        # created ManyToManyField. They need to be called manually because normally
-        # they are triggered when a new model is registered. Not triggering them
-        # can cause a memory leak because everytime a table model is generated, it will
-        # register new pending operations.
-        apps = model._meta.apps
-        model_field = model._meta.get_field(field_name)
-        select_option_field = select_option_model._meta.get_field(related_name)
-        apps.do_pending_operations(model)
-        apps.do_pending_operations(select_option_model)
-        apps.do_pending_operations(model_field.remote_field.through)
-        apps.do_pending_operations(model)
-        apps.do_pending_operations(select_option_field.remote_field.through)
-        apps.clear_cache()
-
    def get_export_serialized_value(self, row, field_name, cache, files_zip, storage):
        cache_entry = f"{field_name}_relations"
        if cache_entry not in cache:
@ -5383,21 +5356,6 @@ class MultipleCollaboratorsFieldType(
            **shared_kwargs,
        ).contribute_to_class(user_model, related_name)

-        # Trigger the newly created pending operations of all the models related to the
-        # created CollaboratorField. They need to be called manually because normally
-        # they are triggered when a new model is registered. Not triggering them
-        # can cause a memory leak because everytime a table model is generated, it will
-        # register new pending operations.
-        apps = model._meta.apps
-        model_field = model._meta.get_field(field_name)
-        collaborator_field = user_model._meta.get_field(related_name)
-        apps.do_pending_operations(model)
-        apps.do_pending_operations(user_model)
-        apps.do_pending_operations(model_field.remote_field.through)
-        apps.do_pending_operations(model)
-        apps.do_pending_operations(collaborator_field.remote_field.through)
-        apps.clear_cache()
-
    def enhance_queryset(self, queryset, field, name):
        return queryset.prefetch_related(name)

--- a/backend/src/baserow/contrib/database/fields/registries.py
+++ b/backend/src/baserow/contrib/database/fields/registries.py
@ -75,7 +75,6 @@ class FieldType(
    ModelInstanceMixin,
    Instance,
 ):
-
    """
    This abstract class represents a custom field type that can be added to the
    field type registry. It must be extended so customisation can be done. Each field
--- a/backend/src/baserow/contrib/database/table/models.py
+++ b/backend/src/baserow/contrib/database/table/models.py
@ -687,27 +687,84 @@ class GeneratedTableModel(HierarchicalModelMixin, models.Model):
        abstract = True


-class DefaultAppsProxy:
+class GeneratedModelAppsProxy:
    """
-    A proxy class to the default apps registry.
-    This class is needed to make our dynamic models available in the
-    options then the relation tree is built.
+    A proxy class to the default apps registry. This class is needed to make our dynamic
+    models available in the options when the relation tree is built, without polluting
+    the global apps registry, meant to keep only the static models that do not change.

-    This permits to django to find the reverse relation in the _relation_tree.
-    Look into django.db.models.options.py - _populate_directed_relation_graph
-    for more information.
+    This allows Django to find the reverse relation in the _relation_tree. See
+    _populate_directed_relation_graph in django/db/models/options.py for more
+    information.
+
+    It also allows us to register dynamic models in a separate registry and to perform
+    all the pending operations for the generated models without the need of clearing the
+    global apps registry cache.
+
+    This registry, created as needed by a generated table model, holds references to
+    other such models. It's discarded after the operation, ensuring it only exists when
+    necessary.
    """

-    def __init__(self, baserow_m2m_models):
-        self.baserow_m2m_models = baserow_m2m_models
+    def __init__(self, baserow_models=None):
+        self.baserow_models = baserow_models or {}
+        self.baserow_app_label = "database_table"

    def get_models(self, *args, **kwargs):
-        # Called by django and must contain ALL the models that have been generated
-        # and connected together as django will loop over every model in this list
-        # and set cached properties on each. These cached django properties are then
-        # used to when looking up fields, so they must include every connected model
-        # that could be involved in queries and not just a sub-set of them.
-        return apps.get_models(*args, **kwargs) + list(self.baserow_m2m_models.values())
+        """
+        Called by django and must contain ALL the models that have been generated
+        and connected together as django will loop over every model in this list
+        and set cached properties on each. These cached django properties are then
+        used when looking up fields, so they must include every connected model
+        that could be involved in queries and not just a sub-set of them.
+        """
+
+        return apps.get_models(*args, **kwargs) + list(self.baserow_models.values())
+
+    def register_model(self, app_label, model):
+        """
+        This is a hack that prevents a generated table model and related auto created
+        models from being registered into the Django apps model registry. It tries to
+        keep Django's model registry separate from Baserow's generated models. In this
+        way we can leverage all the great features of Django's static models, while
+        still being able to generate dynamic models for tables, without polluting the
+        global ones.
+        """
+
+        model_name = model._meta.model_name.lower()
+        if not hasattr(model, "_generated_table_model"):
+            # it must be an auto created intermediary m2m model, so use a list of
+            # baserow models we can later use to resolve the pending operations.
+            if not hasattr(self, "baserow_models"):
+                self.baserow_models = model._meta.auto_created.baserow_models
+
+        self.baserow_models[model_name] = model
+        self.do_all_pending_operations()
+        self._clear_baserow_models_cache()
+
+    def _clear_baserow_models_cache(self):
+        for model in self.baserow_models.values():
+            model._meta._expire_cache()
+
+    def do_all_pending_operations(self):
+        """
+        This method will perform all the pending operations for the generated models.
+        It will keep performing the pending operations until there are no more pending
+        operations left, because one pending operation can trigger another pending
+        operation for another model. It will perform a maximum of `max_iterations`
+        passes to prevent infinite loops. The number of 3 has been chosen because it's
+        the number observed to be enough to resolve all pending operations in the
+        tests.
+        """
+
+        max_iterations = 3
+        for _ in range(max_iterations):
+            for _, model_name in list(apps._pending_operations.keys()):
+                model = self.baserow_models[model_name]
+                apps.do_pending_operations(model)
+
+            if not apps._pending_operations:
+                break

    def __getattr__(self, attr):
        return getattr(apps, attr)
@ -857,6 +914,10 @@ class Table(
        queryset = Table.objects.filter(database=database)
        return cls.get_highest_order_of_queryset(queryset) + 1

+    @classmethod
+    def get_table_model_name(cls, table_id):
+        return f"Table{table_id}Model"
+
    def get_database_table_name(self):
        return f"{USER_TABLE_DATABASE_NAME_PREFIX}{self.id}"

@ -875,9 +936,7 @@ class Table(
    ) -> Type[GeneratedTableModel]:
        """
        Generates a temporary Django model based on available fields that belong to
-        this table. Note that the model will not be registered with the apps because
-        of the `DatabaseConfig.prevent_generated_model_for_registering` hack. We do
-        not want to the model cached because models with the same name can differ.
+        this table.

        :param fields: Extra table field instances that need to be added the model.
        :type fields: list
@ -913,7 +972,7 @@ class Table(
        """

        filtered = field_names is not None or field_ids is not None
-        model_name = f"Table{self.pk}Model"
+        model_name = self.get_table_model_name(self.pk)

        if fields is None:
            fields = []
@ -929,13 +988,13 @@ class Table(
            )
        ]

-        app_label = "database_table"
-        baserow_m2m_models = manytomany_models or {}
+        apps = GeneratedModelAppsProxy(manytomany_models)
+        app_label = apps.baserow_app_label
        meta = type(
            "Meta",
            (),
            {
-                "apps": DefaultAppsProxy(baserow_m2m_models),
+                "apps": apps,
                "managed": managed,
                "db_table": self.get_database_table_name(),
                "app_label": app_label,
@ -966,7 +1025,7 @@ class Table(
            "_generated_table_model": True,
            "baserow_table": self,
            "baserow_table_id": self.id,
-            "baserow_m2m_models": baserow_m2m_models,
+            "baserow_models": apps.baserow_models,
            # We are using our own table model manager to implement some queryset
            # helpers.
            "objects": TableModelManager(),
@ -1061,7 +1120,7 @@ class Table(

        patch_meta_get_field(model._meta)

-        if not model.baserow_m2m_models:
+        if not manytomany_models:
            self._after_model_generation(attrs, model)

        return model
@ -1096,7 +1155,7 @@ class Table(
            related_name="+",
            related_query_name="+",
            db_constraint=False,
-            on_delete=models.SET_NULL,
+            on_delete=models.DO_NOTHING,
            help_text="Stores information about the user that created the row.",
        )

@ -1107,7 +1166,7 @@ class Table(
            related_name="+",
            related_query_name="+",
            db_constraint=False,
-            on_delete=models.SET_NULL,
+            on_delete=models.DO_NOTHING,
            help_text="Stores information about the user that modified the row last.",
        )

--- a/changelog/entries/unreleased/refactor/refactored_cache_clearing_logic_to_target_only_dynamic_model.json
+++ b/changelog/entries/unreleased/refactor/refactored_cache_clearing_logic_to_target_only_dynamic_model.json
@ -0,0 +1,7 @@
+{
+    "type": "refactor",
+    "message": "Refactored cache clearing logic to target only dynamic models, preserving the global cache for all models.",
+    "issue_number": null,
+    "bullet_points": [],
+    "created_at": "2024-02-21"
+}