bramw_baserow/backend/src/baserow/cachalot_patch.py

import re
from contextlib import contextmanager
from functools import wraps

from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.db.transaction import get_connection

from cachalot import utils as cachalot_utils
from cachalot.settings import cachalot_settings
from django_redis import get_redis_connection
from loguru import logger
from psycopg2.sql import Composed


@contextmanager
def cachalot_enabled():
    """
    Context manager that forces cachalot's thread local `cachalot_enabled` flag to
    True while the body of the `with` statement runs, so caching can be turned on
    for specific queries without enabling it everywhere.

    Only queries that are actually executed inside the `with` block are affected;
    a lazy queryset that is built inside the block but evaluated afterwards is not.

    On exit (also when the body raises) the flag is set back to the value it had
    on entry. If the flag was not set on entry, the value of the
    `CACHALOT_ENABLED` cachalot setting is written instead, which means the
    attribute stays explicitly set on the thread local storage afterwards.
    """

    from cachalot.api import LOCAL_STORAGE

    flag_name = "cachalot_enabled"
    # Remember what to put back once the block is done, falling back to the
    # global setting when nothing was stored on this thread yet.
    previous_state = getattr(
        LOCAL_STORAGE, flag_name, cachalot_settings.CACHALOT_ENABLED
    )
    setattr(LOCAL_STORAGE, flag_name, True)
    try:
        yield
    finally:
        setattr(LOCAL_STORAGE, flag_name, previous_state)


def patch_cachalot_for_baserow():
    """
    This function patches the cachalot library to make it work with baserow
    dynamic models. The problem we're trying to solve here is that the only way
    to limit what cachalot caches is to provide a fixed list of tables, but
    baserow creates dynamic models on the fly so we can't know what tables will
    be created in advance, so we need to include all the tables that start with
    one of the baserow generated table prefixes (user tables and the link row,
    multiple collaborators and multiple select through tables) in the list of
    cachable tables.

    `filter_cachable` and `is_cachable` are called to invalidate the cache when
    a table is changed. `are_all_cachable` is called to check if a query can be
    cached. `_get_tables_from_sql` is patched so that baserow table names found
    in raw SQL are reported as well, and `psycopg2.sql.Composed` receives a
    `lower` method because cachalot lowercases the queries it inspects.

    The patches are applied by replacing attributes on `cachalot.utils` and on
    the `Composed` class, so this function only has side effects and returns
    nothing.
    """

    from baserow.contrib.database.table.constants import (
        LINK_ROW_THROUGH_TABLE_PREFIX,
        MULTIPLE_COLLABORATOR_THROUGH_TABLE_PREFIX,
        MULTIPLE_SELECT_THROUGH_TABLE_PREFIX,
        USER_TABLE_DATABASE_NAME_PREFIX,
    )

    # A single alternation of all the prefixes we want to match. The prefixes
    # are escaped so that they're always matched literally, even if one of them
    # would ever contain a character that has a special meaning in a regex.
    prefixes_alternation = "|".join(
        re.escape(prefix)
        for prefix in (
            USER_TABLE_DATABASE_NAME_PREFIX,
            LINK_ROW_THROUGH_TABLE_PREFIX,
            MULTIPLE_COLLABORATOR_THROUGH_TABLE_PREFIX,
            MULTIPLE_SELECT_THROUGH_TABLE_PREFIX,
        )
    )

    original_filter_cachable = cachalot_utils.filter_cachable

    # Matches if the provided string starts with any of the prefixes followed
    # by a number.
    baserow_table_names_regex = re.compile(rf"^(?:{prefixes_alternation})\d+")

    def is_baserow_table(table_name):
        """
        Returns True if the table name looks like a baserow generated table and
        is not listed in the `CACHALOT_UNCACHABLE_TABLES` setting.
        """

        uncachable_tables = getattr(settings, "CACHALOT_UNCACHABLE_TABLES", [])
        return (
            table_name not in uncachable_tables
            and baserow_table_names_regex.match(table_name) is not None
        )

    @wraps(original_filter_cachable)
    def patched_filter_cachable(tables):
        # The original function is called first, exactly as before, and the
        # baserow tables are added to whatever it considers cachable.
        return original_filter_cachable(tables).union(
            table for table in tables if is_baserow_table(table)
        )

    cachalot_utils.filter_cachable = patched_filter_cachable

    original_is_cachable = cachalot_utils.is_cachable

    @wraps(original_is_cachable)
    def patched_is_cachable(table):
        return is_baserow_table(table) or original_is_cachable(table)

    cachalot_utils.is_cachable = patched_is_cachable

    original_are_all_cachable = cachalot_utils.are_all_cachable

    @wraps(original_are_all_cachable)
    def patched_are_all_cachable(tables):
        """
        This patch works because cachalot does not explicitly set this thread
        local variable, but it assumes to be True by default if CACHALOT_ENABLED
        is not set otherwise. Since we are explicitly setting it to True in our
        code for the query we want to cache, we can check if the value has been
        set or not to exclude our dynamic tables from the list of tables that
        cachalot will check, making all of them cachable for the queries
        wrapped in the `cachalot_enabled` context manager.
        """

        from cachalot.api import LOCAL_STORAGE

        # Deliberately not named `cachalot_enabled` to avoid shadowing the
        # context manager with the same name defined in this module.
        explicitly_enabled = getattr(LOCAL_STORAGE, "cachalot_enabled", False)
        if explicitly_enabled:
            tables = {table for table in tables if not is_baserow_table(table)}
        return original_are_all_cachable(tables)

    cachalot_utils.are_all_cachable = patched_are_all_cachable

    # Same prefixes as above, but not anchored because here the table names
    # have to be found anywhere inside an SQL statement.
    baserow_tables_regex = re.compile(rf"(?:{prefixes_alternation})\d+")
    original_get_tables_from_sql = cachalot_utils._get_tables_from_sql

    @wraps(original_get_tables_from_sql)
    def patched_get_tables_from_sql(
        connection, lowercased_sql, enable_quote: bool = False
    ):
        # The pattern has no capturing groups, so `findall` returns the full
        # table names that were matched.
        baserow_tables = baserow_tables_regex.findall(lowercased_sql)
        return set(baserow_tables) | original_get_tables_from_sql(
            connection, lowercased_sql, enable_quote
        )

    cachalot_utils._get_tables_from_sql = patched_get_tables_from_sql

    def lower(self):
        """
        Cachalot wants this method to lowercase the queries to check if they are
        cachable, but the Composed class in psycopg2.sql does not have a lower
        method, so we add it here to add the support for it.
        """

        # The cursor is only needed as context to render the composed query as
        # a string, so make sure it's closed again right after instead of
        # leaving it open until it's garbage collected.
        with get_connection().cursor() as cursor:
            return self.as_string(cursor.cursor).lower()

    Composed.lower = lower


def clear_cachalot_cache():
    """
    This function clears the cachalot cache. It can be used in the tests to make
    sure that the cache is cleared between tests or as post_migrate receiver to
    ensure to start with a clean cache after migrations.

    When `settings.TESTS` is truthy the whole cache backend configured in
    `settings.CACHALOT_CACHE` is cleared via its `clear` method. Otherwise only
    the keys starting with the `KEY_PREFIX` configured for that cache are
    deleted and the number of deleted entries is logged.

    :raises ImproperlyConfigured: If looking up the `settings.CACHALOT_CACHE`
        cache fails with a `KeyError`.
    """

    from django.conf import settings
    from django.core.cache import caches

    logger.info("Clearing cachalot cache")
    try:
        cachalot_cache = caches[settings.CACHALOT_CACHE]
    except KeyError as err:
        # Explicitly chain the original error so that the traceback shows it
        # as the direct cause of the configuration error.
        raise ImproperlyConfigured(
            f"Could not find the {settings.CACHALOT_CACHE} cache."
        ) from err

    if settings.TESTS:
        cachalot_cache.clear()
    else:
        key_prefix = settings.CACHES[settings.CACHALOT_CACHE]["KEY_PREFIX"]

        count = _delete_pattern(key_prefix)

        logger.info(f"Done clearing cachalot cache, cleared {count} entries.")


def _delete_pattern(key_prefix: str, batch_size: int = 1000) -> int:
    """
    Allows deleting every redis key that matches a pattern. Copied from the
    django-redis implementation but modified to allow deleting all versions in the
    cache at once.

    The keys are looked up on the redis connection of the "default" cache and the
    delete commands are sent in batches, so that the number of commands buffered
    in the pipeline stays bounded no matter how many keys match.

    :param key_prefix: Every key starting with this prefix is deleted. The value
        is used as is in the redis match pattern `{key_prefix}*`.
    :param batch_size: The maximum number of delete commands that are queued
        before they are sent to redis. It's also passed as the `count` hint of
        the key scan.
    :raises ValueError: If the provided batch size is smaller than 1.
    :return: The number of keys for which a delete command has been issued.
    """

    if batch_size < 1:
        raise ValueError("The batch_size must be at least 1.")

    client = get_redis_connection("default")
    count = 0
    pipeline = client.pipeline()
    for key in client.scan_iter(match=f"{key_prefix}*", count=batch_size):
        pipeline.delete(key)
        count += 1
        if count % batch_size == 0:
            # Flush the queued commands. The pipeline is empty afterwards and
            # is reused for the next batch.
            pipeline.execute()
    # Send whatever is left from the last, possibly incomplete, batch.
    pipeline.execute()
    return count