1
0
Fork 0
mirror of https://gitlab.com/bramw/baserow.git synced 2025-04-10 15:47:32 +00:00

Builder improve global cache

This commit is contained in:
Jérémie Pardou 2025-02-26 10:51:41 +00:00
parent 406eda9f75
commit b1d25d4da9
9 changed files with 245 additions and 221 deletions
backend
src/baserow
contrib/builder
core
tests/baserow

View file

@ -69,6 +69,7 @@ from baserow.contrib.builder.workflow_actions.registries import (
from baserow.contrib.builder.workflow_actions.service import (
BuilderWorkflowActionService,
)
from baserow.core.cache import global_cache
from baserow.core.exceptions import ApplicationDoesNotExist, PermissionException
from baserow.core.services.exceptions import (
DoesNotExist,
@ -77,7 +78,6 @@ from baserow.core.services.exceptions import (
ServiceSortPropertyDoesNotExist,
)
from baserow.core.services.registries import service_type_registry
from baserow.core.utils import safe_get_or_set_cache
class PublicBuilderByDomainNameView(APIView):
@ -111,11 +111,8 @@ class PublicBuilderByDomainNameView(APIView):
the public site.
"""
data = safe_get_or_set_cache(
cache_key=DomainHandler.get_public_builder_by_domain_cache_key(domain_name),
version_cache_key=DomainHandler.get_public_builder_by_domain_version_cache_key(
domain_name
),
data = global_cache.get(
DomainHandler.get_public_builder_by_domain_cache_key(domain_name),
default=lambda: self._get_public_builder_by_domain(request, domain_name),
timeout=BUILDER_PUBLIC_BUILDER_BY_DOMAIN_TTL_SECONDS,
)
@ -218,8 +215,8 @@ class PublicElementsView(APIView):
"""
if PageHandler().is_published_page(page_id):
data = safe_get_or_set_cache(
cache_key=PageHandler.get_page_public_records_cache_key(
data = global_cache.get(
PageHandler.get_page_public_records_cache_key(
page_id, request.user_source_user, "elements"
),
default=lambda: self._get_public_page_elements(request, page_id),
@ -236,7 +233,8 @@ class PublicElementsView(APIView):
"""
Returns a list of serialized elements that belong to the given page id.
Only requested if the public elements cache is stale.
Only requested if the public elements cache is stale, or if the page is
being previewed.
:param request: the HTTP request.
:param page_id: the page id.
@ -289,8 +287,8 @@ class PublicDataSourcesView(APIView):
"""
if PageHandler().is_published_page(page_id):
data = safe_get_or_set_cache(
cache_key=PageHandler.get_page_public_records_cache_key(
data = global_cache.get(
PageHandler.get_page_public_records_cache_key(
page_id, request.user_source_user, "data_sources"
),
default=lambda: self._get_public_page_data_sources(request, page_id),
@ -305,7 +303,8 @@ class PublicDataSourcesView(APIView):
"""
Returns a list of serialized data sources that belong to the given page id.
Only requested if the public data sources cache is stale.
Only requested if the public data sources cache is stale, or if the page is
being previewed.
:param request: the HTTP request.
:param page_id: the page id.
@ -376,8 +375,8 @@ class PublicBuilderWorkflowActionsView(APIView):
"""
if PageHandler().is_published_page(page_id):
data = safe_get_or_set_cache(
cache_key=PageHandler.get_page_public_records_cache_key(
data = global_cache.get(
PageHandler.get_page_public_records_cache_key(
page_id, request.user_source_user, "workflow_actions"
),
default=lambda: self._get_public_page_workflow_actions(
@ -394,7 +393,8 @@ class PublicBuilderWorkflowActionsView(APIView):
"""
Returns a list of serialized workflow actions that belong to the given page id.
Only requested if the public workflow actions cache is stale.
Only requested if the public workflow actions cache is stale, or if the page is
being previewed.
:param request: the HTTP request.
:param page_id: the page id.

View file

@ -13,13 +13,14 @@ from baserow.contrib.builder.domains.models import Domain
from baserow.contrib.builder.domains.registries import DomainType
from baserow.contrib.builder.exceptions import BuilderDoesNotExist
from baserow.contrib.builder.models import Builder
from baserow.core.cache import global_cache
from baserow.core.db import specific_iterator
from baserow.core.exceptions import IdDoesNotExist
from baserow.core.models import Workspace
from baserow.core.registries import ImportExportConfig, application_type_registry
from baserow.core.storage import get_default_storage
from baserow.core.trash.handler import TrashHandler
from baserow.core.utils import Progress, extract_allowed, invalidate_versioned_cache
from baserow.core.utils import Progress, extract_allowed
class DomainHandler:
@ -284,12 +285,6 @@ class DomainHandler:
def get_public_builder_by_domain_cache_key(cls, domain_name: str) -> str:
return f"ab_public_builder_by_domain_{domain_name}"
@classmethod
def get_public_builder_by_domain_version_cache_key(cls, domain_name: str) -> str:
return f"ab_public_builder_by_domain_{domain_name}_version"
@classmethod
def invalidate_public_builder_by_domain_cache(cls, domain_name: str):
invalidate_versioned_cache(
cls.get_public_builder_by_domain_version_cache_key(domain_name)
)
global_cache.invalidate(cls.get_public_builder_by_domain_cache_key(domain_name))

View file

@ -8,12 +8,12 @@ from baserow.contrib.builder.formula_property_extractor import (
)
from baserow.contrib.builder.models import Builder
from baserow.contrib.builder.theme.registries import theme_config_block_registry
from baserow.core.cache import global_cache
from baserow.core.handler import CoreHandler
from baserow.core.models import Workspace
from baserow.core.user_sources.handler import UserSourceHandler
from baserow.core.user_sources.models import UserSource
from baserow.core.user_sources.user_source_user import UserSourceUser
from baserow.core.utils import invalidate_versioned_cache, safe_get_or_set_cache
USED_PROPERTIES_CACHE_KEY_PREFIX = "used_properties_for_page"
@ -76,8 +76,8 @@ class BuilderHandler:
@classmethod
def invalidate_builder_public_properties_cache(cls, builder: Builder):
invalidate_versioned_cache(
cls._get_builder_public_properties_version_cache(builder)
global_cache.invalidate(
invalidate_key=cls._get_builder_public_properties_version_cache(builder)
)
def get_builder_public_properties(
@ -100,10 +100,12 @@ class BuilderHandler:
properties = get_builder_used_property_names(user, builder)
return SENTINEL if properties is None else properties
result = safe_get_or_set_cache(
result = global_cache.get(
self.get_builder_used_properties_cache_key(user, builder),
self._get_builder_public_properties_version_cache(builder),
default=compute_properties,
# We want to invalidate the cache for all roles at once so we create a
# unique key for all.
invalidate_key=self._get_builder_public_properties_version_cache(builder),
timeout=settings.BUILDER_PUBLICLY_USED_PROPERTIES_CACHE_TTL_SECONDS
if builder.workspace_id
else BUILDER_PREVIEW_USED_PROPERTIES_CACHE_TTL_SECONDS,
@ -148,13 +150,3 @@ class BuilderHandler:
)
)
return UserSourceHandler().aggregate_user_counts(workspace, queryset)
@classmethod
def get_public_builder_by_domain_version_cache(cls, domain_name: str) -> str:
return f"get_public_builder_by_domain_{domain_name}"
@classmethod
def invalidate_public_builder_by_domain_cache(cls, domain_name: str):
invalidate_versioned_cache(
cls.get_public_builder_by_domain_version_cache(domain_name)
)

View file

@ -40,15 +40,11 @@ from baserow.contrib.builder.types import PageDict
from baserow.contrib.builder.workflow_actions.handler import (
BuilderWorkflowActionHandler,
)
from baserow.core.cache import global_cache
from baserow.core.exceptions import IdDoesNotExist
from baserow.core.storage import ExportZipFile
from baserow.core.user_sources.user_source_user import UserSourceUser
from baserow.core.utils import (
ChildProgressBuilder,
MirrorDict,
find_unused_name,
safe_get_or_set_cache,
)
from baserow.core.utils import ChildProgressBuilder, MirrorDict, find_unused_name
BUILDER_PAGE_IS_PUBLISHED_CACHE_TTL_SECONDS = 60 * 60
@ -265,7 +261,7 @@ class PageHandler:
:return: whether this public page ID is published or not.
"""
return safe_get_or_set_cache(
return global_cache.get(
f"ab_public_page_{public_page_id}_published",
default=lambda: self._is_published_application_page(public_page_id),
timeout=BUILDER_PAGE_IS_PUBLISHED_CACHE_TTL_SECONDS,

View file

@ -2,8 +2,12 @@ from contextlib import contextmanager
from typing import Callable, TypeVar
from django.conf import settings
from django.core.cache import cache
from asgiref.local import Local
from redis.exceptions import LockNotOwnedError
from baserow.version import VERSION as BASEROW_VERSION
T = TypeVar("T")
@ -108,3 +112,147 @@ class LocalCacheMiddleware:
def __call__(self, request):
with local_cache.context():
return self.get_response(request)
SENTINEL = object()
class GlobalCache:
    """
    A global cache wrapper around the Django cache system that provides
    invalidation capabilities and a lock mechanism to prevent multiple
    concurrent updates. It's also versioned with Baserow version.

    Every value is stored under a key that embeds a version counter. Invalidating
    does not delete the stored value; it increments the counter so that later
    reads build a different key and therefore miss.

    Example Usage:

        # Storing and retrieving a value
        value = global_cache.get(
            "user_123_data",
            default=lambda: expensive_computation(),
            timeout=300
        )

        # Invalidating a cache key
        global_cache.invalidate("user_123_data")
    """

    # Lifetime of a version counter once it has been created by `invalidate`.
    VERSION_KEY_TTL = 60 * 60 * 24 * 10  # 10 days

    def _get_version_cache_key(
        self, key: str, invalidate_key: None | str = None
    ) -> str:
        """
        Generates the cache key under which the version counter is stored.

        :param key: The base cache key.
        :param invalidate_key: The key used when this cache is invalidated. When
            provided it replaces `key`, so that several cache entries can share
            one version counter.
        :return: The cache key of the version counter.
        """

        base_key = key if invalidate_key is None else invalidate_key
        return f"{BASEROW_VERSION}_{base_key}__current_version"

    def _get_cache_key_with_version(
        self, key: str, invalidate_key: None | str = None
    ) -> str:
        """
        Generates the cache key, including the current version, under which the
        value itself is stored.

        :param key: The base cache key.
        :param invalidate_key: The key used when this cache is invalidated. It
            must be forwarded here so that the version is read from the same
            counter that `invalidate` increments.
        :return: A modified cache key with version.
        """

        # A missing counter means the entry was never invalidated: version 0.
        version = cache.get(self._get_version_cache_key(key, invalidate_key), 0)
        return f"{BASEROW_VERSION}_{key}__version_{version}"

    def get(
        self,
        key: str,
        default: T | Callable[[], T] = None,
        invalidate_key: None | str = None,
        timeout: int = 60,
    ) -> T:
        """
        Retrieves a value from the cache if it exists; otherwise, sets it using the
        provided default value.

        This function also uses a lock (if available on the cache backend) to ensure
        multi call safety when setting a new value.

        :param key: The key of the cache value to get (or set). Make sure this key is
            unique and not used elsewhere.
        :param default: The default value to store in the cache if the key is absent.
            Can be either a literal value or a callable. If it's a callable,
            the function is called to retrieve the default value.
        :param invalidate_key: The key used when this cache is invalidated. A default
            one is used if none is provided and this value otherwise. Can be used to
            invalidate multiple caches at the same time. When invalidating the cache
            you must use the same key later.
        :param timeout: The cache timeout in seconds for newly set values.
            Defaults to 60.
        :return: The cached value if it exists; otherwise, the newly set value.
        """

        cache_key_to_use = self._get_cache_key_with_version(key, invalidate_key)

        # SENTINEL distinguishes "nothing cached" from a cached `None`.
        cached = cache.get(cache_key_to_use, SENTINEL)
        if cached is not SENTINEL:
            return cached

        # Not every cache backend exposes `lock`; fall back to an unguarded
        # compute-and-set when it is missing.
        use_lock = hasattr(cache, "lock")
        if use_lock:
            cache_lock = cache.lock(f"{cache_key_to_use}__lock", timeout=10)
            cache_lock.acquire()

        try:
            # We check again to make sure it hasn't been populated in the meantime
            # while acquiring the lock
            cached = cache.get(cache_key_to_use, SENTINEL)
            if cached is SENTINEL:
                cached = default() if callable(default) else default
                cache.set(cache_key_to_use, cached, timeout=timeout)
        finally:
            if use_lock:
                try:
                    cache_lock.release()
                except LockNotOwnedError:
                    # If the lock release fails, it might be because of the timeout
                    # and it's been stolen so we don't really care
                    pass

        return cached

    def invalidate(self, key: None | str = None, invalidate_key: None | str = None):
        """
        Invalidates the cached value associated with the given key, ensuring that
        subsequent cache reads will miss and require a new value to be set.

        :param key: The cache key to invalidate.
        :param invalidate_key: The key to use for invalidation. If provided, this key
            must match the one given at cache creation.
        :raises ValueError: If neither `key` nor `invalidate_key` is provided.
        """

        if key is None and invalidate_key is None:
            # Without this guard the counter named "<version>_None__current_version"
            # would be bumped, which invalidates nothing and hides the mistake.
            raise ValueError("Either `key` or `invalidate_key` must be provided.")

        version_key = self._get_version_cache_key(key, invalidate_key)

        try:
            cache.incr(version_key, 1)
        except ValueError:
            # If the cache key does not exist, initialize its versioning.
            cache.set(
                version_key,
                1,
                timeout=self.VERSION_KEY_TTL,
            )
# Module-level instance that other modules import as
# `from baserow.core.cache import global_cache`.
global_cache = GlobalCache()

View file

@ -14,31 +14,17 @@ from decimal import Decimal
from fractions import Fraction
from itertools import chain, islice
from numbers import Number
from typing import (
Any,
Callable,
Dict,
Iterable,
List,
Optional,
Set,
Tuple,
Type,
Union,
)
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union
from django.conf import settings
from django.core.cache import cache
from django.db import transaction
from django.db.models import ForeignKey, ManyToManyField, Model
from django.db.models.fields import NOT_PROVIDED
from django.db.transaction import get_connection
from redis.exceptions import LockNotOwnedError
from requests.utils import guess_json_utf
from baserow.contrib.database.db.schema import optional_atomic
from baserow.version import VERSION as BASEROW_VERSION
from .exceptions import CannotCalculateIntermediateOrder
@ -1207,84 +1193,3 @@ def are_hostnames_same(hostname1: str, hostname2: str) -> bool:
ips1 = get_all_ips(hostname1)
ips2 = get_all_ips(hostname2)
return not ips1.isdisjoint(ips2)
SENTINEL = object()
def safe_get_or_set_cache(
    cache_key: str,
    version_cache_key: str | None = None,
    default: Any | Callable = None,
    timeout: int = 60,
) -> Any:
    """
    Retrieves a value from the cache if it exists; otherwise, sets it using the
    provided default value. If a version cache key is provided, the function uses
    a versioned key to manage cache invalidation.

    This function also uses a lock (if available on the cache backend) to ensure
    multi call safety when setting a new value.

    :param cache_key: The base key to look up in the cache.
    :param version_cache_key: An optional key used to version the cache. If
        provided, the current version stored under that key (0 when absent) is
        appended to the cache key, so incrementing the version makes later reads
        miss.
    :param default: The default value to store in the cache if the key is absent.
        Can be either a literal value or a callable. If it's a callable,
        the function is called to retrieve the default value.
    :param timeout: The cache timeout in seconds for newly set values. Defaults to 60.
    :return: The cached value if it exists; otherwise, the newly set value.
    """

    # Always namespace by the Baserow version, whether or not the key is also
    # versioned, so values written by another Baserow release are never read back.
    cache_key_to_use = f"{BASEROW_VERSION}_{cache_key}"
    if version_cache_key is not None:
        version = cache.get(version_cache_key, 0)
        cache_key_to_use = f"{cache_key_to_use}__version_{version}"

    # SENTINEL distinguishes "nothing cached" from a cached `None`.
    cached = cache.get(cache_key_to_use, SENTINEL)
    if cached is not SENTINEL:
        return cached

    # Not every cache backend exposes `lock`; fall back to an unguarded
    # compute-and-set when it is missing.
    use_lock = hasattr(cache, "lock")
    if use_lock:
        cache_lock = cache.lock(f"{cache_key_to_use}__lock", timeout=10)
        cache_lock.acquire()

    try:
        # We check again to make sure it hasn't been populated in the meantime
        # while acquiring the lock
        cached = cache.get(cache_key_to_use, SENTINEL)
        if cached is SENTINEL:
            cached = default() if callable(default) else default
            cache.set(cache_key_to_use, cached, timeout=timeout)
    finally:
        if use_lock:
            try:
                cache_lock.release()
            except LockNotOwnedError:
                # If the lock release fails, it might be because of the timeout
                # and it's been stolen so we don't really care
                pass

    return cached
def invalidate_versioned_cache(
    version_cache_key: str, version_timeout: int = 60 * 60 * 24 * 10
):
    """
    Invalidates (or increments) the version associated with a versioned cache,
    forcing future reads on this versioned key to miss the cache.

    :param version_cache_key: The key whose version is to be incremented in the cache.
    :param version_timeout: Lifetime in seconds given to the version counter when
        it has to be created. Defaults to 10 days. It should outlive the values
        cached under this version key: if the counter expires first, readers fall
        back to version 0 and may be served entries stored before the
        invalidation.
    """

    try:
        cache.incr(version_cache_key, 1)
    except ValueError:
        # No cache key, we create one. The timeout is explicit because omitting
        # it makes the counter use the cache backend's default timeout.
        cache.set(version_cache_key, 1, timeout=version_timeout)

View file

@ -9,7 +9,8 @@ from baserow.contrib.builder.domains.handler import DomainHandler
from baserow.contrib.builder.domains.models import Domain
from baserow.contrib.builder.exceptions import BuilderDoesNotExist
from baserow.contrib.builder.models import Builder
from baserow.core.utils import Progress, safe_get_or_set_cache
from baserow.core.cache import global_cache
from baserow.core.utils import Progress
@pytest.mark.django_db
@ -180,18 +181,12 @@ def test_domain_publishing(data_fixture):
domain1 = DomainHandler().publish(domain1, progress)
# Pretend that someone visited the public builder-by-domain endpoint.
builder_by_domain_cache_key = (
DomainHandler.get_public_builder_by_domain_version_cache_key(
domain1.domain_name
)
)
version_key = DomainHandler.get_public_builder_by_domain_version_cache_key(
builder_by_domain_cache_key = DomainHandler.get_public_builder_by_domain_cache_key(
domain1.domain_name
)
# We populate the builder domain cache
safe_get_or_set_cache(builder_by_domain_cache_key, version_key, default="before")
global_cache.get(builder_by_domain_cache_key, default="before")
domain1.refresh_from_db()
@ -208,10 +203,7 @@ def test_domain_publishing(data_fixture):
DomainHandler().publish(domain1, progress)
# Following a re-publish, the builder-by-domain cache is invalidated
assert (
safe_get_or_set_cache(builder_by_domain_cache_key, version_key, default="after")
== "after"
)
assert global_cache.get(builder_by_domain_cache_key, default="after") == "after"
assert Builder.objects.count() == 2

View file

@ -549,7 +549,7 @@ def test_is_published_application_page(data_fixture):
domain = DomainHandler().publish(domain)
published_builder = domain.published_to
published_page = published_builder.page_set.get()
published_page = published_builder.visible_pages.get()
assert not PageHandler()._is_published_application_page(page.id)
assert PageHandler()._is_published_application_page(published_page.id)

View file

@ -1,20 +1,20 @@
from baserow.core.utils import invalidate_versioned_cache, safe_get_or_set_cache
from baserow.core.cache import GlobalCache
def test_safe_get_or_set_cache_literally_stores_default():
def test_local_cache_get_literally_stores_default():
"""If the cache is empty, a literal default value is stored and returned."""
cache_key = "test_literal_default"
result = safe_get_or_set_cache(
cache_key=cache_key,
result = GlobalCache().get(
key=cache_key,
default="my_default_value",
timeout=6,
)
assert result == "my_default_value"
def test_safe_get_or_set_cache_callable_stores_return_value():
def test_local_cache_get_callable_stores_return_value():
"""
If the cache is empty, a callable default's return value is stored and returned.
"""
@ -24,29 +24,29 @@ def test_safe_get_or_set_cache_callable_stores_return_value():
def some_callable():
return "callable_value"
result = safe_get_or_set_cache(
cache_key=cache_key,
result = GlobalCache().get(
key=cache_key,
default=some_callable,
timeout=6,
)
assert result == "callable_value"
def test_safe_get_or_set_cache_uses_existing_value():
def test_local_cache_get_uses_existing_value():
"""
If the cache key already has a value, it should be returned without overwriting.
"""
cache_key = "test_existing"
result = safe_get_or_set_cache(
cache_key=cache_key,
result = GlobalCache().get(
key=cache_key,
default="existing_value",
timeout=60,
)
result = safe_get_or_set_cache(
cache_key=cache_key,
result = GlobalCache().get(
key=cache_key,
default="unused_default",
timeout=6,
)
@ -54,72 +54,68 @@ def test_safe_get_or_set_cache_uses_existing_value():
assert result == "existing_value"
def test_versioned_cache_set_and_retrieve():
"""
When a version_cache_key is given and the value does not exist,
it should store and retrieve the value under <cache_key>__version_X.
"""
base_key = "test_versioned_base"
version_cache_key = "test_versioned_key"
# No version exists, so this should initialize version=0
result = safe_get_or_set_cache(
cache_key=base_key,
version_cache_key=version_cache_key,
default="versioned_value",
timeout=6,
)
assert result == "versioned_value"
def test_versioned_cache_hit():
"""
If a versioned key already exists, safe_get_or_set_cache should retrieve
that existing value rather than setting a new one.
"""
base_key = "test_versioned_base2"
version_cache_key = "test_versioned_key2"
result = safe_get_or_set_cache(
cache_key=base_key,
version_cache_key=version_cache_key,
default="already_versioned",
timeout=6,
)
result = safe_get_or_set_cache(
cache_key=base_key,
version_cache_key=version_cache_key,
default="unused_default",
timeout=6,
)
assert result == "already_versioned"
def test_versioned_cache_invalidation():
"""
If a versioned key already exists, safe_get_or_set_cache should retrieve
If a versioned key already exists, local_cache_get should retrieve
that existing value rather than setting a new one.
"""
base_key = "test_versioned_base2"
version_cache_key = "test_versioned_key2"
result = safe_get_or_set_cache(
cache_key=base_key,
version_cache_key=version_cache_key,
result = GlobalCache().get(
key=base_key,
default="already_versioned",
timeout=6,
)
invalidate_versioned_cache(version_cache_key)
GlobalCache().invalidate(base_key)
result = safe_get_or_set_cache(
cache_key=base_key,
version_cache_key=version_cache_key,
result = GlobalCache().get(
key=base_key,
default="new_value",
timeout=6,
)
assert result == "new_value"
def test_versioned_cache_invalidation_with_invalidation_key():
"""
When several cache entries share the same invalidate_key, invalidating with
that invalidate_key should make every one of them miss and store a new value.
"""
base_key = "test_versioned_base3_"
invalidate_key = "test_invalidate_key"
result = GlobalCache().get(
key=base_key + "1",
invalidate_key=invalidate_key,
default="already_versioned",
timeout=6,
)
result = GlobalCache().get(
key=base_key + "2",
invalidate_key=invalidate_key,
default="already_versioned",
timeout=6,
)
GlobalCache().invalidate(invalidate_key=invalidate_key)
result = GlobalCache().get(
key=base_key + "1",
invalidate_key=invalidate_key,
default="new_value",
timeout=6,
)
assert result == "new_value"
result = GlobalCache().get(
key=base_key + "2",
invalidate_key=invalidate_key,
default="new_value",
timeout=6,
)