mirror of
https://github.com/netdata/netdata.git
synced 2025-04-17 11:12:42 +00:00
Database engine (#5282)
* Database engine prototype version 0 * Database engine initial integration with netdata POC * Scalable database engine with file and memory management. * Database engine integration with netdata * Added MIN MAX definitions to fix alpine build of travis CI * Bugfix for backends and new DB engine, remove useless rrdset_time2slot() calls and erroneous checks * DB engine disk protocol correction * Moved DB engine storage file location to /var/cache/netdata/{host}/dbengine * Fix configure to require openSSL for DB engine * Fix netdata daemon health not holding read lock when iterating chart dimensions * Optimized query API for new DB engine and old netdata DB fallback code-path * netdata database internal query API improvements and cleanup * Bugfix for DB engine queries returning empty values * Added netdata internal check for data queries for old and new DB * Added statistics to DB engine and fixed memory corruption bug * Added preliminary charts for DB engine statistics * Changed DB engine ratio statistics to incremental * Added netdata statistics charts for DB engine internal statistics * Fix for netdata not compiling successfully when missing dbengine dependencies * Added DB engine functional test to netdata unittest command parameter * Implemented DB engine dataset generator based on example.random chart * Fix build error in CI * Support older versions of libuv1 * Fixes segmentation fault when using multiple DB engine instances concurrently * Fix memory corruption bug * Fixed createdataset advanced option not exiting * Fix for DB engine not working on FreeBSD * Support FreeBSD library paths of new dependencies * Workaround for unsupported O_DIRECT in OS X * Fix unittest crashing during cleanup * Disable DB engine FS caching in Apple OS X since O_DIRECT is not available * Fix segfault when unittest and DB engine dataset generator don't have permissions to create temporary host * Modified DB engine dataset generator to create multiple files * Toned down overzealous 
page cache prefetcher * Reduce internal memory fragmentation for page-cache data pages * Added documentation describing the DB engine * Documentation bugfixes * Fixed unit tests compilation errors since last rebase * Added note to back-up the DB engine files in documentation * Added codacy fix. * Support old gcc versions for atomic counters in DB engine
This commit is contained in:
parent
fed63b6e99
commit
6ca6d840dd
40 changed files with 4823 additions and 70 deletions
CMakeLists.txtMakefile.am
backends
configure.acdaemon
database
Makefile.amREADME.md
engine
Makefile.amREADME.mddatafile.cdatafile.hjournalfile.cjournalfile.hpagecache.cpagecache.hrrddiskprotocol.hrrdengine.crrdengine.hrrdengineapi.crrdengineapi.hrrdenginelib.crrdenginelib.h
rrd.crrd.hrrddim.crrdhost.crrdset.clibnetdata
packaging/installer
streaming
web/api
|
@ -89,6 +89,46 @@ set(NETDATA_COMMON_CFLAGS ${NETDATA_COMMON_CFLAGS} ${ZLIB_CFLAGS_OTHER})
|
|||
set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} ${ZLIB_LIBRARIES})
|
||||
set(NETDATA_COMMON_INCLUDE_DIRS ${NETDATA_COMMON_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS})
|
||||
|
||||
# -----------------------------------------------------------------------------
# libuv: multi-platform support library with a focus on asynchronous I/O.
# Located through pkg-config; configuration aborts when it is missing.

pkg_check_modules(LIBUV REQUIRED libuv)

# Fold the flags reported by pkg-config into the netdata-wide settings.
list(APPEND NETDATA_COMMON_CFLAGS ${LIBUV_CFLAGS_OTHER})
list(APPEND NETDATA_COMMON_LIBRARIES ${LIBUV_LIBRARIES})
list(APPEND NETDATA_COMMON_INCLUDE_DIRS ${LIBUV_INCLUDE_DIRS})
|
||||
|
||||
# -----------------------------------------------------------------------------
# lz4: extremely fast compression algorithm.
# Located through pkg-config; configuration aborts when it is missing.

pkg_check_modules(LIBLZ4 REQUIRED liblz4)

# Fold the flags reported by pkg-config into the netdata-wide settings.
list(APPEND NETDATA_COMMON_CFLAGS ${LIBLZ4_CFLAGS_OTHER})
list(APPEND NETDATA_COMMON_LIBRARIES ${LIBLZ4_LIBRARIES})
list(APPEND NETDATA_COMMON_INCLUDE_DIRS ${LIBLZ4_INCLUDE_DIRS})
|
||||
|
||||
# -----------------------------------------------------------------------------
# Judy: general purpose dynamic array.
#
# pkg-config was reported as not working for Judy on Ubuntu (possibly a broken
# upstream package), so pkg_check_modules(JUDY REQUIRED Judy) is not used here.
# Instead the library is probed directly: we try to link a test program
# against libJudy and look for the JudyLLast symbol declared in Judy.h.

# CMAKE_REQUIRED_LIBRARIES affects every subsequent check_*() call, so save the
# current value and restore it right after the probe; otherwise all later
# feature checks would silently be linked against libJudy as well.
set(NETDATA_SAVED_CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}")
set(CMAKE_REQUIRED_LIBRARIES "Judy")
check_symbol_exists("JudyLLast" "Judy.h" HAVE_JUDY)
set(CMAKE_REQUIRED_LIBRARIES "${NETDATA_SAVED_CMAKE_REQUIRED_LIBRARIES}")
unset(NETDATA_SAVED_CMAKE_REQUIRED_LIBRARIES)

if(HAVE_JUDY)
    message(STATUS "Judy library found")
    # No pkg-config data is available, so the linker flag is spelled out.
    set(NETDATA_COMMON_LIBRARIES ${NETDATA_COMMON_LIBRARIES} "-lJudy")
else()
    message( FATAL_ERROR "libJudy required but not found. Try installing 'libjudy-dev' or 'Judy-devel'." )
endif()
|
||||
|
||||
# -----------------------------------------------------------------------------
# OpenSSL: cryptography and SSL/TLS toolkit.
# Located through pkg-config; configuration aborts when it is missing.

pkg_check_modules(OPENSSL REQUIRED openssl)

# Fold the flags reported by pkg-config into the netdata-wide settings.
list(APPEND NETDATA_COMMON_CFLAGS ${OPENSSL_CFLAGS_OTHER})
list(APPEND NETDATA_COMMON_LIBRARIES ${OPENSSL_LIBRARIES})
list(APPEND NETDATA_COMMON_INCLUDE_DIRS ${OPENSSL_INCLUDE_DIRS})
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Detect libcap
|
||||
|
@ -403,6 +443,19 @@ set(RRD_PLUGIN_FILES
|
|||
database/rrdsetvar.h
|
||||
database/rrdvar.c
|
||||
database/rrdvar.h
|
||||
database/engine/rrdengine.c
|
||||
database/engine/rrdengine.h
|
||||
database/engine/rrddiskprotocol.h
|
||||
database/engine/datafile.c
|
||||
database/engine/datafile.h
|
||||
database/engine/journalfile.c
|
||||
database/engine/journalfile.h
|
||||
database/engine/rrdenginelib.c
|
||||
database/engine/rrdenginelib.h
|
||||
database/engine/rrdengineapi.c
|
||||
database/engine/rrdengineapi.h
|
||||
database/engine/pagecache.c
|
||||
database/engine/pagecache.h
|
||||
)
|
||||
|
||||
set(WEB_PLUGIN_FILES
|
||||
|
|
22
Makefile.am
22
Makefile.am
|
@ -311,6 +311,24 @@ RRD_PLUGIN_FILES = \
|
|||
database/rrdvar.h \
|
||||
$(NULL)
|
||||
|
||||
# DB engine sources: appended to RRD_PLUGIN_FILES only when the
# ENABLE_DBENGINE automake conditional (set by configure) is true.
if ENABLE_DBENGINE
|
||||
RRD_PLUGIN_FILES += \
|
||||
database/engine/rrdengine.c \
|
||||
database/engine/rrdengine.h \
|
||||
database/engine/rrddiskprotocol.h \
|
||||
database/engine/datafile.c \
|
||||
database/engine/datafile.h \
|
||||
database/engine/journalfile.c \
|
||||
database/engine/journalfile.h \
|
||||
database/engine/rrdenginelib.c \
|
||||
database/engine/rrdenginelib.h \
|
||||
database/engine/rrdengineapi.c \
|
||||
database/engine/rrdengineapi.h \
|
||||
database/engine/pagecache.c \
|
||||
database/engine/pagecache.h \
|
||||
$(NULL)
|
||||
endif
|
||||
|
||||
API_PLUGIN_FILES = \
|
||||
web/api/badges/web_buffer_svg.c \
|
||||
web/api/badges/web_buffer_svg.h \
|
||||
|
@ -477,6 +495,10 @@ NETDATA_COMMON_LIBS = \
|
|||
$(OPTIONAL_MATH_LIBS) \
|
||||
$(OPTIONAL_ZLIB_LIBS) \
|
||||
$(OPTIONAL_UUID_LIBS) \
|
||||
$(OPTIONAL_UV_LIBS) \
|
||||
$(OPTIONAL_LZ4_LIBS) \
|
||||
$(OPTIONAL_JUDY_LIBS) \
|
||||
$(OPTIONAL_SSL_LIBS) \
|
||||
$(NULL)
|
||||
# TODO: Find more graceful way to add libs for AWS Kinesis
|
||||
|
||||
|
|
|
@ -62,9 +62,11 @@ calculated_number backend_calculate_value_from_stored_data(
|
|||
(void)host;
|
||||
|
||||
// find the edges of the rrd database for this chart
|
||||
time_t first_t = rrdset_first_entry_t(st);
|
||||
time_t last_t = rrdset_last_entry_t(st);
|
||||
time_t first_t = rd->state->query_ops.oldest_time(rd);
|
||||
time_t last_t = rd->state->query_ops.latest_time(rd);
|
||||
time_t update_every = st->update_every;
|
||||
struct rrddim_query_handle handle;
|
||||
storage_number n;
|
||||
|
||||
// step back a little, to make sure we have complete data collection
|
||||
// for all metrics
|
||||
|
@ -105,6 +107,7 @@ calculated_number backend_calculate_value_from_stored_data(
|
|||
size_t counter = 0;
|
||||
calculated_number sum = 0;
|
||||
|
||||
/*
|
||||
long start_at_slot = rrdset_time2slot(st, before),
|
||||
stop_at_slot = rrdset_time2slot(st, after),
|
||||
slot, stop_now = 0;
|
||||
|
@ -126,7 +129,21 @@ calculated_number backend_calculate_value_from_stored_data(
|
|||
|
||||
counter++;
|
||||
}
|
||||
*/
|
||||
for(rd->state->query_ops.init(rd, &handle, before, after) ; !rd->state->query_ops.is_finished(&handle) ; ) {
|
||||
n = rd->state->query_ops.next_metric(&handle);
|
||||
|
||||
if(unlikely(!does_storage_number_exist(n))) {
|
||||
// not collected
|
||||
continue;
|
||||
}
|
||||
|
||||
calculated_number value = unpack_storage_number(n);
|
||||
sum += value;
|
||||
|
||||
counter++;
|
||||
}
|
||||
rd->state->query_ops.finalize(&handle);
|
||||
if(unlikely(!counter)) {
|
||||
debug(D_BACKEND, "BACKEND: %s.%s.%s: no values stored in database for range %lu to %lu",
|
||||
host->hostname, st->id, rd->id,
|
||||
|
|
93
configure.ac
93
configure.ac
|
@ -131,6 +131,12 @@ AC_ARG_ENABLE(
|
|||
,
|
||||
[enable_lto="detect"]
|
||||
)
|
||||
# --enable-dbengine / --disable-dbengine
# When neither is given, enable_dbengine is set to "detect" and the decision is
# left to the dependency checks further down.
AC_ARG_ENABLE([dbengine],
    [AS_HELP_STRING([--disable-dbengine], [disable netdata dbengine @<:@default autodetect@:>@])],
    [],
    [enable_dbengine="detect"])
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
@ -188,7 +194,7 @@ case "$host_os" in
|
|||
freebsd*)
|
||||
build_target=freebsd
|
||||
build_target_id=2
|
||||
CFLAGS="${CFLAGS} -I/usr/local/include"
|
||||
CFLAGS="${CFLAGS} -I/usr/local/include -L/usr/local/lib"
|
||||
;;
|
||||
darwin*)
|
||||
build_target=macos
|
||||
|
@ -242,6 +248,46 @@ fi
|
|||
AC_MSG_RESULT([${with_math}])
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
# libuv multi-platform support library with a focus on asynchronous I/O
# TODO: check version, uv_fs_scandir_next only available in version >= 1.0

# UV_LIBS stays empty when libuv (or the probed symbol) is not available.
AC_CHECK_LIB(
    [uv],
    [uv_fs_scandir_next],
    [UV_LIBS="-luv"]
)

# Spelled CFLAGS (not CLFAGS) to match OPTIONAL_UUID_CFLAGS and friends.
OPTIONAL_UV_CFLAGS="${UV_CFLAGS}"
OPTIONAL_UV_LIBS="${UV_LIBS}"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
# lz4 Extremely Fast Compression algorithm

# LZ4_LIBS stays empty when liblz4 (or the probed symbol) is not available.
AC_CHECK_LIB(
    [lz4],
    [LZ4_decompress_safe],
    [LZ4_LIBS="-llz4"]
)

# Spelled CFLAGS (not CLFAGS) to match OPTIONAL_UUID_CFLAGS and friends.
OPTIONAL_LZ4_CFLAGS="${LZ4_CFLAGS}"
OPTIONAL_LZ4_LIBS="${LZ4_LIBS}"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
# Judy General purpose dynamic array

# JUDY_LIBS stays empty when libJudy (or the probed symbol) is not available.
AC_CHECK_LIB(
    [Judy],
    [JudyLIns],
    [JUDY_LIBS="-lJudy"]
)

# Spelled CFLAGS (not CLFAGS) to match OPTIONAL_UUID_CFLAGS and friends.
OPTIONAL_JUDY_CFLAGS="${JUDY_CFLAGS}"
OPTIONAL_JUDY_LIBS="${JUDY_LIBS}"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# zlib
|
||||
|
||||
|
@ -279,6 +325,43 @@ OPTIONAL_UUID_CFLAGS="${UUID_CFLAGS}"
|
|||
OPTIONAL_UUID_LIBS="${UUID_LIBS}"
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
# OpenSSL Cryptography and SSL/TLS Toolkit

# Only libcrypto is probed (SHA256_Init); on success both -lcrypto and -lssl
# are added to the link line. SSL_LIBS stays empty when the probe fails.
# NOTE(review): libssl itself is not probed here - confirm it is always
# installed together with libcrypto on the supported platforms.
AC_CHECK_LIB(
    [crypto],
    [SHA256_Init],
    [SSL_LIBS="-lcrypto -lssl"]
)

# Spelled CFLAGS (not CLFAGS) to match OPTIONAL_UUID_CFLAGS and friends.
OPTIONAL_SSL_CFLAGS="${SSL_CFLAGS}"
OPTIONAL_SSL_LIBS="${SSL_LIBS}"
|
||||
|
||||
# -----------------------------------------------------------------------------
# DB engine
#
# enable_dbengine is "yes" (explicitly requested), "no" (explicitly disabled)
# or "detect" (default). An explicit request with a missing dependency is a
# hard error; in "detect" mode the engine is silently left out instead.
#
# The conditions are chained with '&&' and use 'test -n' / 'test -z' because
# the '-a' operator of test is not portable, and a bare operand such as "-luv"
# can be mistaken for an operator.

if test "${enable_dbengine}" = "yes"; then
    if test -z "${UV_LIBS}"; then
        AC_MSG_ERROR([libuv required but not found. Try installing 'libuv1-dev' or 'libuv-devel'.])
    fi

    if test -z "${LZ4_LIBS}"; then
        AC_MSG_ERROR([liblz4 required but not found. Try installing 'liblz4-dev' or 'lz4-devel'.])
    fi

    if test -z "${JUDY_LIBS}"; then
        AC_MSG_ERROR([libJudy required but not found. Try installing 'libjudy-dev' or 'Judy-devel'.])
    fi

    if test -z "${SSL_LIBS}"; then
        AC_MSG_ERROR([OpenSSL required but not found. Try installing 'libssl-dev' or 'openssl-devel'.])
    fi
fi

AC_MSG_CHECKING([if netdata dbengine should be used])
if test "${enable_dbengine}" != "no" && \
   test -n "${UV_LIBS}" && \
   test -n "${LZ4_LIBS}" && \
   test -n "${JUDY_LIBS}" && \
   test -n "${SSL_LIBS}"; then
    enable_dbengine="yes"
    AC_DEFINE([ENABLE_DBENGINE], [1], [netdata dbengine usability])
else
    enable_dbengine="no"
fi
AC_MSG_RESULT([${enable_dbengine}])
AM_CONDITIONAL([ENABLE_DBENGINE], [test "${enable_dbengine}" = "yes"])
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# compiler options
|
||||
|
||||
|
@ -781,7 +864,12 @@ CPPFLAGS="\
|
|||
|
||||
AC_SUBST([OPTIONAL_MATH_CFLAGS])
|
||||
AC_SUBST([OPTIONAL_MATH_LIBS])
|
||||
AC_SUBST([OPTIONAL_NFACCT_CFLAGS])
|
||||
AC_SUBST([OPTIONAL_RT_CLFAGS])
|
||||
AC_SUBST([OPTIONAL_UV_LIBS])
|
||||
AC_SUBST([OPTIONAL_LZ4_LIBS])
|
||||
AC_SUBST([OPTIONAL_JUDY_LIBS])
|
||||
AC_SUBST([OPTIONAL_SSL_LIBS])
|
||||
AC_SUBST([OPTIONAL_NFACCT_CLFAGS])
|
||||
AC_SUBST([OPTIONAL_NFACCT_LIBS])
|
||||
AC_SUBST([OPTIONAL_ZLIB_CFLAGS])
|
||||
AC_SUBST([OPTIONAL_ZLIB_LIBS])
|
||||
|
@ -831,6 +919,7 @@ AC_CONFIG_FILES([
|
|||
collectors/xenstat.plugin/Makefile
|
||||
daemon/Makefile
|
||||
database/Makefile
|
||||
database/engine/Makefile
|
||||
diagrams/Makefile
|
||||
health/Makefile
|
||||
health/notifications/Makefile
|
||||
|
|
|
@ -164,6 +164,8 @@ The command line options of the netdata 1.10.0 version are the following:
|
|||
|
||||
-W unittest Run internal unittests and exit.
|
||||
|
||||
-W createdataset=N Create a DB engine dataset of N seconds and exit.
|
||||
|
||||
-W set section option value
|
||||
set netdata.conf option from the command line.
|
||||
|
||||
|
|
|
@ -57,7 +57,7 @@ cache directory | `/var/cache/netdata` | The directory the memory database will
|
|||
lib directory | `/var/lib/netdata` | Contains the alarm log and the netdata instance guid.
|
||||
home directory | `/var/cache/netdata` | Contains the db files for the collected metrics
|
||||
plugins directory | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes.
|
||||
memory mode | `save` | When set to `save` netdata will save its round robin database on exit and load it on startup. When set to `map` the cache files will be updated in real time (check `man mmap` - do not set this on systems with heavy load or slow disks - the disks will continuously sync the in-memory database of netdata). When set to `ram` the round robin database will be temporary and it will be lost when netdata exits. `none` disables the database at this host. This also disables health monitoring (there cannot be health monitoring without a database). host access prefix | | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43).
|
||||
memory mode | `save` | When set to `save` netdata will save its round robin database on exit and load it on startup. When set to `map` the cache files will be updated in real time (check `man mmap` - do not set this on systems with heavy load or slow disks - the disks will continuously sync the in-memory database of netdata). When set to `dbengine` it behaves similarly to `map`, but with much better disk and memory efficiency, at the cost of higher overhead. When set to `ram` the round robin database will be temporary and it will be lost when netdata exits. `none` disables the database at this host. This also disables health monitoring (there cannot be health monitoring without a database).
host access prefix | | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this to work. Check [issue 43](https://github.com/netdata/netdata/issues/43).
|
||||
memory deduplication (ksm) | `yes` | When set to `yes`, netdata will offer its in-memory round robin database to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](../../database/#ksm)
|
||||
TZ environment variable | `:/etc/localtime` | Where to find the timezone
|
||||
timezone | auto-detected | The timezone retrieved from the environment variable
|
||||
|
|
|
@ -530,4 +530,223 @@ void global_statistics_charts(void) {
|
|||
|
||||
rrdset_done(st_rrdr_points);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
#ifdef ENABLE_DBENGINE
|
||||
if (localhost->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
unsigned long long stats_array[27];
|
||||
|
||||
/* get localhost's DB engine's statistics */
|
||||
rrdeng_get_27_statistics(localhost->rrdeng_ctx, stats_array);
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
{
    /*
     * Chart netdata.dbengine_compression_ratio: savings achieved by
     * compressing data extents, as a percentage of the uncompressed size.
     * The dimension divisor is 1000, so the value is stored scaled by 1000.
     */
    static RRDSET *st_compression = NULL;
    static RRDDIM *rd_savings = NULL;

    if (unlikely(!st_compression)) {
        st_compression = rrdset_create_localhost(
                "netdata"
                , "dbengine_compression_ratio"
                , NULL
                , "dbengine"
                , NULL
                , "NetData DB engine data extents' compression savings ratio"
                , "percentage"
                , "netdata"
                , "stats"
                , 130502
                , localhost->rrd_update_every
                , RRDSET_TYPE_LINE
        );

        rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
    }
    else
        rrdset_next(st_compression);

    /*
     * Signed arithmetic on purpose: when the compressed size exceeds the
     * uncompressed size the savings are negative. With unsigned operands the
     * subtraction would wrap around and produce a meaningless huge ratio.
     */
    collected_number ratio = 0;
    collected_number compressed_content_size = (collected_number)stats_array[12];
    collected_number content_size = (collected_number)stats_array[11];

    if (content_size) {
        // allow negative savings
        ratio = ((content_size - compressed_content_size) * 100 * 1000) / content_size;
    }
    rrddim_set_by_pointer(st_compression, rd_savings, ratio);

    rrdset_done(st_compression);
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
{
    /*
     * Chart netdata.page_cache_hit_ratio: share of page cache lookups that
     * were hits since the previous iteration, as a percentage scaled by 1000
     * (the dimension divisor is 1000).
     */
    static RRDSET *st_pg_cache_hit_ratio = NULL;
    static RRDDIM *rd_hit_ratio = NULL;

    /* counter values seen on the previous iteration */
    static unsigned long long old_hits = 0;
    static unsigned long long old_misses = 0;

    if (unlikely(!st_pg_cache_hit_ratio)) {
        st_pg_cache_hit_ratio = rrdset_create_localhost(
                "netdata"
                , "page_cache_hit_ratio"
                , NULL
                , "dbengine"
                , NULL
                , "NetData DB engine page cache hit ratio"
                , "percentage"
                , "netdata"
                , "stats"
                , 130503
                , localhost->rrd_update_every
                , RRDSET_TYPE_LINE
        );

        rd_hit_ratio = rrddim_add(st_pg_cache_hit_ratio, "ratio", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
    }
    else
        rrdset_next(st_pg_cache_hit_ratio);

    unsigned long long hits_now = stats_array[7];
    unsigned long long misses_now = stats_array[8];

    /* work on the increments since the last run, then remember the totals */
    unsigned long long new_hits = hits_now - old_hits;
    unsigned long long new_lookups = new_hits + (misses_now - old_misses);
    old_hits = hits_now;
    old_misses = misses_now;

    /* no lookups in this interval: report 0 instead of dividing by zero */
    unsigned long long ratio = new_lookups ? (new_hits * 100 * 1000) / new_lookups : 0;

    rrddim_set_by_pointer(st_pg_cache_hit_ratio, rd_hit_ratio, ratio);

    rrdset_done(st_pg_cache_hit_ratio);
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
{
    /*
     * Chart netdata.page_cache_stats: page cache page counts (absolute) and
     * page events (incremental). Deletions and evictions use a multiplier of
     * -1, so they are drawn below zero.
     */
    static RRDSET *chart = NULL;
    static RRDDIM *dim_populated = NULL, *dim_committed = NULL;
    static RRDDIM *dim_insertions = NULL, *dim_deletions = NULL;
    static RRDDIM *dim_backfills = NULL, *dim_evictions = NULL;

    if (unlikely(!chart)) {
        chart = rrdset_create_localhost(
                "netdata"
                , "page_cache_stats"
                , NULL
                , "dbengine"
                , NULL
                , "NetData DB engine page statistics"
                , "pages"
                , "netdata"
                , "stats"
                , 130504
                , localhost->rrd_update_every
                , RRDSET_TYPE_LINE
        );

        dim_populated  = rrddim_add(chart, "populated",  NULL,  1, 1, RRD_ALGORITHM_ABSOLUTE);
        dim_committed  = rrddim_add(chart, "commited",   NULL,  1, 1, RRD_ALGORITHM_ABSOLUTE);
        dim_insertions = rrddim_add(chart, "insertions", NULL,  1, 1, RRD_ALGORITHM_INCREMENTAL);
        dim_deletions  = rrddim_add(chart, "deletions",  NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
        dim_backfills  = rrddim_add(chart, "backfills",  NULL,  1, 1, RRD_ALGORITHM_INCREMENTAL);
        dim_evictions  = rrddim_add(chart, "evictions",  NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
    }
    else
        rrdset_next(chart);

    /* values come from fixed slots of the DB engine statistics array */
    rrddim_set_by_pointer(chart, dim_populated,  (collected_number)stats_array[3]);
    rrddim_set_by_pointer(chart, dim_committed,  (collected_number)stats_array[4]);
    rrddim_set_by_pointer(chart, dim_insertions, (collected_number)stats_array[5]);
    rrddim_set_by_pointer(chart, dim_deletions,  (collected_number)stats_array[6]);
    rrddim_set_by_pointer(chart, dim_backfills,  (collected_number)stats_array[9]);
    rrddim_set_by_pointer(chart, dim_evictions,  (collected_number)stats_array[10]);
    rrdset_done(chart);
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
{
    /*
     * Chart netdata.dbengine_io_throughput: DB engine read/write throughput.
     * The incremental counters are divided by 1024 * 1024 to match the MiB/s
     * unit; writes use a multiplier of -1, so they are drawn below zero.
     */
    static RRDSET *chart = NULL;
    static RRDDIM *dim_reads = NULL, *dim_writes = NULL;

    if (unlikely(!chart)) {
        chart = rrdset_create_localhost(
                "netdata"
                , "dbengine_io_throughput"
                , NULL
                , "dbengine"
                , NULL
                , "NetData DB engine I/O throughput"
                , "MiB/s"
                , "netdata"
                , "stats"
                , 130505
                , localhost->rrd_update_every
                , RRDSET_TYPE_LINE
        );

        dim_reads  = rrddim_add(chart, "reads",  NULL,  1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
        dim_writes = rrddim_add(chart, "writes", NULL, -1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
    }
    else
        rrdset_next(chart);

    /* values come from fixed slots of the DB engine statistics array */
    rrddim_set_by_pointer(chart, dim_reads,  (collected_number)stats_array[17]);
    rrddim_set_by_pointer(chart, dim_writes, (collected_number)stats_array[15]);
    rrdset_done(chart);
}
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
{
    /*
     * Chart netdata.dbengine_io_operations: DB engine read/write operations
     * per second. Writes use a multiplier of -1, so they are drawn below zero.
     */
    static RRDSET *chart = NULL;
    static RRDDIM *dim_reads = NULL, *dim_writes = NULL;

    if (unlikely(!chart)) {
        chart = rrdset_create_localhost(
                "netdata"
                , "dbengine_io_operations"
                , NULL
                , "dbengine"
                , NULL
                , "NetData DB engine I/O operations"
                , "operations/s"
                , "netdata"
                , "stats"
                , 130506
                , localhost->rrd_update_every
                , RRDSET_TYPE_LINE
        );

        dim_reads  = rrddim_add(chart, "reads",  NULL,  1, 1, RRD_ALGORITHM_INCREMENTAL);
        dim_writes = rrddim_add(chart, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
    }
    else
        rrdset_next(chart);

    /* values come from fixed slots of the DB engine statistics array */
    rrddim_set_by_pointer(chart, dim_reads,  (collected_number)stats_array[18]);
    rrddim_set_by_pointer(chart, dim_writes, (collected_number)stats_array[16]);
    rrdset_done(chart);
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
|
|
@ -301,6 +301,7 @@ int help(int exitcode) {
|
|||
" -W stacksize=N Set the stacksize (in bytes).\n\n"
|
||||
" -W debug_flags=N Set runtime tracing to debug.log.\n\n"
|
||||
" -W unittest Run internal unittests and exit.\n\n"
|
||||
" -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
|
||||
" -W set section option value\n"
|
||||
" set netdata.conf option from the command line.\n\n"
|
||||
" -W simple-pattern pattern string\n"
|
||||
|
@ -471,6 +472,25 @@ static void get_netdata_configured_variables() {
|
|||
|
||||
default_rrd_memory_mode = rrd_memory_mode_id(config_get(CONFIG_SECTION_GLOBAL, "memory mode", rrd_memory_mode_name(default_rrd_memory_mode)));
|
||||
|
||||
#ifdef ENABLE_DBENGINE
|
||||
// ------------------------------------------------------------------------
|
||||
// get default Database Engine page cache size in MiB
|
||||
|
||||
default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_GLOBAL, "page cache size", default_rrdeng_page_cache_mb);
|
||||
if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) {
|
||||
error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB);
|
||||
default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// get default Database Engine disk space quota in MiB
|
||||
|
||||
default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_GLOBAL, "dbengine disk space", default_rrdeng_disk_quota_mb);
|
||||
if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) {
|
||||
error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB);
|
||||
default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB;
|
||||
}
|
||||
#endif
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", "");
|
||||
|
@ -841,6 +861,7 @@ int main(int argc, char **argv) {
|
|||
{
|
||||
char* stacksize_string = "stacksize=";
|
||||
char* debug_flags_string = "debug_flags=";
|
||||
char* createdataset_string = "createdataset=";
|
||||
|
||||
if(strcmp(optarg, "unittest") == 0) {
|
||||
if(unit_test_buffer()) return 1;
|
||||
|
@ -853,9 +874,23 @@ int main(int argc, char **argv) {
|
|||
default_rrdpush_enabled = 0;
|
||||
if(run_all_mockup_tests()) return 1;
|
||||
if(unit_test_storage()) return 1;
|
||||
#ifdef ENABLE_DBENGINE
|
||||
if(test_dbengine()) return 1;
|
||||
#endif
|
||||
fprintf(stderr, "\n\nALL TESTS PASSED\n\n");
|
||||
return 0;
|
||||
}
|
||||
else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
|
||||
unsigned history_seconds;
|
||||
|
||||
optarg += strlen(createdataset_string);
|
||||
history_seconds = (unsigned )strtoull(optarg, NULL, 0);
|
||||
|
||||
#ifdef ENABLE_DBENGINE
|
||||
generate_dbengine_dataset(history_seconds);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
else if(strcmp(optarg, "simple-pattern") == 0) {
|
||||
if(optind + 2 > argc) {
|
||||
fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n"
|
||||
|
@ -1138,7 +1173,6 @@ int main(int argc, char **argv) {
|
|||
|
||||
rrd_init(netdata_configured_hostname, system_info);
|
||||
rrdhost_system_info_free(system_info);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// enable log flood protection
|
||||
|
||||
|
|
|
@ -1566,3 +1566,215 @@ int unit_test(long delay, long shift)
|
|||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DBENGINE
|
||||
/*
 * Store a collected value in a dimension, stamping it with the caller-supplied
 * time `now` (whole seconds, zero microseconds).
 *
 * Marks the dimension as updated, bumps its collections counter and raises
 * collected_value_max when the absolute value of `value` exceeds it.
 */
static inline void rrddim_set_by_pointer_fake_time(RRDDIM *rd, collected_number value, time_t now)
{
    collected_number magnitude = value;

    if(magnitude < 0)
        magnitude = -magnitude;

    rd->last_collected_time.tv_sec = now;
    rd->last_collected_time.tv_usec = 0;

    rd->collected_value = value;
    rd->updated = 1;
    rd->collections_counter++;

    if(unlikely(magnitude > rd->collected_value_max))
        rd->collected_value_max = magnitude;
}
|
||||
|
||||
int test_dbengine(void)
|
||||
{
|
||||
const int CHARTS = 128;
|
||||
const int DIMS = 16; /* That gives us 2048 metrics */
|
||||
const int POINTS = 16384; /* This produces 128MiB of metric data */
|
||||
const int QUERY_BATCH = 4096;
|
||||
uint8_t same;
|
||||
int i, j, k, c, errors;
|
||||
RRDHOST *host = NULL;
|
||||
RRDSET *st[CHARTS];
|
||||
RRDDIM *rd[CHARTS][DIMS];
|
||||
char name[101];
|
||||
time_t time_now;
|
||||
collected_number last;
|
||||
struct rrddim_query_handle handle;
|
||||
calculated_number value, expected;
|
||||
storage_number n;
|
||||
|
||||
error_log_limit_unlimited();
|
||||
fprintf(stderr, "\nRunning DB-engine test\n");
|
||||
|
||||
default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
|
||||
|
||||
debug(D_RRDHOST, "Initializing localhost with hostname 'unittest-dbengine'");
|
||||
host = rrdhost_find_or_create(
|
||||
"unittest-dbengine"
|
||||
, "unittest-dbengine"
|
||||
, "unittest-dbengine"
|
||||
, os_type
|
||||
, netdata_configured_timezone
|
||||
, config_get(CONFIG_SECTION_BACKEND, "host tags", "")
|
||||
, program_name
|
||||
, program_version
|
||||
, default_rrd_update_every
|
||||
, default_rrd_history_entries
|
||||
, RRD_MEMORY_MODE_DBENGINE
|
||||
, default_health_enabled
|
||||
, default_rrdpush_enabled
|
||||
, default_rrdpush_destination
|
||||
, default_rrdpush_api_key
|
||||
, default_rrdpush_send_charts_matching
|
||||
, NULL
|
||||
);
|
||||
if (NULL == host)
|
||||
return 1;
|
||||
|
||||
for (i = 0 ; i < CHARTS ; ++i) {
|
||||
snprintfz(name, 100, "dbengine-chart-%d", i);
|
||||
|
||||
// create the chart
|
||||
st[i] = rrdset_create(host, "netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest",
|
||||
NULL, 1, 1, RRDSET_TYPE_LINE);
|
||||
rrdset_flag_set(st[i], RRDSET_FLAG_DEBUG);
|
||||
rrdset_flag_set(st[i], RRDSET_FLAG_STORE_FIRST);
|
||||
for (j = 0 ; j < DIMS ; ++j) {
|
||||
snprintfz(name, 100, "dim-%d", j);
|
||||
|
||||
rd[i][j] = rrddim_add(st[i], name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
}
|
||||
|
||||
// feed it with the test data
|
||||
time_now = 1;
|
||||
last = 0;
|
||||
for (i = 0 ; i < CHARTS ; ++i) {
|
||||
for (j = 0 ; j < DIMS ; ++j) {
|
||||
rd[i][j]->last_collected_time.tv_sec =
|
||||
st[i]->last_collected_time.tv_sec = st[i]->last_updated.tv_sec = time_now;
|
||||
rd[i][j]->last_collected_time.tv_usec =
|
||||
st[i]->last_collected_time.tv_usec = st[i]->last_updated.tv_usec = 0;
|
||||
}
|
||||
}
|
||||
for(c = 0; c < POINTS ; ++c) {
|
||||
++time_now; // time_now = c + 2
|
||||
for (i = 0 ; i < CHARTS ; ++i) {
|
||||
st[i]->usec_since_last_update = USEC_PER_SEC;
|
||||
|
||||
for (j = 0; j < DIMS; ++j) {
|
||||
last = i * DIMS * POINTS + j * POINTS + c;
|
||||
rrddim_set_by_pointer_fake_time(rd[i][j], last, time_now);
|
||||
}
|
||||
rrdset_done(st[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// check the result
|
||||
errors = 0;
|
||||
|
||||
for(c = 0; c < POINTS ; c += QUERY_BATCH) {
|
||||
time_now = c + 2;
|
||||
for (i = 0 ; i < CHARTS ; ++i) {
|
||||
for (j = 0; j < DIMS; ++j) {
|
||||
rd[i][j]->state->query_ops.init(rd[i][j], &handle, time_now, time_now + QUERY_BATCH);
|
||||
for (k = 0; k < QUERY_BATCH; ++k) {
|
||||
last = i * DIMS * POINTS + j * POINTS + c + k;
|
||||
expected = unpack_storage_number(pack_storage_number((calculated_number)last, SN_EXISTS));
|
||||
|
||||
n = rd[i][j]->state->query_ops.next_metric(&handle);
|
||||
value = unpack_storage_number(n);
|
||||
|
||||
same = (calculated_number_round(value * 10000000.0) == calculated_number_round(expected * 10000000.0)) ? 1 : 0;
|
||||
if(!same) {
|
||||
fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value "
|
||||
CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", ### E R R O R ###\n",
|
||||
st[i]->name, rd[i][j]->name, (unsigned long)time_now + k, expected, value);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
rd[i][j]->state->query_ops.finalize(&handle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rrdeng_exit(host->rrdeng_ctx);
|
||||
rrd_wrlock();
|
||||
rrdhost_delete_charts(host);
|
||||
rrd_unlock();
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
void generate_dbengine_dataset(unsigned history_seconds)
|
||||
{
|
||||
const int DIMS = 128;
|
||||
const uint64_t EXPECTED_COMPRESSION_RATIO = 94;
|
||||
int j;
|
||||
RRDHOST *host = NULL;
|
||||
RRDSET *st;
|
||||
RRDDIM *rd[DIMS];
|
||||
char name[101];
|
||||
time_t time_current, time_present;
|
||||
|
||||
default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
|
||||
default_rrdeng_page_cache_mb = 128;
|
||||
/* Worst case for uncompressible data */
|
||||
default_rrdeng_disk_quota_mb = (((uint64_t)DIMS) * sizeof(storage_number) * history_seconds) / (1024 * 1024);
|
||||
default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
|
||||
|
||||
error_log_limit_unlimited();
|
||||
debug(D_RRDHOST, "Initializing localhost with hostname 'dbengine-dataset'");
|
||||
|
||||
host = rrdhost_find_or_create(
|
||||
"dbengine-dataset"
|
||||
, "dbengine-dataset"
|
||||
, "dbengine-dataset"
|
||||
, os_type
|
||||
, netdata_configured_timezone
|
||||
, config_get(CONFIG_SECTION_BACKEND, "host tags", "")
|
||||
, program_name
|
||||
, program_version
|
||||
, default_rrd_update_every
|
||||
, default_rrd_history_entries
|
||||
, RRD_MEMORY_MODE_DBENGINE
|
||||
, default_health_enabled
|
||||
, default_rrdpush_enabled
|
||||
, default_rrdpush_destination
|
||||
, default_rrdpush_api_key
|
||||
, default_rrdpush_send_charts_matching
|
||||
, NULL
|
||||
);
|
||||
if (NULL == host)
|
||||
return;
|
||||
|
||||
fprintf(stderr, "\nRunning DB-engine workload generator\n");
|
||||
|
||||
// create the chart
|
||||
st = rrdset_create(host, "example", "random", "random", "example", NULL, "random", "random", "random",
|
||||
NULL, 1, 1, RRDSET_TYPE_LINE);
|
||||
for (j = 0 ; j < DIMS ; ++j) {
|
||||
snprintfz(name, 100, "random%d", j);
|
||||
|
||||
rd[j] = rrddim_add(st, name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
time_present = now_realtime_sec();
|
||||
// feed it with the test data
|
||||
time_current = time_present - history_seconds;
|
||||
for (j = 0 ; j < DIMS ; ++j) {
|
||||
rd[j]->last_collected_time.tv_sec =
|
||||
st->last_collected_time.tv_sec = st->last_updated.tv_sec = time_current;
|
||||
rd[j]->last_collected_time.tv_usec =
|
||||
st->last_collected_time.tv_usec = st->last_updated.tv_usec = 0;
|
||||
}
|
||||
for( ; time_current < time_present; ++time_current) {
|
||||
st->usec_since_last_update = USEC_PER_SEC;
|
||||
|
||||
for (j = 0; j < DIMS; ++j) {
|
||||
rrddim_set_by_pointer_fake_time(rd[j], (time_current + j) % 128, time_current);
|
||||
}
|
||||
rrdset_done(st);
|
||||
}
|
||||
rrd_wrlock();
|
||||
rrdhost_free(host);
|
||||
rrd_unlock();
|
||||
|
||||
}
|
||||
#endif
|
|
@ -8,5 +8,9 @@ extern int unit_test(long delay, long shift);
|
|||
extern int run_all_mockup_tests(void);
|
||||
extern int unit_test_str2ld(void);
|
||||
extern int unit_test_buffer(void);
|
||||
#ifdef ENABLE_DBENGINE
|
||||
extern int test_dbengine(void);
|
||||
extern void generate_dbengine_dataset(unsigned history_seconds);
|
||||
#endif
|
||||
|
||||
#endif /* NETDATA_UNIT_TEST_H */
|
||||
|
|
|
@ -3,6 +3,10 @@
|
|||
AUTOMAKE_OPTIONS = subdir-objects
|
||||
MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
|
||||
|
||||
SUBDIRS = \
|
||||
engine \
|
||||
$(NULL)
|
||||
|
||||
dist_noinst_DATA = \
|
||||
README.md \
|
||||
$(NULL)
|
||||
|
|
|
@ -17,12 +17,13 @@ to 1 second. You will have just one hour of data.
|
|||
For a day of data and 1.000 dimensions, you will need: 86.400 seconds * 4 bytes * 1.000
|
||||
dimensions = 345MB of RAM.
|
||||
|
||||
Currently the only option you have to lower this number is to use
|
||||
**[Memory Deduplication - Kernel Same Page Merging - KSM](#ksm)**.
|
||||
One option you have to lower this number is to use
|
||||
**[Memory Deduplication - Kernel Same Page Merging - KSM](#ksm)**. Another possibility is to
|
||||
use the **[Database Engine](engine/)**.
|
||||
|
||||
## Memory modes
|
||||
|
||||
Currently netdata supports 5 memory modes:
|
||||
Currently netdata supports 6 memory modes:
|
||||
|
||||
1. `ram`, data are purely in memory. Data are never saved on disk. This mode uses `mmap()` and
|
||||
supports [KSM](#ksm).
|
||||
|
@ -42,6 +43,12 @@ Currently netdata supports 5 memory modes:
|
|||
5. `alloc`, like `ram` but it uses `calloc()` and does not support [KSM](#ksm). This mode is the
|
||||
fallback for all others except `none`.
|
||||
|
||||
6. `dbengine`, data are in database files. The [Database Engine](engine/) works like a traditional
|
||||
database. There is some amount of RAM dedicated to data caching and indexing and the rest of
|
||||
the data reside compressed on disk. The number of history entries is not fixed in this case,
|
||||
but depends on the configured disk space and the effective compression ratio of the data stored.
|
||||
For more details see [here](engine/).
|
||||
|
||||
You can select the memory mode by editing netdata.conf and setting:
|
||||
|
||||
```
|
||||
|
@ -80,7 +87,7 @@ server that will maintain the entire database for all nodes, and will also run h
|
|||
for all nodes.
|
||||
|
||||
For this central netdata, memory size can be a problem. Fortunately, netdata supports several
|
||||
memory modes. What is interesting for this setup is `memory mode = map`.
|
||||
memory modes. One interesting option for this setup is `memory mode = map`.
|
||||
|
||||
In this mode, the database of netdata is stored in memory mapped files. netdata continues to read
|
||||
and write the database in memory, but the kernel automatically loads and saves memory pages from/to
|
||||
|
@ -88,7 +95,7 @@ disk.
|
|||
|
||||
**We suggest _not_ to use this mode on nodes that run other applications.** There will always be
|
||||
dirty memory to be synced and this syncing process may influence the way other applications work.
|
||||
This mode however is ideal when we need a central netdata server that would normally need huge
|
||||
This mode however is useful when we need a central netdata server that would normally need huge
|
||||
amounts of memory. Using memory mode `map` we can overcome all memory restrictions.
|
||||
|
||||
There are a few kernel options that provide finer control on the way this syncing works. But before
|
||||
|
@ -155,9 +162,24 @@ vm.dirty_ratio = 90
|
|||
vm.dirty_writeback_centisecs = 0
|
||||
```
|
||||
|
||||
There is another memory mode to help overcome the memory size problem. What is most interesting
|
||||
for this setup is `memory mode = dbengine`.
|
||||
|
||||
In this mode, the database of netdata is stored in database files. The [Database Engine](engine/)
|
||||
works like a traditional database. There is some amount of RAM dedicated to data caching and
|
||||
indexing and the rest of the data reside compressed on disk. The number of history entries is not
|
||||
fixed in this case, but depends on the configured disk space and the effective compression ratio
|
||||
of the data stored.
|
||||
|
||||
We suggest using **this** mode on nodes that also run other applications. The Database Engine uses
|
||||
direct I/O to avoid polluting the OS filesystem caches and does not generate excessive I/O traffic
|
||||
so as to create the minimum possible interference with other applications. Using memory mode
|
||||
`dbengine` we can overcome most memory restrictions. For more details see [here](engine/).
|
||||
|
||||
## KSM
|
||||
|
||||
Netdata offers all its round robin database to kernel for deduplication.
|
||||
Netdata offers all its round robin database to kernel for deduplication
|
||||
(except for `memory mode = dbengine`).
|
||||
|
||||
In the past KSM has been criticized for consuming a lot of CPU resources.
|
||||
Although this is true when KSM is used for deduplicating certain applications, it is not true with
|
||||
|
|
8
database/engine/Makefile.am
Normal file
8
database/engine/Makefile.am
Normal file
|
@ -0,0 +1,8 @@
|
|||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
AUTOMAKE_OPTIONS = subdir-objects
|
||||
MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
|
||||
|
||||
dist_noinst_DATA = \
|
||||
README.md \
|
||||
$(NULL)
|
109
database/engine/README.md
Normal file
109
database/engine/README.md
Normal file
|
@ -0,0 +1,109 @@
|
|||
# Database engine
|
||||
|
||||
The Database Engine works like a traditional
|
||||
database. There is some amount of RAM dedicated to data caching and indexing and the rest of
|
||||
the data reside compressed on disk. The number of history entries is not fixed in this case,
|
||||
but depends on the configured disk space and the effective compression ratio of the data stored.
|
||||
|
||||
## Files
|
||||
|
||||
With the DB engine memory mode the metric data are stored in database files. These files are
|
||||
organized in pairs, the datafiles and their corresponding journalfiles, e.g.:
|
||||
|
||||
```
|
||||
datafile-1-0000000001.ndf
|
||||
journalfile-1-0000000001.njf
|
||||
datafile-1-0000000002.ndf
|
||||
journalfile-1-0000000002.njf
|
||||
datafile-1-0000000003.ndf
|
||||
journalfile-1-0000000003.njf
|
||||
...
|
||||
```
|
||||
|
||||
They are located under their host's cache directory in the directory `./dbengine`
|
||||
(e.g. for localhost the default location is `/var/cache/netdata/dbengine/*`). The higher
|
||||
numbered filenames contain more recent metric data. The user can safely delete some pairs
|
||||
of files when netdata is stopped to manually free up some space.
|
||||
|
||||
*Users should* **back up** *their `./dbengine` folders if they consider this data to be important.*
|
||||
|
||||
## Configuration
|
||||
|
||||
There is one DB engine instance per netdata host/node. That is, there is one `./dbengine` folder
|
||||
per node, and all charts of `dbengine` memory mode in such a host share the same storage space
|
||||
and DB engine instance memory state. You can select the memory mode for localhost by editing
|
||||
netdata.conf and setting:
|
||||
|
||||
```
|
||||
[global]
|
||||
memory mode = dbengine
|
||||
```
|
||||
|
||||
For setting the memory mode for the rest of the nodes you should look at
|
||||
[streaming](../../streaming/).
|
||||
|
||||
The `history` configuration option is meaningless for `memory mode = dbengine` and is ignored
|
||||
for any metrics being stored in the DB engine.
|
||||
|
||||
All DB engine instances, for localhost and all other streaming recipient nodes, inherit their
|
||||
configuration from `netdata.conf`:
|
||||
|
||||
```
|
||||
[global]
|
||||
page cache size = 32
|
||||
dbengine disk space = 256
|
||||
```
|
||||
|
||||
The above values are the default and minimum values for Page Cache size and DB engine disk space
|
||||
quota. Both numbers are in **MiB**. All DB engine instances will allocate the configured resources
|
||||
separately.
|
||||
|
||||
The `page cache size` option determines the amount of RAM in **MiB** that is dedicated to caching
|
||||
netdata metric values themselves.
|
||||
|
||||
The `dbengine disk space` option determines the amount of disk space in **MiB** that is dedicated
|
||||
to storing netdata metric values and all related metadata describing them.
|
||||
|
||||
## Operation
|
||||
|
||||
The DB engine stores chart metric values in 4096-byte pages in memory. Each chart dimension gets
|
||||
its own page to store consecutive values generated from the data collectors. Those pages comprise
|
||||
the **Page Cache**.
|
||||
|
||||
When those pages fill up they are slowly compressed and flushed to disk.
|
||||
It can take `4096 / 4 = 1024 seconds = 17 minutes`, for a chart dimension that is being collected
|
||||
every 1 second, to fill a page. Pages can be cut short when we stop netdata or the DB engine
|
||||
instance so as to not lose the data. When we query the DB engine for data we trigger disk read
|
||||
I/O requests that fill the Page Cache with the requested pages and potentially evict cold
|
||||
(not recently used) pages.
|
||||
|
||||
When the disk quota is exceeded the oldest values are removed from the DB engine in real time, by
|
||||
automatically deleting the oldest datafile and journalfile pair. Any corresponding pages residing
|
||||
in the Page Cache will also be invalidated and removed. The DB engine logic will try to maintain
|
||||
between 10 and 20 file pairs at any point in time.
|
||||
|
||||
The Database Engine uses direct I/O to avoid polluting the OS filesystem caches and does not
|
||||
generate excessive I/O traffic so as to create the minimum possible interference with other
|
||||
applications.
|
||||
|
||||
## Memory requirements
|
||||
|
||||
Using memory mode `dbengine` we can overcome most memory restrictions and store a dataset that
|
||||
is much larger than the available memory.
|
||||
|
||||
There are explicit memory requirements **per** DB engine **instance**, meaning **per** netdata
|
||||
**node** (e.g. localhost and streaming recipient nodes):
|
||||
|
||||
- `page cache size` must be at least `#dimensions-being-collected x 4096 x 2` bytes.
|
||||
|
||||
- an additional `#pages-on-disk x 4096 x 0.06` bytes of RAM are allocated for metadata.
|
||||
|
||||
- roughly speaking this is 6% of the uncompressed disk space taken by the DB files.
|
||||
|
||||
- for very highly compressible data (compression ratio > 90%) this RAM overhead
|
||||
is comparable to the disk space footprint.
|
||||
|
||||
An important observation is that RAM usage depends on both the `page cache size` and the
|
||||
`dbengine disk space` options.
|
||||
|
||||
[]()
|
335
database/engine/datafile.c
Normal file
335
database/engine/datafile.c
Normal file
|
@ -0,0 +1,335 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#include "rrdengine.h"
|
||||
|
||||
void df_extent_insert(struct extent_info *extent)
|
||||
{
|
||||
struct rrdengine_datafile *datafile = extent->datafile;
|
||||
|
||||
if (likely(NULL != datafile->extents.last)) {
|
||||
datafile->extents.last->next = extent;
|
||||
}
|
||||
if (unlikely(NULL == datafile->extents.first)) {
|
||||
datafile->extents.first = extent;
|
||||
}
|
||||
datafile->extents.last = extent;
|
||||
}
|
||||
|
||||
void datafile_list_insert(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile)
|
||||
{
|
||||
if (likely(NULL != ctx->datafiles.last)) {
|
||||
ctx->datafiles.last->next = datafile;
|
||||
}
|
||||
if (unlikely(NULL == ctx->datafiles.first)) {
|
||||
ctx->datafiles.first = datafile;
|
||||
}
|
||||
ctx->datafiles.last = datafile;
|
||||
}
|
||||
|
||||
void datafile_list_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile)
|
||||
{
|
||||
struct rrdengine_datafile *next;
|
||||
|
||||
next = datafile->next;
|
||||
assert((NULL != next) && (ctx->datafiles.first == datafile) && (ctx->datafiles.last != datafile));
|
||||
ctx->datafiles.first = next;
|
||||
}
|
||||
|
||||
|
||||
static void datafile_init(struct rrdengine_datafile *datafile, struct rrdengine_instance *ctx,
|
||||
unsigned tier, unsigned fileno)
|
||||
{
|
||||
assert(tier == 1);
|
||||
datafile->tier = tier;
|
||||
datafile->fileno = fileno;
|
||||
datafile->file = (uv_file)0;
|
||||
datafile->pos = 0;
|
||||
datafile->extents.first = datafile->extents.last = NULL; /* will be populated by journalfile */
|
||||
datafile->journalfile = NULL;
|
||||
datafile->next = NULL;
|
||||
datafile->ctx = ctx;
|
||||
}
|
||||
|
||||
static void generate_datafilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen)
|
||||
{
|
||||
(void) snprintf(str, maxlen, "%s/" DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION,
|
||||
datafile->ctx->dbfiles_path, datafile->tier, datafile->fileno);
|
||||
}
|
||||
|
||||
int destroy_data_file(struct rrdengine_datafile *datafile)
|
||||
{
|
||||
struct rrdengine_instance *ctx = datafile->ctx;
|
||||
uv_fs_t req;
|
||||
int ret, fd;
|
||||
char path[1024];
|
||||
|
||||
ret = uv_fs_ftruncate(NULL, &req, datafile->file, 0, NULL);
|
||||
if (ret < 0) {
|
||||
fatal("uv_fs_ftruncate: %s", uv_strerror(ret));
|
||||
}
|
||||
assert(0 == req.result);
|
||||
uv_fs_req_cleanup(&req);
|
||||
|
||||
ret = uv_fs_close(NULL, &req, datafile->file, NULL);
|
||||
if (ret < 0) {
|
||||
fatal("uv_fs_close: %s", uv_strerror(ret));
|
||||
}
|
||||
assert(0 == req.result);
|
||||
uv_fs_req_cleanup(&req);
|
||||
|
||||
generate_datafilepath(datafile, path, sizeof(path));
|
||||
fd = uv_fs_unlink(NULL, &req, path, NULL);
|
||||
if (fd < 0) {
|
||||
fatal("uv_fs_fsunlink: %s", uv_strerror(fd));
|
||||
}
|
||||
assert(0 == req.result);
|
||||
uv_fs_req_cleanup(&req);
|
||||
|
||||
++ctx->stats.datafile_deletions;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int create_data_file(struct rrdengine_datafile *datafile)
|
||||
{
|
||||
struct rrdengine_instance *ctx = datafile->ctx;
|
||||
uv_fs_t req;
|
||||
uv_file file;
|
||||
int ret, fd;
|
||||
struct rrdeng_df_sb *superblock;
|
||||
uv_buf_t iov;
|
||||
char path[1024];
|
||||
|
||||
generate_datafilepath(datafile, path, sizeof(path));
|
||||
fd = uv_fs_open(NULL, &req, path, O_DIRECT | O_CREAT | O_RDWR | O_TRUNC,
|
||||
S_IRUSR | S_IWUSR, NULL);
|
||||
if (fd < 0) {
|
||||
fatal("uv_fs_fsopen: %s", uv_strerror(fd));
|
||||
}
|
||||
assert(req.result >= 0);
|
||||
file = req.result;
|
||||
uv_fs_req_cleanup(&req);
|
||||
#ifdef __APPLE__
|
||||
info("Disabling OS X caching for file \"%s\".", path);
|
||||
fcntl(fd, F_NOCACHE, 1);
|
||||
#endif
|
||||
|
||||
ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock));
|
||||
if (unlikely(ret)) {
|
||||
fatal("posix_memalign:%s", strerror(ret));
|
||||
}
|
||||
(void) strncpy(superblock->magic_number, RRDENG_DF_MAGIC, RRDENG_MAGIC_SZ);
|
||||
(void) strncpy(superblock->version, RRDENG_DF_VER, RRDENG_VER_SZ);
|
||||
superblock->tier = 1;
|
||||
|
||||
iov = uv_buf_init((void *)superblock, sizeof(*superblock));
|
||||
|
||||
ret = uv_fs_write(NULL, &req, file, &iov, 1, 0, NULL);
|
||||
if (ret < 0) {
|
||||
fatal("uv_fs_write: %s", uv_strerror(ret));
|
||||
}
|
||||
if (req.result < 0) {
|
||||
fatal("uv_fs_write: %s", uv_strerror((int)req.result));
|
||||
}
|
||||
uv_fs_req_cleanup(&req);
|
||||
free(superblock);
|
||||
|
||||
datafile->file = file;
|
||||
datafile->pos = sizeof(*superblock);
|
||||
ctx->stats.io_write_bytes += sizeof(*superblock);
|
||||
++ctx->stats.io_write_requests;
|
||||
++ctx->stats.datafile_creations;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Reads the superblock from offset 0 of an open data file and validates its
 * magic number, version and tier.
 *
 * Returns 0 when the superblock is valid, UV_EINVAL when it is not, or the
 * negative libuv error code when the read fails.
 */
static int check_data_file_superblock(uv_file file)
{
    struct rrdeng_df_sb *superblock;
    uv_buf_t iov;
    uv_fs_t req;
    int ret;

    ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock));
    if (unlikely(ret)) {
        fatal("posix_memalign:%s", strerror(ret));
    }
    iov = uv_buf_init((void *)superblock, sizeof(*superblock));

    ret = uv_fs_read(NULL, &req, file, &iov, 1, 0, NULL);
    if (ret < 0) {
        error("uv_fs_read: %s", uv_strerror(ret));
        uv_fs_req_cleanup(&req);
        free(superblock);
        return ret;
    }
    assert(req.result >= 0);
    uv_fs_req_cleanup(&req);

    /* assume a valid superblock until one of the checks fails */
    ret = 0;
    if (strncmp(superblock->magic_number, RRDENG_DF_MAGIC, RRDENG_MAGIC_SZ) ||
        strncmp(superblock->version, RRDENG_DF_VER, RRDENG_VER_SZ) ||
        superblock->tier != 1) {
        error("File has invalid superblock.");
        ret = UV_EINVAL;
    }

    free(superblock);
    return ret;
}
|
||||
|
||||
static int load_data_file(struct rrdengine_datafile *datafile)
|
||||
{
|
||||
struct rrdengine_instance *ctx = datafile->ctx;
|
||||
uv_fs_t req;
|
||||
uv_file file;
|
||||
int ret, fd;
|
||||
uint64_t file_size;
|
||||
char path[1024];
|
||||
|
||||
generate_datafilepath(datafile, path, sizeof(path));
|
||||
fd = uv_fs_open(NULL, &req, path, O_DIRECT | O_RDWR, S_IRUSR | S_IWUSR, NULL);
|
||||
if (fd < 0) {
|
||||
/* if (UV_ENOENT != fd) */
|
||||
error("uv_fs_fsopen: %s", uv_strerror(fd));
|
||||
uv_fs_req_cleanup(&req);
|
||||
return fd;
|
||||
}
|
||||
assert(req.result >= 0);
|
||||
file = req.result;
|
||||
uv_fs_req_cleanup(&req);
|
||||
#ifdef __APPLE__
|
||||
info("Disabling OS X caching for file \"%s\".", path);
|
||||
fcntl(fd, F_NOCACHE, 1);
|
||||
#endif
|
||||
info("Initializing data file \"%s\".", path);
|
||||
|
||||
ret = check_file_properties(file, &file_size, sizeof(struct rrdeng_df_sb));
|
||||
if (ret)
|
||||
goto error;
|
||||
file_size = ALIGN_BYTES_CEILING(file_size);
|
||||
|
||||
ret = check_data_file_superblock(file);
|
||||
if (ret)
|
||||
goto error;
|
||||
ctx->stats.io_read_bytes += sizeof(struct rrdeng_df_sb);
|
||||
++ctx->stats.io_read_requests;
|
||||
|
||||
datafile->file = file;
|
||||
datafile->pos = file_size;
|
||||
|
||||
info("Data file \"%s\" initialized (size:%"PRIu64").", path, file_size);
|
||||
return 0;
|
||||
|
||||
error:
|
||||
(void) uv_fs_close(NULL, &req, file, NULL);
|
||||
uv_fs_req_cleanup(&req);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * qsort() comparator for an array of pointers to data file descriptors.
 * The descriptors are ordered by the strcmp() order of their generated paths.
 */
static int scan_data_files_cmp(const void *a, const void *b)
{
    struct rrdengine_datafile *lhs = *(struct rrdengine_datafile **)a;
    struct rrdengine_datafile *rhs = *(struct rrdengine_datafile **)b;
    char lhs_path[1024];
    char rhs_path[1024];

    generate_datafilepath(lhs, lhs_path, sizeof(lhs_path));
    generate_datafilepath(rhs, rhs_path, sizeof(rhs_path));

    return strcmp(lhs_path, rhs_path);
}
|
||||
|
||||
/* Returns number of datafiles that were loaded */
|
||||
static int scan_data_files(struct rrdengine_instance *ctx)
|
||||
{
|
||||
int ret;
|
||||
unsigned tier, no, matched_files, i,failed_to_load;
|
||||
static uv_fs_t req;
|
||||
uv_dirent_t dent;
|
||||
struct rrdengine_datafile **datafiles, *datafile;
|
||||
struct rrdengine_journalfile *journalfile;
|
||||
|
||||
ret = uv_fs_scandir(NULL, &req, ctx->dbfiles_path, 0, NULL);
|
||||
assert(ret >= 0);
|
||||
assert(req.result >= 0);
|
||||
info("Found %d files in path %s", ret, ctx->dbfiles_path);
|
||||
|
||||
datafiles = callocz(MIN(ret, MAX_DATAFILES), sizeof(*datafiles));
|
||||
for (matched_files = 0 ; UV_EOF != uv_fs_scandir_next(&req, &dent) && matched_files < MAX_DATAFILES ; ) {
|
||||
info("Scanning file \"%s\"", dent.name);
|
||||
ret = sscanf(dent.name, DATAFILE_PREFIX RRDENG_FILE_NUMBER_SCAN_TMPL DATAFILE_EXTENSION, &tier, &no);
|
||||
if (2 == ret) {
|
||||
info("Matched file \"%s\"", dent.name);
|
||||
datafile = mallocz(sizeof(*datafile));
|
||||
datafile_init(datafile, ctx, tier, no);
|
||||
datafiles[matched_files++] = datafile;
|
||||
}
|
||||
}
|
||||
uv_fs_req_cleanup(&req);
|
||||
|
||||
if (matched_files == MAX_DATAFILES) {
|
||||
error("Warning: hit maximum database engine file limit of %d files", MAX_DATAFILES);
|
||||
}
|
||||
qsort(datafiles, matched_files, sizeof(*datafiles), scan_data_files_cmp);
|
||||
for (failed_to_load = 0, i = 0 ; i < matched_files ; ++i) {
|
||||
datafile = datafiles[i];
|
||||
ret = load_data_file(datafile);
|
||||
if (0 != ret) {
|
||||
free(datafile);
|
||||
++failed_to_load;
|
||||
continue;
|
||||
}
|
||||
journalfile = mallocz(sizeof(*journalfile));
|
||||
datafile->journalfile = journalfile;
|
||||
journalfile_init(journalfile, datafile);
|
||||
ret = load_journal_file(ctx, journalfile, datafile);
|
||||
if (0 != ret) {
|
||||
free(datafile);
|
||||
free(journalfile);
|
||||
++failed_to_load;
|
||||
continue;
|
||||
}
|
||||
datafile_list_insert(ctx, datafile);
|
||||
ctx->disk_space += datafile->pos + journalfile->pos;
|
||||
}
|
||||
if (failed_to_load) {
|
||||
error("%u files failed to load.", failed_to_load);
|
||||
}
|
||||
free(datafiles);
|
||||
|
||||
return matched_files - failed_to_load;
|
||||
}
|
||||
|
||||
/* Creates a datafile and a journalfile pair */
|
||||
void create_new_datafile_pair(struct rrdengine_instance *ctx, unsigned tier, unsigned fileno)
|
||||
{
|
||||
struct rrdengine_datafile *datafile;
|
||||
struct rrdengine_journalfile *journalfile;
|
||||
int ret;
|
||||
|
||||
info("Creating new data and journal files.");
|
||||
datafile = mallocz(sizeof(*datafile));
|
||||
datafile_init(datafile, ctx, tier, fileno);
|
||||
ret = create_data_file(datafile);
|
||||
assert(!ret);
|
||||
|
||||
journalfile = mallocz(sizeof(*journalfile));
|
||||
datafile->journalfile = journalfile;
|
||||
journalfile_init(journalfile, datafile);
|
||||
ret = create_journal_file(journalfile, datafile);
|
||||
assert(!ret);
|
||||
datafile_list_insert(ctx, datafile);
|
||||
ctx->disk_space += datafile->pos + journalfile->pos;
|
||||
}
|
||||
|
||||
/*
 * Loads the data files found on disk or, when none could be loaded, creates
 * the first datafile/journalfile pair (tier 1, file number 1).
 *
 * Page cache must already be initialized.
 *
 * Returns 0.
 */
int init_data_files(struct rrdengine_instance *ctx)
{
    int loaded = scan_data_files(ctx);

    if (0 != loaded) {
        return 0;
    }

    info("Data files not found, creating.");
    create_new_datafile_pair(ctx, 1, 1);
    return 0;
}
|
63
database/engine/datafile.h
Normal file
63
database/engine/datafile.h
Normal file
|
@ -0,0 +1,63 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_DATAFILE_H
|
||||
#define NETDATA_DATAFILE_H
|
||||
|
||||
#include "rrdengine.h"
|
||||
|
||||
/* Forward declarations */
|
||||
struct rrdengine_datafile;
|
||||
struct rrdengine_journalfile;
|
||||
struct rrdengine_instance;
|
||||
|
||||
#define DATAFILE_PREFIX "datafile-"
|
||||
#define DATAFILE_EXTENSION ".ndf"
|
||||
|
||||
#define MAX_DATAFILE_SIZE (1073741824LU)
|
||||
#define MIN_DATAFILE_SIZE (16777216LU)
|
||||
#define MAX_DATAFILES (65536) /* Supports up to 64TiB for now */
|
||||
#define TARGET_DATAFILES (20)
|
||||
|
||||
#define DATAFILE_IDEAL_IO_SIZE (1048576U)
|
||||
|
||||
/* Describes one extent of a data file; extents of the same data file are chained through `next`. */
struct extent_info {
|
||||
uint64_t offset; /* position of the extent on disk (the extent list is kept sorted by it) */
|
||||
uint32_t size; /* presumably the size of the extent in bytes -- TODO confirm */
|
||||
uint8_t number_of_pages; /* presumably the number of entries in pages[] -- TODO confirm */
|
||||
struct rrdengine_datafile *datafile; /* data file this extent belongs to */
|
||||
struct extent_info *next; /* next extent in the data file's extent list */
|
||||
struct rrdeng_page_cache_descr *pages[]; /* flexible array member, must stay last */
|
||||
};
|
||||
|
||||
/* Singly linked list of the extents of one data file; df_extent_insert() appends at `last`. */
struct rrdengine_df_extents {
|
||||
/* the extent list is sorted based on disk offset */
|
||||
struct extent_info *first; /* head of the list, NULL when the list is empty */
|
||||
struct extent_info *last; /* tail of the list, NULL when the list is empty */
|
||||
};
|
||||
|
||||
/* only one event loop is supported for now */
|
||||
/* In-memory descriptor of one data file of a DB engine instance. */
struct rrdengine_datafile {
|
||||
unsigned tier; /* part of the file name; datafile_init() only accepts tier 1 */
|
||||
unsigned fileno; /* file number, part of the file name */
|
||||
uv_file file; /* open libuv file, set when the file is created or loaded */
|
||||
uint64_t pos; /* superblock size after creation, aligned file size after loading */
|
||||
struct rrdengine_instance *ctx; /* DB engine instance that owns this data file */
|
||||
struct rrdengine_df_extents extents; /* extents of this file, populated by the journal file code */
|
||||
struct rrdengine_journalfile *journalfile; /* journal file paired with this data file */
|
||||
struct rrdengine_datafile *next; /* next data file in the instance's list */
|
||||
};
|
||||
|
||||
/* Singly linked list of data files; datafile_list_insert() appends at `last`, datafile_list_delete() removes `first`. */
struct rrdengine_datafile_list {
|
||||
struct rrdengine_datafile *first; /* oldest */
|
||||
struct rrdengine_datafile *last; /* newest */
|
||||
};
|
||||
|
||||
extern void df_extent_insert(struct extent_info *extent);
|
||||
extern void datafile_list_insert(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile);
|
||||
extern void datafile_list_delete(struct rrdengine_instance *ctx, struct rrdengine_datafile *datafile);
|
||||
extern int destroy_data_file(struct rrdengine_datafile *datafile);
|
||||
extern int create_data_file(struct rrdengine_datafile *datafile);
|
||||
extern void create_new_datafile_pair(struct rrdengine_instance *ctx, unsigned tier, unsigned fileno);
|
||||
extern int init_data_files(struct rrdengine_instance *ctx);
|
||||
|
||||
#endif /* NETDATA_DATAFILE_H */
|
462
database/engine/journalfile.c
Normal file
462
database/engine/journalfile.c
Normal file
|
@ -0,0 +1,462 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#include "rrdengine.h"
|
||||
|
||||
/*
 * libuv completion callback of the journal block write issued by
 * wal_flush_transaction_buffer(). A failed write is fatal; otherwise the
 * request is cleaned up and the I/O descriptor and its buffer are released.
 */
static void flush_transaction_buffer_cb(uv_fs_t* req)
{
    /* the descriptor travels with the request in its data pointer */
    struct generic_io_descriptor *descriptor = req->data;

    debug(D_RRDENGINE, "%s: Journal block was written to disk.", __func__);
    if (req->result < 0) {
        fatal("%s: uv_fs_write: %s", __func__, uv_strerror((int)req->result));
    }

    uv_fs_req_cleanup(req);
    free(descriptor->buf);
    free(descriptor);
}
|
||||
|
||||
/* Careful to always call this before creating a new journal file */
|
||||
void wal_flush_transaction_buffer(struct rrdengine_worker_config* wc)
|
||||
{
|
||||
struct rrdengine_instance *ctx = wc->ctx;
|
||||
int ret;
|
||||
struct generic_io_descriptor *io_descr;
|
||||
unsigned pos, size;
|
||||
struct rrdengine_journalfile *journalfile;
|
||||
|
||||
if (unlikely(NULL == ctx->commit_log.buf || 0 == ctx->commit_log.buf_pos)) {
|
||||
return;
|
||||
}
|
||||
/* care with outstanding transactions when switching journal files */
|
||||
journalfile = ctx->datafiles.last->journalfile;
|
||||
|
||||
io_descr = mallocz(sizeof(*io_descr));
|
||||
pos = ctx->commit_log.buf_pos;
|
||||
size = ctx->commit_log.buf_size;
|
||||
if (pos < size) {
|
||||
/* simulate an empty transaction to skip the rest of the block */
|
||||
*(uint8_t *) (ctx->commit_log.buf + pos) = STORE_PADDING;
|
||||
}
|
||||
io_descr->buf = ctx->commit_log.buf;
|
||||
io_descr->bytes = size;
|
||||
io_descr->pos = journalfile->pos;
|
||||
io_descr->req.data = io_descr;
|
||||
io_descr->completion = NULL;
|
||||
|
||||
io_descr->iov = uv_buf_init((void *)io_descr->buf, size);
|
||||
ret = uv_fs_write(wc->loop, &io_descr->req, journalfile->file, &io_descr->iov, 1,
|
||||
journalfile->pos, flush_transaction_buffer_cb);
|
||||
assert (-1 != ret);
|
||||
journalfile->pos += RRDENG_BLOCK_SIZE;
|
||||
ctx->disk_space += RRDENG_BLOCK_SIZE;
|
||||
ctx->commit_log.buf = NULL;
|
||||
ctx->stats.io_write_bytes += RRDENG_BLOCK_SIZE;
|
||||
++ctx->stats.io_write_requests;
|
||||
}
|
||||
|
||||
void * wal_get_transaction_buffer(struct rrdengine_worker_config* wc, unsigned size)
|
||||
{
|
||||
struct rrdengine_instance *ctx = wc->ctx;
|
||||
int ret;
|
||||
unsigned buf_pos, buf_size;
|
||||
|
||||
assert(size);
|
||||
if (ctx->commit_log.buf) {
|
||||
unsigned remaining;
|
||||
|
||||
buf_pos = ctx->commit_log.buf_pos;
|
||||
buf_size = ctx->commit_log.buf_size;
|
||||
remaining = buf_size - buf_pos;
|
||||
if (size > remaining) {
|
||||
/* we need a new buffer */
|
||||
wal_flush_transaction_buffer(wc);
|
||||
}
|
||||
}
|
||||
if (NULL == ctx->commit_log.buf) {
|
||||
buf_size = ALIGN_BYTES_CEILING(size);
|
||||
ret = posix_memalign((void *)&ctx->commit_log.buf, RRDFILE_ALIGNMENT, buf_size);
|
||||
if (unlikely(ret)) {
|
||||
fatal("posix_memalign:%s", strerror(ret));
|
||||
}
|
||||
buf_pos = ctx->commit_log.buf_pos = 0;
|
||||
ctx->commit_log.buf_size = buf_size;
|
||||
}
|
||||
ctx->commit_log.buf_pos += size;
|
||||
|
||||
return ctx->commit_log.buf + buf_pos;
|
||||
}
|
||||
|
||||
static void generate_journalfilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen)
|
||||
{
|
||||
(void) snprintf(str, maxlen, "%s/" WALFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL WALFILE_EXTENSION,
|
||||
datafile->ctx->dbfiles_path, datafile->tier, datafile->fileno);
|
||||
}
|
||||
|
||||
void journalfile_init(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
|
||||
{
|
||||
journalfile->file = (uv_file)0;
|
||||
journalfile->pos = 0;
|
||||
journalfile->datafile = datafile;
|
||||
}
|
||||
|
||||
int destroy_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
|
||||
{
|
||||
struct rrdengine_instance *ctx = datafile->ctx;
|
||||
uv_fs_t req;
|
||||
int ret, fd;
|
||||
char path[1024];
|
||||
|
||||
ret = uv_fs_ftruncate(NULL, &req, journalfile->file, 0, NULL);
|
||||
if (ret < 0) {
|
||||
fatal("uv_fs_ftruncate: %s", uv_strerror(ret));
|
||||
}
|
||||
assert(0 == req.result);
|
||||
uv_fs_req_cleanup(&req);
|
||||
|
||||
ret = uv_fs_close(NULL, &req, journalfile->file, NULL);
|
||||
if (ret < 0) {
|
||||
fatal("uv_fs_close: %s", uv_strerror(ret));
|
||||
exit(ret);
|
||||
}
|
||||
assert(0 == req.result);
|
||||
uv_fs_req_cleanup(&req);
|
||||
|
||||
generate_journalfilepath(datafile, path, sizeof(path));
|
||||
fd = uv_fs_unlink(NULL, &req, path, NULL);
|
||||
if (fd < 0) {
|
||||
fatal("uv_fs_fsunlink: %s", uv_strerror(fd));
|
||||
}
|
||||
assert(0 == req.result);
|
||||
uv_fs_req_cleanup(&req);
|
||||
|
||||
++ctx->stats.journalfile_deletions;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int create_journal_file(struct rrdengine_journalfile *journalfile, struct rrdengine_datafile *datafile)
|
||||
{
|
||||
struct rrdengine_instance *ctx = datafile->ctx;
|
||||
uv_fs_t req;
|
||||
uv_file file;
|
||||
int ret, fd;
|
||||
struct rrdeng_jf_sb *superblock;
|
||||
uv_buf_t iov;
|
||||
char path[1024];
|
||||
|
||||
generate_journalfilepath(datafile, path, sizeof(path));
|
||||
fd = uv_fs_open(NULL, &req, path, O_DIRECT | O_CREAT | O_RDWR | O_TRUNC,
|
||||
S_IRUSR | S_IWUSR, NULL);
|
||||
if (fd < 0) {
|
||||
fatal("uv_fs_fsopen: %s", uv_strerror(fd));
|
||||
}
|
||||
assert(req.result >= 0);
|
||||
file = req.result;
|
||||
uv_fs_req_cleanup(&req);
|
||||
#ifdef __APPLE__
|
||||
info("Disabling OS X caching for file \"%s\".", path);
|
||||
fcntl(fd, F_NOCACHE, 1);
|
||||
#endif
|
||||
|
||||
ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock));
|
||||
if (unlikely(ret)) {
|
||||
fatal("posix_memalign:%s", strerror(ret));
|
||||
}
|
||||
(void) strncpy(superblock->magic_number, RRDENG_JF_MAGIC, RRDENG_MAGIC_SZ);
|
||||
(void) strncpy(superblock->version, RRDENG_JF_VER, RRDENG_VER_SZ);
|
||||
|
||||
iov = uv_buf_init((void *)superblock, sizeof(*superblock));
|
||||
|
||||
ret = uv_fs_write(NULL, &req, file, &iov, 1, 0, NULL);
|
||||
if (ret < 0) {
|
||||
fatal("uv_fs_write: %s", uv_strerror(ret));
|
||||
}
|
||||
if (req.result < 0) {
|
||||
fatal("uv_fs_write: %s", uv_strerror((int)req.result));
|
||||
}
|
||||
uv_fs_req_cleanup(&req);
|
||||
free(superblock);
|
||||
|
||||
journalfile->file = file;
|
||||
journalfile->pos = sizeof(*superblock);
|
||||
ctx->stats.io_write_bytes += sizeof(*superblock);
|
||||
++ctx->stats.io_write_requests;
|
||||
++ctx->stats.journalfile_creations;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Reads the superblock from offset 0 of `file` and validates its magic number
 * and version.
 *
 * Returns 0 when the superblock is valid, the negative libuv error when the
 * read fails, or UV_EINVAL when the superblock is truncated or does not match.
 */
static int check_journal_file_superblock(uv_file file)
{
    int ret;
    size_t bytes_read;
    struct rrdeng_jf_sb *superblock;
    uv_buf_t iov;
    uv_fs_t req;

    ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock));
    if (unlikely(ret)) {
        fatal("posix_memalign:%s", strerror(ret));
    }
    iov = uv_buf_init((void *)superblock, sizeof(*superblock));

    ret = uv_fs_read(NULL, &req, file, &iov, 1, 0, NULL);
    if (ret < 0) {
        error("uv_fs_read: %s", uv_strerror(ret));
        uv_fs_req_cleanup(&req);
        goto error;
    }
    assert(req.result >= 0);
    bytes_read = (size_t)req.result;
    uv_fs_req_cleanup(&req);

    if (bytes_read < sizeof(*superblock)) {
        /* short read: the rest of the buffer is uninitialised, do not compare it */
        error("File has invalid superblock.");
        ret = UV_EINVAL;
        goto error;
    }

    if (strncmp(superblock->magic_number, RRDENG_JF_MAGIC, RRDENG_MAGIC_SZ) ||
        strncmp(superblock->version, RRDENG_JF_VER, RRDENG_VER_SZ)) {
        error("File has invalid superblock.");
        ret = UV_EINVAL;
    } else {
        ret = 0;
    }
error:
    free(superblock);
    return ret;
}
|
||||
|
||||
static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile,
|
||||
void *buf, unsigned max_size)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
unsigned i, count, payload_length, descr_size, valid_pages;
|
||||
struct rrdeng_page_cache_descr *descr;
|
||||
struct extent_info *extent;
|
||||
/* persistent structures */
|
||||
struct rrdeng_jf_store_data *jf_metric_data;
|
||||
|
||||
jf_metric_data = buf;
|
||||
count = jf_metric_data->number_of_pages;
|
||||
descr_size = sizeof(*jf_metric_data->descr) * count;
|
||||
payload_length = sizeof(*jf_metric_data) + descr_size;
|
||||
if (payload_length > max_size) {
|
||||
error("Corrupted transaction payload.");
|
||||
return;
|
||||
}
|
||||
|
||||
extent = mallocz(sizeof(*extent) + count * sizeof(extent->pages[0]));
|
||||
extent->offset = jf_metric_data->extent_offset;
|
||||
extent->size = jf_metric_data->extent_size;
|
||||
extent->number_of_pages = count;
|
||||
extent->datafile = journalfile->datafile;
|
||||
extent->next = NULL;
|
||||
|
||||
for (i = 0, valid_pages = 0 ; i < count ; ++i) {
|
||||
uuid_t *temp_id;
|
||||
Pvoid_t *PValue;
|
||||
struct pg_cache_page_index *page_index;
|
||||
|
||||
if (PAGE_METRICS != jf_metric_data->descr[i].type) {
|
||||
error("Unknown page type encountered.");
|
||||
continue;
|
||||
}
|
||||
++valid_pages;
|
||||
temp_id = (uuid_t *)jf_metric_data->descr[i].uuid;
|
||||
|
||||
uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
|
||||
PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, temp_id, sizeof(uuid_t));
|
||||
if (likely(NULL != PValue)) {
|
||||
page_index = *PValue;
|
||||
}
|
||||
uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
|
||||
if (NULL == PValue) {
|
||||
/* First time we see the UUID */
|
||||
uv_rwlock_wrlock(&pg_cache->metrics_index.lock);
|
||||
PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, temp_id, sizeof(uuid_t), PJE0);
|
||||
assert(NULL == *PValue); /* TODO: figure out concurrency model */
|
||||
*PValue = page_index = create_page_index(temp_id);
|
||||
uv_rwlock_wrunlock(&pg_cache->metrics_index.lock);
|
||||
}
|
||||
|
||||
descr = pg_cache_create_descr();
|
||||
descr->page_length = jf_metric_data->descr[i].page_length;
|
||||
descr->start_time = jf_metric_data->descr[i].start_time;
|
||||
descr->end_time = jf_metric_data->descr[i].end_time;
|
||||
descr->id = &page_index->id;
|
||||
descr->extent = extent;
|
||||
extent->pages[i] = descr;
|
||||
pg_cache_insert(ctx, page_index, descr);
|
||||
}
|
||||
if (likely(valid_pages))
|
||||
df_extent_insert(extent);
|
||||
}
|
||||
|
||||
/*
|
||||
* Replays transaction by interpreting up to max_size bytes from buf.
|
||||
* Sets id to the current transaction id or to 0 if unknown.
|
||||
* Returns size of transaction record or 0 for unknown size.
|
||||
*/
|
||||
static unsigned replay_transaction(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile,
|
||||
void *buf, uint64_t *id, unsigned max_size)
|
||||
{
|
||||
unsigned payload_length, size_bytes;
|
||||
int ret;
|
||||
/* persistent structures */
|
||||
struct rrdeng_jf_transaction_header *jf_header;
|
||||
struct rrdeng_jf_transaction_trailer *jf_trailer;
|
||||
uLong crc;
|
||||
|
||||
*id = 0;
|
||||
jf_header = buf;
|
||||
if (STORE_PADDING == jf_header->type) {
|
||||
debug(D_RRDENGINE, "Skipping padding.");
|
||||
return 0;
|
||||
}
|
||||
if (sizeof(*jf_header) > max_size) {
|
||||
error("Corrupted transaction record, skipping.");
|
||||
return 0;
|
||||
}
|
||||
*id = jf_header->id;
|
||||
payload_length = jf_header->payload_length;
|
||||
size_bytes = sizeof(*jf_header) + payload_length + sizeof(*jf_trailer);
|
||||
if (size_bytes > max_size) {
|
||||
error("Corrupted transaction record, skipping.");
|
||||
return 0;
|
||||
}
|
||||
jf_trailer = buf + sizeof(*jf_header) + payload_length;
|
||||
crc = crc32(0L, Z_NULL, 0);
|
||||
crc = crc32(crc, buf, sizeof(*jf_header) + payload_length);
|
||||
ret = crc32cmp(jf_trailer->checksum, crc);
|
||||
debug(D_RRDENGINE, "Transaction %"PRIu64" was read from disk. CRC32 check: %s", *id, ret ? "FAILED" : "SUCCEEDED");
|
||||
if (unlikely(ret)) {
|
||||
return size_bytes;
|
||||
}
|
||||
switch (jf_header->type) {
|
||||
case STORE_DATA:
|
||||
debug(D_RRDENGINE, "Replaying transaction %"PRIu64"", jf_header->id);
|
||||
restore_extent_metadata(ctx, journalfile, buf + sizeof(*jf_header), payload_length);
|
||||
break;
|
||||
default:
|
||||
error("Unknown transaction type. Skipping record.");
|
||||
break;
|
||||
}
|
||||
|
||||
return size_bytes;
|
||||
}
|
||||
|
||||
|
||||
#define READAHEAD_BYTES (RRDENG_BLOCK_SIZE * 256)
|
||||
/*
|
||||
* Iterates journal file transactions and populates the page cache.
|
||||
* Page cache must already be initialized.
|
||||
* Returns the maximum transaction id it discovered.
|
||||
*/
|
||||
static uint64_t iterate_transactions(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile)
|
||||
{
|
||||
uv_file file;
|
||||
uint64_t file_size;//, data_file_size;
|
||||
int ret;
|
||||
uint64_t pos, pos_i, max_id, id;
|
||||
unsigned size_bytes;
|
||||
void *buf;
|
||||
uv_buf_t iov;
|
||||
uv_fs_t req;
|
||||
|
||||
file = journalfile->file;
|
||||
file_size = journalfile->pos;
|
||||
//data_file_size = journalfile->datafile->pos; TODO: utilize this?
|
||||
|
||||
max_id = 1;
|
||||
ret = posix_memalign((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES);
|
||||
if (unlikely(ret)) {
|
||||
fatal("posix_memalign:%s", strerror(ret));
|
||||
}
|
||||
|
||||
for (pos = sizeof(struct rrdeng_jf_sb) ; pos < file_size ; pos += READAHEAD_BYTES) {
|
||||
size_bytes = MIN(READAHEAD_BYTES, file_size - pos);
|
||||
iov = uv_buf_init(buf, size_bytes);
|
||||
ret = uv_fs_read(NULL, &req, file, &iov, 1, pos, NULL);
|
||||
if (ret < 0) {
|
||||
fatal("uv_fs_read: %s", uv_strerror(ret));
|
||||
/*uv_fs_req_cleanup(&req);*/
|
||||
}
|
||||
assert(req.result >= 0);
|
||||
uv_fs_req_cleanup(&req);
|
||||
ctx->stats.io_read_bytes += size_bytes;
|
||||
++ctx->stats.io_read_requests;
|
||||
|
||||
//pos_i = pos;
|
||||
//while (pos_i < pos + size_bytes) {
|
||||
for (pos_i = 0 ; pos_i < size_bytes ; ) {
|
||||
unsigned max_size;
|
||||
|
||||
max_size = pos + size_bytes - pos_i;
|
||||
ret = replay_transaction(ctx, journalfile, buf + pos_i, &id, max_size);
|
||||
if (!ret) /* TODO: support transactions bigger than 4K */
|
||||
/* unknown transaction size, move on to the next block */
|
||||
pos_i = ALIGN_BYTES_FLOOR(pos_i + RRDENG_BLOCK_SIZE);
|
||||
else
|
||||
pos_i += ret;
|
||||
max_id = MAX(max_id, id);
|
||||
}
|
||||
}
|
||||
|
||||
free(buf);
|
||||
return max_id;
|
||||
}
|
||||
|
||||
int load_journal_file(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile,
|
||||
struct rrdengine_datafile *datafile)
|
||||
{
|
||||
uv_fs_t req;
|
||||
uv_file file;
|
||||
int ret, fd;
|
||||
uint64_t file_size, max_id;
|
||||
char path[1024];
|
||||
|
||||
generate_journalfilepath(datafile, path, sizeof(path));
|
||||
fd = uv_fs_open(NULL, &req, path, O_DIRECT | O_RDWR, S_IRUSR | S_IWUSR, NULL);
|
||||
if (fd < 0) {
|
||||
/* if (UV_ENOENT != fd) */
|
||||
error("uv_fs_fsopen: %s", uv_strerror(fd));
|
||||
uv_fs_req_cleanup(&req);
|
||||
return fd;
|
||||
}
|
||||
assert(req.result >= 0);
|
||||
file = req.result;
|
||||
uv_fs_req_cleanup(&req);
|
||||
#ifdef __APPLE__
|
||||
info("Disabling OS X caching for file \"%s\".", path);
|
||||
fcntl(fd, F_NOCACHE, 1);
|
||||
#endif
|
||||
info("Loading journal file \"%s\".", path);
|
||||
|
||||
ret = check_file_properties(file, &file_size, sizeof(struct rrdeng_df_sb));
|
||||
if (ret)
|
||||
goto error;
|
||||
file_size = ALIGN_BYTES_FLOOR(file_size);
|
||||
|
||||
ret = check_journal_file_superblock(file);
|
||||
if (ret)
|
||||
goto error;
|
||||
ctx->stats.io_read_bytes += sizeof(struct rrdeng_jf_sb);
|
||||
++ctx->stats.io_read_requests;
|
||||
|
||||
journalfile->file = file;
|
||||
journalfile->pos = file_size;
|
||||
|
||||
max_id = iterate_transactions(ctx, journalfile);
|
||||
|
||||
ctx->commit_log.transaction_id = MAX(ctx->commit_log.transaction_id, max_id + 1);
|
||||
|
||||
info("Journal file \"%s\" loaded (size:%"PRIu64").", path, file_size);
|
||||
return 0;
|
||||
|
||||
error:
|
||||
(void) uv_fs_close(NULL, &req, file, NULL);
|
||||
uv_fs_req_cleanup(&req);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void init_commit_log(struct rrdengine_instance *ctx)
|
||||
{
|
||||
ctx->commit_log.buf = NULL;
|
||||
ctx->commit_log.buf_pos = 0;
|
||||
ctx->commit_log.transaction_id = 1;
|
||||
}
|
46
database/engine/journalfile.h
Normal file
46
database/engine/journalfile.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_JOURNALFILE_H
|
||||
#define NETDATA_JOURNALFILE_H
|
||||
|
||||
#include "rrdengine.h"
|
||||
|
||||
/* Forward declarations */
|
||||
struct rrdengine_instance;
|
||||
struct rrdengine_worker_config;
|
||||
struct rrdengine_datafile;
|
||||
struct rrdengine_journalfile;
|
||||
|
||||
#define WALFILE_PREFIX "journalfile-"
|
||||
#define WALFILE_EXTENSION ".njf"
|
||||
|
||||
|
||||
/* only one event loop is supported for now */
|
||||
struct rrdengine_journalfile {
|
||||
uv_file file;
|
||||
uint64_t pos;
|
||||
|
||||
struct rrdengine_datafile *datafile;
|
||||
};
|
||||
|
||||
/*
 * State of the commit log of one engine instance.
 * Only one event loop is supported for now.
 */
struct transaction_commit_log {
    uint64_t transaction_id;    /* initialised to 1 by init_commit_log() */

    /* outstanding transaction buffer */
    void *buf;          /* NULL when there is no outstanding buffer */
    unsigned buf_pos;   /* bytes of buf already handed out */
    unsigned buf_size;  /* allocated size of buf in bytes */
};
|
||||
|
||||
/* journal file lifecycle */
extern void journalfile_init(struct rrdengine_journalfile *journalfile,
                             struct rrdengine_datafile *datafile);
extern int create_journal_file(struct rrdengine_journalfile *journalfile,
                               struct rrdengine_datafile *datafile);
extern int load_journal_file(struct rrdengine_instance *ctx,
                             struct rrdengine_journalfile *journalfile,
                             struct rrdengine_datafile *datafile);
extern int destroy_journal_file(struct rrdengine_journalfile *journalfile,
                                struct rrdengine_datafile *datafile);

/* commit log / write-ahead transaction buffer */
extern void init_commit_log(struct rrdengine_instance *ctx);
extern void *wal_get_transaction_buffer(struct rrdengine_worker_config* wc, unsigned size);
extern void wal_flush_transaction_buffer(struct rrdengine_worker_config* wc);
|
||||
|
||||
|
||||
#endif /* NETDATA_JOURNALFILE_H */
|
785
database/engine/pagecache.c
Normal file
785
database/engine/pagecache.c
Normal file
|
@ -0,0 +1,785 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#define NETDATA_RRD_INTERNALS
|
||||
|
||||
#include "rrdengine.h"
|
||||
|
||||
/* Forward declarations */
|
||||
static int pg_cache_try_evict_one_page_unsafe(struct rrdengine_instance *ctx);
|
||||
|
||||
/* always inserts into tail */
|
||||
static inline void pg_cache_replaceQ_insert_unsafe(struct rrdengine_instance *ctx,
|
||||
struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
if (likely(NULL != pg_cache->replaceQ.tail)) {
|
||||
descr->prev = pg_cache->replaceQ.tail;
|
||||
pg_cache->replaceQ.tail->next = descr;
|
||||
}
|
||||
if (unlikely(NULL == pg_cache->replaceQ.head)) {
|
||||
pg_cache->replaceQ.head = descr;
|
||||
}
|
||||
pg_cache->replaceQ.tail = descr;
|
||||
}
|
||||
|
||||
static inline void pg_cache_replaceQ_delete_unsafe(struct rrdengine_instance *ctx,
|
||||
struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
struct rrdeng_page_cache_descr *prev, *next;
|
||||
|
||||
prev = descr->prev;
|
||||
next = descr->next;
|
||||
|
||||
if (likely(NULL != prev)) {
|
||||
prev->next = next;
|
||||
}
|
||||
if (likely(NULL != next)) {
|
||||
next->prev = prev;
|
||||
}
|
||||
if (unlikely(descr == pg_cache->replaceQ.head)) {
|
||||
pg_cache->replaceQ.head = next;
|
||||
}
|
||||
if (unlikely(descr == pg_cache->replaceQ.tail)) {
|
||||
pg_cache->replaceQ.tail = prev;
|
||||
}
|
||||
descr->prev = descr->next = NULL;
|
||||
}
|
||||
|
||||
void pg_cache_replaceQ_insert(struct rrdengine_instance *ctx,
|
||||
struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->replaceQ.lock);
|
||||
pg_cache_replaceQ_insert_unsafe(ctx, descr);
|
||||
uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);
|
||||
}
|
||||
|
||||
void pg_cache_replaceQ_delete(struct rrdengine_instance *ctx,
|
||||
struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->replaceQ.lock);
|
||||
pg_cache_replaceQ_delete_unsafe(ctx, descr);
|
||||
uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);
|
||||
}
|
||||
void pg_cache_replaceQ_set_hot(struct rrdengine_instance *ctx,
|
||||
struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->replaceQ.lock);
|
||||
pg_cache_replaceQ_delete_unsafe(ctx, descr);
|
||||
pg_cache_replaceQ_insert_unsafe(ctx, descr);
|
||||
uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);
|
||||
}
|
||||
|
||||
struct rrdeng_page_cache_descr *pg_cache_create_descr(void)
|
||||
{
|
||||
struct rrdeng_page_cache_descr *descr;
|
||||
|
||||
descr = mallocz(sizeof(*descr));
|
||||
descr->page = NULL;
|
||||
descr->page_length = 0;
|
||||
descr->start_time = INVALID_TIME;
|
||||
descr->end_time = INVALID_TIME;
|
||||
descr->id = NULL;
|
||||
descr->extent = NULL;
|
||||
descr->flags = 0;
|
||||
descr->prev = descr->next = descr->private = NULL;
|
||||
descr->refcnt = 0;
|
||||
descr->waiters = 0;
|
||||
descr->handle = NULL;
|
||||
assert(0 == uv_cond_init(&descr->cond));
|
||||
assert(0 == uv_mutex_init(&descr->mutex));
|
||||
|
||||
return descr;
|
||||
}
|
||||
|
||||
void pg_cache_destroy_descr(struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
uv_cond_destroy(&descr->cond);
|
||||
uv_mutex_destroy(&descr->mutex);
|
||||
free(descr);
|
||||
}
|
||||
|
||||
/* The caller must hold page descriptor lock. */
|
||||
void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
if (descr->waiters)
|
||||
uv_cond_broadcast(&descr->cond);
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller must hold page descriptor lock.
|
||||
* The lock will be released and re-acquired. The descriptor is not guaranteed
|
||||
* to exist after this function returns.
|
||||
*/
|
||||
void pg_cache_wait_event_unsafe(struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
++descr->waiters;
|
||||
uv_cond_wait(&descr->cond, &descr->mutex);
|
||||
--descr->waiters;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns page flags.
|
||||
* The lock will be released and re-acquired. The descriptor is not guaranteed
|
||||
* to exist after this function returns.
|
||||
*/
|
||||
unsigned long pg_cache_wait_event(struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
uv_mutex_lock(&descr->mutex);
|
||||
pg_cache_wait_event_unsafe(descr);
|
||||
flags = descr->flags;
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller must hold page descriptor lock.
|
||||
* Gets a reference to the page descriptor.
|
||||
* Returns 1 on success and 0 on failure.
|
||||
*/
|
||||
int pg_cache_try_get_unsafe(struct rrdeng_page_cache_descr *descr, int exclusive_access)
|
||||
{
|
||||
if ((descr->flags & (RRD_PAGE_LOCKED | RRD_PAGE_READ_PENDING)) ||
|
||||
(exclusive_access && descr->refcnt)) {
|
||||
return 0;
|
||||
}
|
||||
if (exclusive_access)
|
||||
descr->flags |= RRD_PAGE_LOCKED;
|
||||
++descr->refcnt;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller must hold page descriptor lock.
|
||||
* Same return values as pg_cache_try_get_unsafe() without doing anything.
|
||||
*/
|
||||
int pg_cache_can_get_unsafe(struct rrdeng_page_cache_descr *descr, int exclusive_access)
|
||||
{
|
||||
if ((descr->flags & (RRD_PAGE_LOCKED | RRD_PAGE_READ_PENDING)) ||
|
||||
(exclusive_access && descr->refcnt)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller must hold the page descriptor lock.
|
||||
* This function may block doing cleanup.
|
||||
*/
|
||||
void pg_cache_put_unsafe(struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
descr->flags &= ~RRD_PAGE_LOCKED;
|
||||
if (0 == --descr->refcnt) {
|
||||
pg_cache_wake_up_waiters_unsafe(descr);
|
||||
}
|
||||
/* TODO: perform cleanup */
|
||||
}
|
||||
|
||||
/*
|
||||
* This function may block doing cleanup.
|
||||
*/
|
||||
void pg_cache_put(struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
uv_mutex_lock(&descr->mutex);
|
||||
pg_cache_put_unsafe(descr);
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
}
|
||||
|
||||
/* The caller must hold the page cache lock */
|
||||
static void pg_cache_release_pages_unsafe(struct rrdengine_instance *ctx, unsigned number)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
pg_cache->populated_pages -= number;
|
||||
}
|
||||
|
||||
static void pg_cache_release_pages(struct rrdengine_instance *ctx, unsigned number)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
|
||||
pg_cache_release_pages_unsafe(ctx, number);
|
||||
uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
|
||||
}
|
||||
/*
|
||||
* This function will block until it reserves #number populated pages.
|
||||
* It will trigger evictions or dirty page flushing if the ctx->max_cache_pages limit is hit.
|
||||
*/
|
||||
static void pg_cache_reserve_pages(struct rrdengine_instance *ctx, unsigned number)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
assert(number < ctx->max_cache_pages);
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
|
||||
if (pg_cache->populated_pages + number >= ctx->max_cache_pages + 1)
|
||||
debug(D_RRDENGINE, "=================================\nPage cache full. Reserving %u pages.\n=================================",
|
||||
number);
|
||||
while (pg_cache->populated_pages + number >= ctx->max_cache_pages + 1) {
|
||||
if (!pg_cache_try_evict_one_page_unsafe(ctx)) {
|
||||
/* failed to evict */
|
||||
struct completion compl;
|
||||
struct rrdeng_cmd cmd;
|
||||
|
||||
uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
|
||||
|
||||
init_completion(&compl);
|
||||
cmd.opcode = RRDENG_FLUSH_PAGES;
|
||||
cmd.completion = &compl;
|
||||
rrdeng_enq_cmd(&ctx->worker_config, &cmd);
|
||||
/* wait for some pages to be flushed */
|
||||
debug(D_RRDENGINE, "%s: waiting for pages to be written to disk before evicting.", __func__);
|
||||
wait_for_completion(&compl);
|
||||
destroy_completion(&compl);
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
|
||||
}
|
||||
}
|
||||
pg_cache->populated_pages += number;
|
||||
uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function will attempt to reserve #number populated pages.
|
||||
* It may trigger evictions if the ctx->cache_pages_low_watermark limit is hit.
|
||||
* Returns 0 on failure and 1 on success.
|
||||
*/
|
||||
static int pg_cache_try_reserve_pages(struct rrdengine_instance *ctx, unsigned number)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
unsigned count = 0;
|
||||
int ret = 0;
|
||||
|
||||
assert(number < ctx->max_cache_pages);
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
|
||||
if (pg_cache->populated_pages + number >= ctx->cache_pages_low_watermark + 1) {
|
||||
debug(D_RRDENGINE,
|
||||
"=================================\nPage cache full. Trying to reserve %u pages.\n=================================",
|
||||
number);
|
||||
do {
|
||||
if (!pg_cache_try_evict_one_page_unsafe(ctx))
|
||||
break;
|
||||
++count;
|
||||
} while (pg_cache->populated_pages + number >= ctx->cache_pages_low_watermark + 1);
|
||||
debug(D_RRDENGINE, "Evicted %u pages.", count);
|
||||
}
|
||||
|
||||
if (pg_cache->populated_pages + number < ctx->max_cache_pages + 1) {
|
||||
pg_cache->populated_pages += number;
|
||||
ret = 1; /* success */
|
||||
}
|
||||
uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* The caller must hold the page cache and the page descriptor locks in that order */
|
||||
static void pg_cache_evict_unsafe(struct rrdengine_instance *ctx, struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
free(descr->page);
|
||||
descr->page = NULL;
|
||||
descr->flags &= ~RRD_PAGE_POPULATED;
|
||||
pg_cache_release_pages_unsafe(ctx, 1);
|
||||
++ctx->stats.pg_cache_evictions;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller must hold the page cache lock.
|
||||
* Lock order: page cache -> replaceQ -> descriptor
|
||||
* This function iterates all pages and tries to evict one.
|
||||
* If it fails it sets in_flight_descr to the oldest descriptor that has write-back in progress,
|
||||
* or it sets it to NULL if no write-back is in progress.
|
||||
*
|
||||
* Returns 1 on success and 0 on failure.
|
||||
*/
|
||||
static int pg_cache_try_evict_one_page_unsafe(struct rrdengine_instance *ctx)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
unsigned long old_flags;
|
||||
struct rrdeng_page_cache_descr *descr;
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->replaceQ.lock);
|
||||
for (descr = pg_cache->replaceQ.head ; NULL != descr ; descr = descr->next) {
|
||||
uv_mutex_lock(&descr->mutex);
|
||||
old_flags = descr->flags;
|
||||
if ((old_flags & RRD_PAGE_POPULATED) && !(old_flags & RRD_PAGE_DIRTY) && pg_cache_try_get_unsafe(descr, 1)) {
|
||||
/* must evict */
|
||||
pg_cache_evict_unsafe(ctx, descr);
|
||||
pg_cache_put_unsafe(descr);
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
pg_cache_replaceQ_delete_unsafe(ctx, descr);
|
||||
uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);
|
||||
|
||||
return 1;
|
||||
}
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
};
|
||||
uv_rwlock_wrunlock(&pg_cache->replaceQ.lock);
|
||||
|
||||
/* failed to evict */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: last waiter frees descriptor ?
|
||||
*/
|
||||
void pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
Pvoid_t *PValue;
|
||||
struct pg_cache_page_index *page_index;
|
||||
int ret;
|
||||
|
||||
uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
|
||||
PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, descr->id, sizeof(uuid_t));
|
||||
assert(NULL != PValue);
|
||||
page_index = *PValue;
|
||||
uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
|
||||
|
||||
uv_rwlock_wrlock(&page_index->lock);
|
||||
ret = JudyLDel(&page_index->JudyL_array, (Word_t)(descr->start_time / USEC_PER_SEC), PJE0);
|
||||
assert(1 == ret);
|
||||
uv_rwlock_wrunlock(&page_index->lock);
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
|
||||
++ctx->stats.pg_cache_deletions;
|
||||
--pg_cache->page_descriptors;
|
||||
uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
|
||||
|
||||
uv_mutex_lock(&descr->mutex);
|
||||
while (!pg_cache_try_get_unsafe(descr, 1)) {
|
||||
debug(D_RRDENGINE, "%s: Waiting for locked page:", __func__);
|
||||
if(unlikely(debug_flags & D_RRDENGINE))
|
||||
print_page_cache_descr(descr);
|
||||
pg_cache_wait_event_unsafe(descr);
|
||||
}
|
||||
/* even a locked page could be dirty */
|
||||
while (unlikely(descr->flags & RRD_PAGE_DIRTY)) {
|
||||
debug(D_RRDENGINE, "%s: Found dirty page, waiting for it to be flushed:", __func__);
|
||||
if(unlikely(debug_flags & D_RRDENGINE))
|
||||
print_page_cache_descr(descr);
|
||||
pg_cache_wait_event_unsafe(descr);
|
||||
}
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
|
||||
if (descr->flags & RRD_PAGE_POPULATED) {
|
||||
/* only after locking can it be safely deleted from LRU */
|
||||
pg_cache_replaceQ_delete(ctx, descr);
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
|
||||
pg_cache_evict_unsafe(ctx, descr);
|
||||
uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
|
||||
}
|
||||
pg_cache_put(descr);
|
||||
pg_cache_destroy_descr(descr);
|
||||
pg_cache_update_metric_times(page_index);
|
||||
}
|
||||
|
||||
/*
 * Returns non-zero when the page overlaps [start_time, end_time]:
 * - a page starting before start_time must end at or after it;
 * - any other page must start at or before end_time.
 */
static inline int is_page_in_time_range(struct rrdeng_page_cache_descr *descr, usec_t start_time, usec_t end_time)
{
    if (descr->start_time < start_time) {
        return descr->end_time >= start_time;
    }
    return descr->start_time <= end_time;
}
|
||||
|
||||
/* Returns non-zero when point_in_time lies within [start_time, end_time] of the page. */
static inline int is_point_in_time_in_page(struct rrdeng_page_cache_descr *descr, usec_t point_in_time)
{
    return !(point_in_time < descr->start_time || point_in_time > descr->end_time);
}
|
||||
|
||||
/* Update metric oldest and latest timestamps efficiently when adding new values */
|
||||
void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
usec_t oldest_time = page_index->oldest_time;
|
||||
usec_t latest_time = page_index->latest_time;
|
||||
|
||||
if (unlikely(oldest_time == INVALID_TIME || descr->start_time < oldest_time)) {
|
||||
page_index->oldest_time = descr->start_time;
|
||||
}
|
||||
if (likely(descr->end_time > latest_time || latest_time == INVALID_TIME)) {
|
||||
page_index->latest_time = descr->end_time;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update metric oldest and latest timestamps when removing old values */
|
||||
void pg_cache_update_metric_times(struct pg_cache_page_index *page_index)
|
||||
{
|
||||
Pvoid_t *firstPValue, *lastPValue;
|
||||
Word_t firstIndex, lastIndex;
|
||||
struct rrdeng_page_cache_descr *descr;
|
||||
usec_t oldest_time = INVALID_TIME;
|
||||
usec_t latest_time = INVALID_TIME;
|
||||
|
||||
uv_rwlock_rdlock(&page_index->lock);
|
||||
/* Find first page in range */
|
||||
firstIndex = (Word_t)0;
|
||||
firstPValue = JudyLFirst(page_index->JudyL_array, &firstIndex, PJE0);
|
||||
if (likely(NULL != firstPValue)) {
|
||||
descr = *firstPValue;
|
||||
oldest_time = descr->start_time;
|
||||
}
|
||||
lastIndex = (Word_t)-1;
|
||||
lastPValue = JudyLLast(page_index->JudyL_array, &lastIndex, PJE0);
|
||||
if (likely(NULL != lastPValue)) {
|
||||
descr = *lastPValue;
|
||||
latest_time = descr->end_time;
|
||||
}
|
||||
uv_rwlock_rdunlock(&page_index->lock);
|
||||
|
||||
if (unlikely(NULL == firstPValue)) {
|
||||
assert(NULL == lastPValue);
|
||||
page_index->oldest_time = page_index->latest_time = INVALID_TIME;
|
||||
return;
|
||||
}
|
||||
page_index->oldest_time = oldest_time;
|
||||
page_index->latest_time = latest_time;
|
||||
}
|
||||
|
||||
/* If index is NULL lookup by UUID (descr->id) */
|
||||
void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index *index,
|
||||
struct rrdeng_page_cache_descr *descr)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
Pvoid_t *PValue;
|
||||
struct pg_cache_page_index *page_index;
|
||||
|
||||
if (descr->flags & RRD_PAGE_POPULATED) {
|
||||
pg_cache_reserve_pages(ctx, 1);
|
||||
if (!(descr->flags & RRD_PAGE_DIRTY))
|
||||
pg_cache_replaceQ_insert(ctx, descr);
|
||||
}
|
||||
|
||||
if (unlikely(NULL == index)) {
|
||||
uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
|
||||
PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, descr->id, sizeof(uuid_t));
|
||||
assert(NULL != PValue);
|
||||
page_index = *PValue;
|
||||
uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
|
||||
} else {
|
||||
page_index = index;
|
||||
}
|
||||
|
||||
uv_rwlock_wrlock(&page_index->lock);
|
||||
PValue = JudyLIns(&page_index->JudyL_array, (Word_t)(descr->start_time / USEC_PER_SEC), PJE0);
|
||||
*PValue = descr;
|
||||
pg_cache_add_new_metric_time(page_index, descr);
|
||||
uv_rwlock_wrunlock(&page_index->lock);
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock);
|
||||
++ctx->stats.pg_cache_insertions;
|
||||
++pg_cache->page_descriptors;
|
||||
uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Searches for a page and triggers disk I/O if necessary and possible.
|
||||
* Does not get a reference.
|
||||
* Returns page index pointer for given metric UUID.
|
||||
*/
|
||||
struct pg_cache_page_index *
|
||||
pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
struct rrdeng_page_cache_descr *descr = NULL, *preload_array[PAGE_CACHE_MAX_PRELOAD_PAGES];
|
||||
int i, j, k, count, found;
|
||||
unsigned long flags;
|
||||
Pvoid_t *PValue;
|
||||
struct pg_cache_page_index *page_index;
|
||||
Word_t Index;
|
||||
uint8_t failed_to_reserve;
|
||||
|
||||
uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
|
||||
PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t));
|
||||
if (likely(NULL != PValue)) {
|
||||
page_index = *PValue;
|
||||
}
|
||||
uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
|
||||
if (NULL == PValue) {
|
||||
debug(D_RRDENGINE, "%s: No page was found to attempt preload.", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
uv_rwlock_rdlock(&page_index->lock);
|
||||
/* Find first page in range */
|
||||
found = 0;
|
||||
Index = (Word_t)(start_time / USEC_PER_SEC);
|
||||
PValue = JudyLLast(page_index->JudyL_array, &Index, PJE0);
|
||||
if (likely(NULL != PValue)) {
|
||||
descr = *PValue;
|
||||
if (is_page_in_time_range(descr, start_time, end_time)) {
|
||||
found = 1;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
Index = (Word_t)(start_time / USEC_PER_SEC);
|
||||
PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0);
|
||||
if (likely(NULL != PValue)) {
|
||||
descr = *PValue;
|
||||
if (is_page_in_time_range(descr, start_time, end_time)) {
|
||||
found = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
uv_rwlock_rdunlock(&page_index->lock);
|
||||
debug(D_RRDENGINE, "%s: No page was found to attempt preload.", __func__);
|
||||
return page_index;
|
||||
}
|
||||
|
||||
for (count = 0 ;
|
||||
descr != NULL && is_page_in_time_range(descr, start_time, end_time);
|
||||
PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0),
|
||||
descr = unlikely(NULL == PValue) ? NULL : *PValue) {
|
||||
/* Iterate all pages in range */
|
||||
|
||||
if (unlikely(0 == descr->page_length))
|
||||
continue;
|
||||
uv_mutex_lock(&descr->mutex);
|
||||
flags = descr->flags;
|
||||
if (pg_cache_can_get_unsafe(descr, 0)) {
|
||||
if (flags & RRD_PAGE_POPULATED) {
|
||||
/* success */
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
debug(D_RRDENGINE, "%s: Page was found in memory.", __func__);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) {
|
||||
preload_array[count++] = descr;
|
||||
if (PAGE_CACHE_MAX_PRELOAD_PAGES == count) {
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
break;
|
||||
}
|
||||
}
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
|
||||
};
|
||||
uv_rwlock_rdunlock(&page_index->lock);
|
||||
|
||||
failed_to_reserve = 0;
|
||||
for (i = 0 ; i < count && !failed_to_reserve ; ++i) {
|
||||
struct rrdeng_cmd cmd;
|
||||
struct rrdeng_page_cache_descr *next;
|
||||
|
||||
descr = preload_array[i];
|
||||
if (NULL == descr) {
|
||||
continue;
|
||||
}
|
||||
if (!pg_cache_try_reserve_pages(ctx, 1)) {
|
||||
failed_to_reserve = 1;
|
||||
break;
|
||||
}
|
||||
cmd.opcode = RRDENG_READ_EXTENT;
|
||||
cmd.read_extent.page_cache_descr[0] = descr;
|
||||
/* don't use this page again */
|
||||
preload_array[i] = NULL;
|
||||
for (j = 0, k = 1 ; j < count ; ++j) {
|
||||
next = preload_array[j];
|
||||
if (NULL == next) {
|
||||
continue;
|
||||
}
|
||||
if (descr->extent == next->extent) {
|
||||
/* same extent, consolidate */
|
||||
if (!pg_cache_try_reserve_pages(ctx, 1)) {
|
||||
failed_to_reserve = 1;
|
||||
break;
|
||||
}
|
||||
cmd.read_extent.page_cache_descr[k++] = next;
|
||||
/* don't use this page again */
|
||||
preload_array[j] = NULL;
|
||||
}
|
||||
}
|
||||
cmd.read_extent.page_count = k;
|
||||
rrdeng_enq_cmd(&ctx->worker_config, &cmd);
|
||||
}
|
||||
if (failed_to_reserve) {
|
||||
debug(D_RRDENGINE, "%s: Failed to reserve enough memory, canceling I/O.", __func__);
|
||||
for (i = 0 ; i < count ; ++i) {
|
||||
descr = preload_array[i];
|
||||
if (NULL == descr) {
|
||||
continue;
|
||||
}
|
||||
pg_cache_put(descr);
|
||||
}
|
||||
}
|
||||
if (!count) {
|
||||
/* no such page */
|
||||
debug(D_RRDENGINE, "%s: No page was eligible to attempt preload.", __func__);
|
||||
}
|
||||
return page_index;
|
||||
}
|
||||
|
||||
/*
|
||||
* Searches for a page and gets a reference.
|
||||
* When point_in_time is INVALID_TIME get any page.
|
||||
* If index is NULL lookup by UUID (id).
|
||||
*/
|
||||
struct rrdeng_page_cache_descr *
|
||||
pg_cache_lookup(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id,
|
||||
usec_t point_in_time)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
struct rrdeng_page_cache_descr *descr = NULL;
|
||||
unsigned long flags;
|
||||
Pvoid_t *PValue;
|
||||
struct pg_cache_page_index *page_index;
|
||||
Word_t Index;
|
||||
uint8_t page_not_in_cache;
|
||||
|
||||
if (unlikely(NULL == index)) {
|
||||
uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
|
||||
PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, id, sizeof(uuid_t));
|
||||
if (likely(NULL != PValue)) {
|
||||
page_index = *PValue;
|
||||
}
|
||||
uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
|
||||
if (NULL == PValue) {
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
page_index = index;
|
||||
}
|
||||
pg_cache_reserve_pages(ctx, 1);
|
||||
|
||||
page_not_in_cache = 0;
|
||||
uv_rwlock_rdlock(&page_index->lock);
|
||||
while (1) {
|
||||
Index = (Word_t)(point_in_time / USEC_PER_SEC);
|
||||
PValue = JudyLLast(page_index->JudyL_array, &Index, PJE0);
|
||||
if (likely(NULL != PValue)) {
|
||||
descr = *PValue;
|
||||
}
|
||||
if (NULL == PValue ||
|
||||
0 == descr->page_length ||
|
||||
(INVALID_TIME != point_in_time &&
|
||||
!is_point_in_time_in_page(descr, point_in_time))) {
|
||||
/* non-empty page not found */
|
||||
uv_rwlock_rdunlock(&page_index->lock);
|
||||
|
||||
pg_cache_release_pages(ctx, 1);
|
||||
return NULL;
|
||||
}
|
||||
uv_mutex_lock(&descr->mutex);
|
||||
flags = descr->flags;
|
||||
if ((flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 0)) {
|
||||
/* success */
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
debug(D_RRDENGINE, "%s: Page was found in memory.", __func__);
|
||||
break;
|
||||
}
|
||||
if (!(flags & RRD_PAGE_POPULATED) && pg_cache_try_get_unsafe(descr, 1)) {
|
||||
struct rrdeng_cmd cmd;
|
||||
|
||||
uv_rwlock_rdunlock(&page_index->lock);
|
||||
|
||||
cmd.opcode = RRDENG_READ_PAGE;
|
||||
cmd.read_page.page_cache_descr = descr;
|
||||
rrdeng_enq_cmd(&ctx->worker_config, &cmd);
|
||||
|
||||
debug(D_RRDENGINE, "%s: Waiting for page to be asynchronously read from disk:", __func__);
|
||||
if(unlikely(debug_flags & D_RRDENGINE))
|
||||
print_page_cache_descr(descr);
|
||||
while (!(descr->flags & RRD_PAGE_POPULATED)) {
|
||||
pg_cache_wait_event_unsafe(descr);
|
||||
}
|
||||
/* success */
|
||||
/* Downgrade exclusive reference to allow other readers */
|
||||
descr->flags &= ~RRD_PAGE_LOCKED;
|
||||
pg_cache_wake_up_waiters_unsafe(descr);
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1);
|
||||
return descr;
|
||||
}
|
||||
uv_rwlock_rdunlock(&page_index->lock);
|
||||
debug(D_RRDENGINE, "%s: Waiting for page to be unlocked:", __func__);
|
||||
if(unlikely(debug_flags & D_RRDENGINE))
|
||||
print_page_cache_descr(descr);
|
||||
if (!(flags & RRD_PAGE_POPULATED))
|
||||
page_not_in_cache = 1;
|
||||
pg_cache_wait_event_unsafe(descr);
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
|
||||
/* reset scan to find again */
|
||||
uv_rwlock_rdlock(&page_index->lock);
|
||||
}
|
||||
uv_rwlock_rdunlock(&page_index->lock);
|
||||
|
||||
if (!(flags & RRD_PAGE_DIRTY))
|
||||
pg_cache_replaceQ_set_hot(ctx, descr);
|
||||
pg_cache_release_pages(ctx, 1);
|
||||
if (page_not_in_cache)
|
||||
rrd_stat_atomic_add(&ctx->stats.pg_cache_misses, 1);
|
||||
else
|
||||
rrd_stat_atomic_add(&ctx->stats.pg_cache_hits, 1);
|
||||
return descr;
|
||||
}
|
||||
|
||||
/*
 * Allocates and initializes an empty page index for the metric with UUID *id.
 * The JudyL array starts empty and both time boundaries start as INVALID_TIME.
 * Returns the new page index.
 */
struct pg_cache_page_index *create_page_index(uuid_t *id)
{
    struct pg_cache_page_index *page_index;
    int ret;

    page_index = mallocz(sizeof(*page_index));
    page_index->JudyL_array = (Pvoid_t) NULL;
    uuid_copy(page_index->id, *id);
    /* keep the call outside assert() so that the lock is initialized even when NDEBUG is defined */
    ret = uv_rwlock_init(&page_index->lock);
    assert(0 == ret);
    (void) ret; /* unused when assert() is compiled out */
    page_index->oldest_time = INVALID_TIME;
    page_index->latest_time = INVALID_TIME;

    return page_index;
}
|
||||
|
||||
static void init_metrics_index(struct rrdengine_instance *ctx)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
pg_cache->metrics_index.JudyHS_array = (Pvoid_t) NULL;
|
||||
assert(0 == uv_rwlock_init(&pg_cache->metrics_index.lock));
|
||||
}
|
||||
|
||||
static void init_replaceQ(struct rrdengine_instance *ctx)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
pg_cache->replaceQ.head = NULL;
|
||||
pg_cache->replaceQ.tail = NULL;
|
||||
assert(0 == uv_rwlock_init(&pg_cache->replaceQ.lock));
|
||||
}
|
||||
|
||||
static void init_commited_page_index(struct rrdengine_instance *ctx)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
pg_cache->commited_page_index.JudyL_array = (Pvoid_t) NULL;
|
||||
assert(0 == uv_rwlock_init(&pg_cache->commited_page_index.lock));
|
||||
pg_cache->commited_page_index.latest_corr_id = 0;
|
||||
pg_cache->commited_page_index.nr_commited_pages = 0;
|
||||
}
|
||||
|
||||
void init_page_cache(struct rrdengine_instance *ctx)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
pg_cache->page_descriptors = 0;
|
||||
pg_cache->populated_pages = 0;
|
||||
assert(0 == uv_rwlock_init(&pg_cache->pg_cache_rwlock));
|
||||
|
||||
init_metrics_index(ctx);
|
||||
init_replaceQ(ctx);
|
||||
init_commited_page_index(ctx);
|
||||
}
|
132
database/engine/pagecache.h
Normal file
132
database/engine/pagecache.h
Normal file
|
@ -0,0 +1,132 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_PAGECACHE_H
|
||||
#define NETDATA_PAGECACHE_H
|
||||
|
||||
#include "rrdengine.h"
|
||||
|
||||
/* Forward declarations */
|
||||
struct rrdengine_instance;
|
||||
struct extent_info;
|
||||
|
||||
#define INVALID_TIME (0)
|
||||
|
||||
/* Page flags, stored as a bit mask in rrdeng_page_cache_descr.flags */
#define RRD_PAGE_DIRTY          (1UL << 0) /* cleared once the page has been written to a datafile */
#define RRD_PAGE_LOCKED         (1UL << 1) /* cleared by a lookup when it downgrades its exclusive reference */
#define RRD_PAGE_READ_PENDING   (1UL << 2) /* set when an extent read is issued, cleared on completion */
#define RRD_PAGE_WRITE_PENDING  (1UL << 3) /* set when the page is picked for flushing, cleared on completion */
#define RRD_PAGE_POPULATED      (1UL << 4) /* set when a page buffer has been attached to the descriptor */
|
||||
|
||||
struct rrdeng_page_cache_descr {
|
||||
void *page;
|
||||
uint32_t page_length;
|
||||
usec_t start_time;
|
||||
usec_t end_time;
|
||||
uuid_t *id; /* never changes */
|
||||
struct extent_info *extent;
|
||||
unsigned long flags;
|
||||
void *private;
|
||||
struct rrdeng_page_cache_descr *prev;
|
||||
struct rrdeng_page_cache_descr *next;
|
||||
|
||||
/* TODO: move waiter logic to concurrency table */
|
||||
unsigned refcnt;
|
||||
uv_mutex_t mutex; /* always take it after the page cache lock or after the commit lock */
|
||||
uv_cond_t cond;
|
||||
unsigned waiters;
|
||||
struct rrdeng_collect_handle *handle; /* API user */
|
||||
};
|
||||
|
||||
#define PAGE_CACHE_MAX_PRELOAD_PAGES (256)
|
||||
|
||||
/* maps time ranges to pages */
|
||||
struct pg_cache_page_index {
|
||||
uuid_t id;
|
||||
/*
|
||||
* care: JudyL_array indices are converted from useconds to seconds to fit in one word in 32-bit architectures
|
||||
* TODO: examine if we want to support better granularity than seconds
|
||||
*/
|
||||
Pvoid_t JudyL_array;
|
||||
uv_rwlock_t lock;
|
||||
|
||||
/*
|
||||
* Only one effective writer, data deletion workqueue.
|
||||
* It's also written during the DB loading phase.
|
||||
*/
|
||||
usec_t oldest_time;
|
||||
|
||||
/*
|
||||
* Only one effective writer, data collection thread.
|
||||
* It's also written by the data deletion workqueue when data collection is disabled for this metric.
|
||||
*/
|
||||
usec_t latest_time;
|
||||
};
|
||||
|
||||
/* maps UUIDs to page indices */
|
||||
struct pg_cache_metrics_index {
|
||||
uv_rwlock_t lock;
|
||||
Pvoid_t JudyHS_array;
|
||||
};
|
||||
|
||||
/* gathers dirty pages to be written on disk */
|
||||
struct pg_cache_commited_page_index {
|
||||
uv_rwlock_t lock;
|
||||
|
||||
Pvoid_t JudyL_array;
|
||||
|
||||
/*
|
||||
* Dirty page correlation ID is a hint. Dirty pages that are correlated should have
|
||||
* a small correlation ID difference. Dirty pages in memory should never have the
|
||||
* same ID at the same time for correctness.
|
||||
*/
|
||||
Word_t latest_corr_id;
|
||||
|
||||
unsigned nr_commited_pages;
|
||||
};
|
||||
|
||||
/* gathers populated pages to be evicted */
|
||||
struct pg_cache_replaceQ {
|
||||
uv_rwlock_t lock; /* LRU lock */
|
||||
|
||||
struct rrdeng_page_cache_descr *head; /* LRU */
|
||||
struct rrdeng_page_cache_descr *tail; /* MRU */
|
||||
};
|
||||
|
||||
struct page_cache { /* TODO: add statistics */
|
||||
uv_rwlock_t pg_cache_rwlock; /* page cache lock */
|
||||
|
||||
struct pg_cache_metrics_index metrics_index;
|
||||
struct pg_cache_commited_page_index commited_page_index;
|
||||
struct pg_cache_replaceQ replaceQ;
|
||||
|
||||
unsigned page_descriptors;
|
||||
unsigned populated_pages;
|
||||
};
|
||||
|
||||
extern void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_cache_descr *descr);
|
||||
extern void pg_cache_wait_event_unsafe(struct rrdeng_page_cache_descr *descr);
|
||||
extern unsigned long pg_cache_wait_event(struct rrdeng_page_cache_descr *descr);
|
||||
extern void pg_cache_replaceQ_insert(struct rrdengine_instance *ctx,
|
||||
struct rrdeng_page_cache_descr *descr);
|
||||
extern void pg_cache_replaceQ_delete(struct rrdengine_instance *ctx,
|
||||
struct rrdeng_page_cache_descr *descr);
|
||||
extern void pg_cache_replaceQ_set_hot(struct rrdengine_instance *ctx,
|
||||
struct rrdeng_page_cache_descr *descr);
|
||||
extern struct rrdeng_page_cache_descr *pg_cache_create_descr(void);
|
||||
extern void pg_cache_put_unsafe(struct rrdeng_page_cache_descr *descr);
|
||||
extern void pg_cache_put(struct rrdeng_page_cache_descr *descr);
|
||||
extern void pg_cache_insert(struct rrdengine_instance *ctx, struct pg_cache_page_index *index,
|
||||
struct rrdeng_page_cache_descr *descr);
|
||||
extern void pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_cache_descr *descr);
|
||||
extern struct pg_cache_page_index *
|
||||
pg_cache_preload(struct rrdengine_instance *ctx, uuid_t *id, usec_t start_time, usec_t end_time);
|
||||
extern struct rrdeng_page_cache_descr *
|
||||
pg_cache_lookup(struct rrdengine_instance *ctx, struct pg_cache_page_index *index, uuid_t *id,
|
||||
usec_t point_in_time);
|
||||
extern struct pg_cache_page_index *create_page_index(uuid_t *id);
|
||||
extern void init_page_cache(struct rrdengine_instance *ctx);
|
||||
extern void pg_cache_add_new_metric_time(struct pg_cache_page_index *page_index, struct rrdeng_page_cache_descr *descr);
|
||||
extern void pg_cache_update_metric_times(struct pg_cache_page_index *page_index);
|
||||
|
||||
#endif /* NETDATA_PAGECACHE_H */
|
119
database/engine/rrddiskprotocol.h
Normal file
119
database/engine/rrddiskprotocol.h
Normal file
|
@ -0,0 +1,119 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_RRDDISKPROTOCOL_H
|
||||
#define NETDATA_RRDDISKPROTOCOL_H
|
||||
|
||||
/* Block size of the on-disk files; the super-blocks below are padded to this size */
#define RRDENG_BLOCK_SIZE   (4096)
#define RRDFILE_ALIGNMENT   RRDENG_BLOCK_SIZE

/* Magic strings identifying data files and journal files, and the size of their field */
#define RRDENG_MAGIC_SZ     (32)
#define RRDENG_DF_MAGIC     "netdata-data-file"
#define RRDENG_JF_MAGIC     "netdata-journal-file"

/* Format version strings of data files and journal files, and the size of their field */
#define RRDENG_VER_SZ       (16)
#define RRDENG_DF_VER       "1.0"
#define RRDENG_JF_VER       "1.0"

#define UUID_SZ             (16)
#define CHECKSUM_SZ         (4) /* CRC32 */

/* Values of rrdeng_df_extent_header.compression_algorithm */
#define RRD_NO_COMPRESSION  (0)
#define RRD_LZ4             (1)
|
||||
|
||||
#define RRDENG_DF_SB_PADDING_SZ (RRDENG_BLOCK_SIZE - (RRDENG_MAGIC_SZ + RRDENG_VER_SZ + sizeof(uint8_t)))
|
||||
/*
|
||||
* Data file persistent super-block
|
||||
*/
|
||||
struct rrdeng_df_sb {
|
||||
char magic_number[RRDENG_MAGIC_SZ];
|
||||
char version[RRDENG_VER_SZ];
|
||||
uint8_t tier;
|
||||
uint8_t padding[RRDENG_DF_SB_PADDING_SZ];
|
||||
} __attribute__ ((packed));
|
||||
|
||||
/*
 * Page types, stored in rrdeng_extent_page_descr.type
 */
#define PAGE_METRICS    (0)
#define PAGE_LOGS       (1) /* reserved */
|
||||
|
||||
/*
|
||||
* Data file page descriptor
|
||||
*/
|
||||
struct rrdeng_extent_page_descr {
|
||||
uint8_t type;
|
||||
|
||||
uint8_t uuid[UUID_SZ];
|
||||
uint32_t page_length;
|
||||
uint64_t start_time;
|
||||
uint64_t end_time;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
/*
|
||||
* Data file extent header
|
||||
*/
|
||||
struct rrdeng_df_extent_header {
|
||||
uint32_t payload_length;
|
||||
uint8_t compression_algorithm;
|
||||
uint8_t number_of_pages;
|
||||
/* #number_of_pages page descriptors follow */
|
||||
struct rrdeng_extent_page_descr descr[];
|
||||
} __attribute__ ((packed));
|
||||
|
||||
/*
|
||||
* Data file extent trailer
|
||||
*/
|
||||
struct rrdeng_df_extent_trailer {
|
||||
uint8_t checksum[CHECKSUM_SZ]; /* CRC32 */
|
||||
} __attribute__ ((packed));
|
||||
|
||||
#define RRDENG_JF_SB_PADDING_SZ (RRDENG_BLOCK_SIZE - (RRDENG_MAGIC_SZ + RRDENG_VER_SZ))
|
||||
/*
|
||||
* Journal file super-block
|
||||
*/
|
||||
struct rrdeng_jf_sb {
|
||||
char magic_number[RRDENG_MAGIC_SZ];
|
||||
char version[RRDENG_VER_SZ];
|
||||
uint8_t padding[RRDENG_JF_SB_PADDING_SZ];
|
||||
} __attribute__ ((packed));
|
||||
|
||||
/*
 * Transaction record types, stored in rrdeng_jf_transaction_header.type
 */
#define STORE_PADDING   (0)
#define STORE_DATA      (1)
#define STORE_LOGS      (2) /* reserved */
|
||||
|
||||
/*
 * Journal file transaction record header.
 * It is followed by payload_length bytes of payload and then by the record trailer.
 */
struct rrdeng_jf_transaction_header {
    /* when set to STORE_PADDING jump to start of next block */
    uint8_t type;

    uint32_t reserved;          /* reserved for future use */
    uint64_t id;                /* transaction ID */
    uint16_t payload_length;    /* payload size in bytes */
} __attribute__ ((packed));
|
||||
|
||||
/*
|
||||
* Journal file transaction record trailer
|
||||
*/
|
||||
struct rrdeng_jf_transaction_trailer {
|
||||
uint8_t checksum[CHECKSUM_SZ]; /* CRC32 */
|
||||
} __attribute__ ((packed));
|
||||
|
||||
/*
|
||||
* Journal file STORE_DATA action
|
||||
*/
|
||||
struct rrdeng_jf_store_data {
|
||||
/* data file extent information */
|
||||
uint64_t extent_offset;
|
||||
uint32_t extent_size;
|
||||
|
||||
uint8_t number_of_pages;
|
||||
/* #number_of_pages page descriptors follow */
|
||||
struct rrdeng_extent_page_descr descr[];
|
||||
} __attribute__ ((packed));
|
||||
|
||||
#endif /* NETDATA_RRDDISKPROTOCOL_H */
|
780
database/engine/rrdengine.c
Normal file
780
database/engine/rrdengine.c
Normal file
|
@ -0,0 +1,780 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#define NETDATA_RRD_INTERNALS
|
||||
|
||||
#include "rrdengine.h"
|
||||
|
||||
void sanity_check(void)
|
||||
{
|
||||
/* Magic numbers must fit in the super-blocks */
|
||||
BUILD_BUG_ON(strlen(RRDENG_DF_MAGIC) > RRDENG_MAGIC_SZ);
|
||||
BUILD_BUG_ON(strlen(RRDENG_JF_MAGIC) > RRDENG_MAGIC_SZ);
|
||||
|
||||
/* Version strings must fit in the super-blocks */
|
||||
BUILD_BUG_ON(strlen(RRDENG_DF_VER) > RRDENG_VER_SZ);
|
||||
BUILD_BUG_ON(strlen(RRDENG_JF_VER) > RRDENG_VER_SZ);
|
||||
|
||||
/* Data file super-block cannot be larger than RRDENG_BLOCK_SIZE */
|
||||
BUILD_BUG_ON(RRDENG_DF_SB_PADDING_SZ < 0);
|
||||
|
||||
BUILD_BUG_ON(sizeof(uuid_t) != UUID_SZ); /* check UUID size */
|
||||
|
||||
/* page count must fit in 8 bits */
|
||||
BUILD_BUG_ON(MAX_PAGES_PER_EXTENT > 255);
|
||||
}
|
||||
|
||||
/*
 * libuv completion callback of an extent read.
 *
 * req->data is the extent I/O descriptor of the read. The callback verifies the
 * CRC32 of the extent, decompresses the payload when the extent is compressed,
 * copies every requested page into a freshly allocated buffer, marks the page
 * descriptor as populated and either drops the reference (release_descr) or wakes
 * up the waiters. The I/O buffer and the I/O descriptor are always freed.
 *
 * A CRC32 mismatch or a decompression failure terminates the process with UV_EIO.
 * NOTE(review): when the read itself fails (req->result < 0) only an error is
 * logged; the pages stay unpopulated and the completion is not signalled.
 */
void read_extent_cb(uv_fs_t* req)
{
    struct rrdengine_worker_config* wc = req->loop->data;
    struct rrdengine_instance *ctx = wc->ctx;
    struct extent_io_descriptor *xt_io_descr;
    struct rrdeng_page_cache_descr *descr;
    int ret;
    unsigned i, j, count;
    void *page, *uncompressed_buf = NULL;
    uint32_t payload_length, payload_offset, page_offset, uncompressed_payload_length;
    struct rrdengine_datafile *datafile;
    /* persistent structures */
    struct rrdeng_df_extent_header *header;
    struct rrdeng_df_extent_trailer *trailer;
    uLong crc;

    xt_io_descr = req->data;
    if (req->result < 0) {
        error("%s: uv_fs_read: %s", __func__, uv_strerror((int)req->result));
        goto cleanup;
    }

    header = xt_io_descr->buf;
    payload_length = header->payload_length;
    count = header->number_of_pages;

    payload_offset = sizeof(*header) + sizeof(header->descr[0]) * count;

    /* the checksum covers everything in the extent except the trailer itself */
    trailer = xt_io_descr->buf + xt_io_descr->bytes - sizeof(*trailer);
    crc = crc32(0L, Z_NULL, 0);
    crc = crc32(crc, xt_io_descr->buf, xt_io_descr->bytes - sizeof(*trailer));
    ret = crc32cmp(trailer->checksum, crc);
    datafile = xt_io_descr->descr_array[0]->extent->datafile;
    debug(D_RRDENGINE, "%s: Extent at offset %"PRIu64"(%u) was read from datafile %u-%u. CRC32 check: %s", __func__,
          xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno, ret ? "FAILED" : "SUCCEEDED");
    if (unlikely(ret)) {
        /* TODO: handle errors */
        exit(UV_EIO);
    }

    if (RRD_NO_COMPRESSION != header->compression_algorithm) {
        uncompressed_payload_length = 0;
        for (i = 0 ; i < count ; ++i) {
            uncompressed_payload_length += header->descr[i].page_length;
        }
        uncompressed_buf = mallocz(uncompressed_payload_length);
        ret = LZ4_decompress_safe(xt_io_descr->buf + payload_offset, uncompressed_buf,
                                  payload_length, uncompressed_payload_length);
        if (unlikely(ret < 0)) {
            /*
             * A negative value means that the payload is malformed. Treat it like a checksum
             * failure instead of publishing the contents of an uninitialized buffer as page data.
             */
            /* TODO: handle errors */
            error("%s: LZ4 decompression of extent at offset %"PRIu64" failed (%d).", __func__,
                  xt_io_descr->pos, ret);
            exit(UV_EIO);
        }
        ctx->stats.before_decompress_bytes += payload_length;
        ctx->stats.after_decompress_bytes += ret;
        debug(D_RRDENGINE, "LZ4 decompressed %u bytes to %d bytes.", payload_length, ret);
        /* care, we don't hold the descriptor mutex */
    }

    for (i = 0 ; i < xt_io_descr->descr_count; ++i) {
        page = mallocz(RRDENG_BLOCK_SIZE);
        descr = xt_io_descr->descr_array[i];
        /* locate the page inside the extent, page_offset is the sum of the lengths of the pages before it */
        for (j = 0, page_offset = 0; j < count; ++j) {
            /* care, we don't hold the descriptor mutex */
            if (!uuid_compare(*(uuid_t *) header->descr[j].uuid, *descr->id) &&
                header->descr[j].page_length == descr->page_length &&
                header->descr[j].start_time == descr->start_time &&
                header->descr[j].end_time == descr->end_time) {
                break;
            }
            page_offset += header->descr[j].page_length;
        }
        /* care, we don't hold the descriptor mutex */
        if (RRD_NO_COMPRESSION == header->compression_algorithm) {
            (void) memcpy(page, xt_io_descr->buf + payload_offset + page_offset, descr->page_length);
        } else {
            (void) memcpy(page, uncompressed_buf + page_offset, descr->page_length);
        }
        pg_cache_replaceQ_insert(ctx, descr);
        uv_mutex_lock(&descr->mutex);
        descr->page = page;
        descr->flags |= RRD_PAGE_POPULATED;
        descr->flags &= ~RRD_PAGE_READ_PENDING;
        debug(D_RRDENGINE, "%s: Waking up waiters.", __func__);
        if (xt_io_descr->release_descr) {
            pg_cache_put_unsafe(descr);
        } else {
            pg_cache_wake_up_waiters_unsafe(descr);
        }
        uv_mutex_unlock(&descr->mutex);
    }
    if (RRD_NO_COMPRESSION != header->compression_algorithm) {
        free(uncompressed_buf);
    }
    if (xt_io_descr->completion)
        complete(xt_io_descr->completion);
cleanup:
    uv_fs_req_cleanup(req);
    free(xt_io_descr->buf);
    free(xt_io_descr);
}
|
||||
|
||||
|
||||
static void do_read_extent(struct rrdengine_worker_config* wc,
|
||||
struct rrdeng_page_cache_descr **descr,
|
||||
unsigned count,
|
||||
uint8_t release_descr)
|
||||
{
|
||||
struct rrdengine_instance *ctx = wc->ctx;
|
||||
int ret;
|
||||
unsigned i, size_bytes, pos, real_io_size;
|
||||
// uint32_t payload_length;
|
||||
struct extent_io_descriptor *xt_io_descr;
|
||||
struct rrdengine_datafile *datafile;
|
||||
|
||||
datafile = descr[0]->extent->datafile;
|
||||
pos = descr[0]->extent->offset;
|
||||
size_bytes = descr[0]->extent->size;
|
||||
|
||||
xt_io_descr = mallocz(sizeof(*xt_io_descr));
|
||||
ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes));
|
||||
if (unlikely(ret)) {
|
||||
fatal("posix_memalign:%s", strerror(ret));
|
||||
/* free(xt_io_descr);
|
||||
return;*/
|
||||
}
|
||||
for (i = 0 ; i < count; ++i) {
|
||||
uv_mutex_lock(&descr[i]->mutex);
|
||||
descr[i]->flags |= RRD_PAGE_READ_PENDING;
|
||||
// payload_length = descr[i]->page_length;
|
||||
uv_mutex_unlock(&descr[i]->mutex);
|
||||
|
||||
xt_io_descr->descr_array[i] = descr[i];
|
||||
}
|
||||
xt_io_descr->descr_count = count;
|
||||
xt_io_descr->bytes = size_bytes;
|
||||
xt_io_descr->pos = pos;
|
||||
xt_io_descr->req.data = xt_io_descr;
|
||||
xt_io_descr->completion = NULL;
|
||||
/* xt_io_descr->descr_commit_idx_array[0] */
|
||||
xt_io_descr->release_descr = release_descr;
|
||||
|
||||
real_io_size = ALIGN_BYTES_CEILING(size_bytes);
|
||||
xt_io_descr->iov = uv_buf_init((void *)xt_io_descr->buf, real_io_size);
|
||||
ret = uv_fs_read(wc->loop, &xt_io_descr->req, datafile->file, &xt_io_descr->iov, 1, pos, read_extent_cb);
|
||||
assert (-1 != ret);
|
||||
ctx->stats.io_read_bytes += real_io_size;
|
||||
++ctx->stats.io_read_requests;
|
||||
ctx->stats.io_read_extent_bytes += real_io_size;
|
||||
++ctx->stats.io_read_extents;
|
||||
ctx->stats.pg_cache_backfills += count;
|
||||
}
|
||||
|
||||
static void commit_data_extent(struct rrdengine_worker_config* wc, struct extent_io_descriptor *xt_io_descr)
|
||||
{
|
||||
struct rrdengine_instance *ctx = wc->ctx;
|
||||
unsigned count, payload_length, descr_size, size_bytes;
|
||||
void *buf;
|
||||
/* persistent structures */
|
||||
struct rrdeng_df_extent_header *df_header;
|
||||
struct rrdeng_jf_transaction_header *jf_header;
|
||||
struct rrdeng_jf_store_data *jf_metric_data;
|
||||
struct rrdeng_jf_transaction_trailer *jf_trailer;
|
||||
uLong crc;
|
||||
|
||||
df_header = xt_io_descr->buf;
|
||||
count = df_header->number_of_pages;
|
||||
descr_size = sizeof(*jf_metric_data->descr) * count;
|
||||
payload_length = sizeof(*jf_metric_data) + descr_size;
|
||||
size_bytes = sizeof(*jf_header) + payload_length + sizeof(*jf_trailer);
|
||||
|
||||
buf = wal_get_transaction_buffer(wc, size_bytes);
|
||||
|
||||
jf_header = buf;
|
||||
jf_header->type = STORE_DATA;
|
||||
jf_header->reserved = 0;
|
||||
jf_header->id = ctx->commit_log.transaction_id++;
|
||||
jf_header->payload_length = payload_length;
|
||||
|
||||
jf_metric_data = buf + sizeof(*jf_header);
|
||||
jf_metric_data->extent_offset = xt_io_descr->pos;
|
||||
jf_metric_data->extent_size = xt_io_descr->bytes;
|
||||
jf_metric_data->number_of_pages = count;
|
||||
memcpy(jf_metric_data->descr, df_header->descr, descr_size);
|
||||
|
||||
jf_trailer = buf + sizeof(*jf_header) + payload_length;
|
||||
crc = crc32(0L, Z_NULL, 0);
|
||||
crc = crc32(crc, buf, sizeof(*jf_header) + payload_length);
|
||||
crc32set(jf_trailer->checksum, crc);
|
||||
}
|
||||
|
||||
static void do_commit_transaction(struct rrdengine_worker_config* wc, uint8_t type, void *data)
|
||||
{
|
||||
switch (type) {
|
||||
case STORE_DATA:
|
||||
commit_data_extent(wc, (struct extent_io_descriptor *)data);
|
||||
break;
|
||||
default:
|
||||
assert(type == STORE_DATA);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * libuv completion callback of an extent write.
 *
 * For every page of the extent: removes it from the committed page index, puts it
 * in the replacement queue, clears RRD_PAGE_DIRTY and RRD_PAGE_WRITE_PENDING and
 * wakes up its waiters. The completion, if any, is signalled afterwards.
 * The I/O buffer and the I/O descriptor are always freed.
 *
 * NOTE(review): when the write fails (req->result < 0) only an error is logged;
 * the pages keep their flags and the completion is not signalled.
 */
void flush_pages_cb(uv_fs_t* req)
{
    struct rrdengine_worker_config* wc = req->loop->data;
    struct rrdengine_instance *ctx = wc->ctx;
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct extent_io_descriptor *xt_io_descr = req->data;
    struct rrdeng_page_cache_descr *descr;
    struct rrdengine_datafile *datafile;
    unsigned page_no, nr_pages;
    Word_t commit_id;
    int ret;

    if (req->result < 0) {
        error("%s: uv_fs_write: %s", __func__, uv_strerror((int)req->result));
        goto cleanup;
    }
    datafile = xt_io_descr->descr_array[0]->extent->datafile;
    debug(D_RRDENGINE, "%s: Extent at offset %"PRIu64"(%u) was written to datafile %u-%u. Waking up waiters.",
          __func__, xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno);

    nr_pages = xt_io_descr->descr_count;
    for (page_no = 0 ; page_no < nr_pages ; ++page_no) {
        /* care, we don't hold the descriptor mutex */
        descr = xt_io_descr->descr_array[page_no];

        /* the page has reached the disk, drop it from the committed page index */
        uv_rwlock_wrlock(&pg_cache->commited_page_index.lock);
        commit_id = xt_io_descr->descr_commit_idx_array[page_no];
        ret = JudyLDel(&pg_cache->commited_page_index.JudyL_array, commit_id, PJE0);
        assert(1 == ret);
        --pg_cache->commited_page_index.nr_commited_pages;
        uv_rwlock_wrunlock(&pg_cache->commited_page_index.lock);

        pg_cache_replaceQ_insert(ctx, descr);

        uv_mutex_lock(&descr->mutex);
        descr->flags &= ~(RRD_PAGE_DIRTY | RRD_PAGE_WRITE_PENDING);
        /* wake up waiters, care no reference being held */
        pg_cache_wake_up_waiters_unsafe(descr);
        uv_mutex_unlock(&descr->mutex);
    }
    if (xt_io_descr->completion) {
        complete(xt_io_descr->completion);
    }
cleanup:
    uv_fs_req_cleanup(req);
    free(xt_io_descr->buf);
    free(xt_io_descr);
}
|
||||
|
||||
/*
|
||||
* completion must be NULL or valid.
|
||||
* Returns 0 when no flushing can take place.
|
||||
* Returns datafile bytes to be written on successful flushing initiation.
|
||||
*/
|
||||
static int do_flush_pages(struct rrdengine_worker_config* wc, int force, struct completion *completion)
|
||||
{
|
||||
struct rrdengine_instance *ctx = wc->ctx;
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
int ret;
|
||||
int compressed_size, max_compressed_size = 0;
|
||||
unsigned i, count, size_bytes, pos, real_io_size;
|
||||
uint32_t uncompressed_payload_length, payload_offset;
|
||||
struct rrdeng_page_cache_descr *descr, *eligible_pages[MAX_PAGES_PER_EXTENT];
|
||||
struct extent_io_descriptor *xt_io_descr;
|
||||
void *compressed_buf = NULL;
|
||||
Word_t descr_commit_idx_array[MAX_PAGES_PER_EXTENT];
|
||||
Pvoid_t *PValue;
|
||||
Word_t Index;
|
||||
uint8_t compression_algorithm = ctx->global_compress_alg;
|
||||
struct extent_info *extent;
|
||||
struct rrdengine_datafile *datafile;
|
||||
/* persistent structures */
|
||||
struct rrdeng_df_extent_header *header;
|
||||
struct rrdeng_df_extent_trailer *trailer;
|
||||
uLong crc;
|
||||
|
||||
if (force) {
|
||||
debug(D_RRDENGINE, "Asynchronous flushing of extent has been forced by page pressure.");
|
||||
}
|
||||
uv_rwlock_rdlock(&pg_cache->commited_page_index.lock);
|
||||
for (Index = 0, count = 0, uncompressed_payload_length = 0,
|
||||
PValue = JudyLFirst(pg_cache->commited_page_index.JudyL_array, &Index, PJE0),
|
||||
descr = unlikely(NULL == PValue) ? NULL : *PValue ;
|
||||
|
||||
descr != NULL && count != MAX_PAGES_PER_EXTENT ;
|
||||
|
||||
PValue = JudyLNext(pg_cache->commited_page_index.JudyL_array, &Index, PJE0),
|
||||
descr = unlikely(NULL == PValue) ? NULL : *PValue) {
|
||||
assert(0 != descr->page_length);
|
||||
|
||||
uv_mutex_lock(&descr->mutex);
|
||||
if (!(descr->flags & RRD_PAGE_WRITE_PENDING)) {
|
||||
/* care, no reference being held */
|
||||
descr->flags |= RRD_PAGE_WRITE_PENDING;
|
||||
uncompressed_payload_length += descr->page_length;
|
||||
descr_commit_idx_array[count] = Index;
|
||||
eligible_pages[count++] = descr;
|
||||
}
|
||||
uv_mutex_unlock(&descr->mutex);
|
||||
}
|
||||
uv_rwlock_rdunlock(&pg_cache->commited_page_index.lock);
|
||||
|
||||
if (!count) {
|
||||
debug(D_RRDENGINE, "%s: no pages eligible for flushing.", __func__);
|
||||
if (completion)
|
||||
complete(completion);
|
||||
return 0;
|
||||
}
|
||||
xt_io_descr = mallocz(sizeof(*xt_io_descr));
|
||||
payload_offset = sizeof(*header) + count * sizeof(header->descr[0]);
|
||||
switch (compression_algorithm) {
|
||||
case RRD_NO_COMPRESSION:
|
||||
size_bytes = payload_offset + uncompressed_payload_length + sizeof(*trailer);
|
||||
break;
|
||||
default: /* Compress */
|
||||
assert(uncompressed_payload_length < LZ4_MAX_INPUT_SIZE);
|
||||
max_compressed_size = LZ4_compressBound(uncompressed_payload_length);
|
||||
compressed_buf = mallocz(max_compressed_size);
|
||||
size_bytes = payload_offset + MAX(uncompressed_payload_length, (unsigned)max_compressed_size) + sizeof(*trailer);
|
||||
break;
|
||||
}
|
||||
ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes));
|
||||
if (unlikely(ret)) {
|
||||
fatal("posix_memalign:%s", strerror(ret));
|
||||
/* free(xt_io_descr);*/
|
||||
}
|
||||
(void) memcpy(xt_io_descr->descr_array, eligible_pages, sizeof(struct rrdeng_page_cache_descr *) * count);
|
||||
xt_io_descr->descr_count = count;
|
||||
|
||||
pos = 0;
|
||||
header = xt_io_descr->buf;
|
||||
header->compression_algorithm = compression_algorithm;
|
||||
header->number_of_pages = count;
|
||||
pos += sizeof(*header);
|
||||
|
||||
extent = mallocz(sizeof(*extent) + count * sizeof(extent->pages[0]));
|
||||
datafile = ctx->datafiles.last; /* TODO: check for exceeded size quota */
|
||||
extent->offset = datafile->pos;
|
||||
extent->number_of_pages = count;
|
||||
extent->datafile = datafile;
|
||||
extent->next = NULL;
|
||||
|
||||
for (i = 0 ; i < count ; ++i) {
|
||||
/* This is here for performance reasons */
|
||||
xt_io_descr->descr_commit_idx_array[i] = descr_commit_idx_array[i];
|
||||
|
||||
descr = xt_io_descr->descr_array[i];
|
||||
header->descr[i].type = PAGE_METRICS;
|
||||
uuid_copy(*(uuid_t *)header->descr[i].uuid, *descr->id);
|
||||
header->descr[i].page_length = descr->page_length;
|
||||
header->descr[i].start_time = descr->start_time;
|
||||
header->descr[i].end_time = descr->end_time;
|
||||
pos += sizeof(header->descr[i]);
|
||||
}
|
||||
for (i = 0 ; i < count ; ++i) {
|
||||
descr = xt_io_descr->descr_array[i];
|
||||
/* care, we don't hold the descriptor mutex */
|
||||
(void) memcpy(xt_io_descr->buf + pos, descr->page, descr->page_length);
|
||||
descr->extent = extent;
|
||||
extent->pages[i] = descr;
|
||||
|
||||
pos += descr->page_length;
|
||||
}
|
||||
df_extent_insert(extent);
|
||||
|
||||
switch (compression_algorithm) {
|
||||
case RRD_NO_COMPRESSION:
|
||||
header->payload_length = uncompressed_payload_length;
|
||||
break;
|
||||
default: /* Compress */
|
||||
compressed_size = LZ4_compress_default(xt_io_descr->buf + payload_offset, compressed_buf,
|
||||
uncompressed_payload_length, max_compressed_size);
|
||||
ctx->stats.before_compress_bytes += uncompressed_payload_length;
|
||||
ctx->stats.after_compress_bytes += compressed_size;
|
||||
debug(D_RRDENGINE, "LZ4 compressed %"PRIu32" bytes to %d bytes.", uncompressed_payload_length, compressed_size);
|
||||
(void) memcpy(xt_io_descr->buf + payload_offset, compressed_buf, compressed_size);
|
||||
free(compressed_buf);
|
||||
size_bytes = payload_offset + compressed_size + sizeof(*trailer);
|
||||
header->payload_length = compressed_size;
|
||||
break;
|
||||
}
|
||||
extent->size = size_bytes;
|
||||
xt_io_descr->bytes = size_bytes;
|
||||
xt_io_descr->pos = datafile->pos;
|
||||
xt_io_descr->req.data = xt_io_descr;
|
||||
xt_io_descr->completion = completion;
|
||||
|
||||
trailer = xt_io_descr->buf + size_bytes - sizeof(*trailer);
|
||||
crc = crc32(0L, Z_NULL, 0);
|
||||
crc = crc32(crc, xt_io_descr->buf, size_bytes - sizeof(*trailer));
|
||||
crc32set(trailer->checksum, crc);
|
||||
|
||||
real_io_size = ALIGN_BYTES_CEILING(size_bytes);
|
||||
xt_io_descr->iov = uv_buf_init((void *)xt_io_descr->buf, real_io_size);
|
||||
ret = uv_fs_write(wc->loop, &xt_io_descr->req, datafile->file, &xt_io_descr->iov, 1, datafile->pos, flush_pages_cb);
|
||||
assert (-1 != ret);
|
||||
ctx->stats.io_write_bytes += real_io_size;
|
||||
++ctx->stats.io_write_requests;
|
||||
ctx->stats.io_write_extent_bytes += real_io_size;
|
||||
++ctx->stats.io_write_extents;
|
||||
do_commit_transaction(wc, STORE_DATA, xt_io_descr);
|
||||
datafile->pos += ALIGN_BYTES_CEILING(size_bytes);
|
||||
ctx->disk_space += ALIGN_BYTES_CEILING(size_bytes);
|
||||
rrdeng_test_quota(wc);
|
||||
|
||||
return ALIGN_BYTES_CEILING(size_bytes);
|
||||
}
|
||||
|
||||
/*
 * Completion callback of the datafile deletion work item, invoked by libuv after
 * delete_old_data() has finished. Removes the oldest datafile / journalfile pair,
 * updates the disk space accounting and lets command processing resume.
 */
static void after_delete_old_data(uv_work_t *req, int status)
{
    struct rrdengine_instance *ctx = req->data;
    struct rrdengine_worker_config* wc = &ctx->worker_config;
    struct rrdengine_datafile *datafile;
    struct rrdengine_journalfile *journalfile;
    uint64_t bytes; /* same width as ctx->disk_space so that large files are not truncated */
    int ret;

    (void)status;
    datafile = ctx->datafiles.first;
    journalfile = datafile->journalfile;
    bytes = (uint64_t)datafile->pos + journalfile->pos;

    datafile_list_delete(ctx, datafile);
    destroy_journal_file(journalfile, datafile);
    destroy_data_file(datafile);
    info("Deleted data file \""DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION"\".",
         datafile->tier, datafile->fileno);
    free(journalfile);
    free(datafile);

    ctx->disk_space -= bytes;
    info("Reclaimed %"PRIu64" bytes of disk space.", bytes);

    /* unfreeze command processing */
    wc->now_deleting.data = NULL;
    /* wake up event loop; the call is kept outside assert() so it survives NDEBUG builds */
    ret = uv_async_send(&wc->async);
    assert(0 == ret);
    (void)ret;
}
|
||||
|
||||
/*
 * Work item body for datafile deletion: walks every extent of the first datafile,
 * punches each of its pages out of the page cache and frees the extent bookkeeping.
 */
static void delete_old_data(uv_work_t *req)
{
    struct rrdengine_instance *ctx = req->data;
    /* Safe to use since it will be deleted after we are done */
    struct rrdengine_datafile *oldest = ctx->datafiles.first;
    struct extent_info *xt = oldest->extents.first;

    while (NULL != xt) {
        struct extent_info *following;
        unsigned nr_pages = xt->number_of_pages;
        unsigned idx;

        for (idx = 0 ; idx < nr_pages ; ++idx) {
            pg_cache_punch_hole(ctx, xt->pages[idx]);
        }
        /* fetch the successor before the extent is released */
        following = xt->next;
        free(xt);
        xt = following;
    }
}
|
||||
|
||||
void rrdeng_test_quota(struct rrdengine_worker_config* wc)
|
||||
{
|
||||
struct rrdengine_instance *ctx = wc->ctx;
|
||||
struct rrdengine_datafile *datafile;
|
||||
unsigned current_size, target_size;
|
||||
uint8_t out_of_space, only_one_datafile;
|
||||
|
||||
out_of_space = 0;
|
||||
if (unlikely(ctx->disk_space > ctx->max_disk_space)) {
|
||||
out_of_space = 1;
|
||||
}
|
||||
datafile = ctx->datafiles.last;
|
||||
current_size = datafile->pos;
|
||||
target_size = ctx->max_disk_space / TARGET_DATAFILES;
|
||||
target_size = MIN(target_size, MAX_DATAFILE_SIZE);
|
||||
target_size = MAX(target_size, MIN_DATAFILE_SIZE);
|
||||
only_one_datafile = (datafile == ctx->datafiles.first) ? 1 : 0;
|
||||
if (unlikely(current_size >= target_size || (out_of_space && only_one_datafile))) {
|
||||
/* Finalize data and journal file and create a new pair */
|
||||
wal_flush_transaction_buffer(wc);
|
||||
create_new_datafile_pair(ctx, 1, datafile->fileno + 1);
|
||||
}
|
||||
if (unlikely(out_of_space)) {
|
||||
/* delete old data */
|
||||
if (wc->now_deleting.data) {
|
||||
/* already deleting data */
|
||||
return;
|
||||
}
|
||||
info("Deleting data file \""DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION"\".",
|
||||
ctx->datafiles.first->tier, ctx->datafiles.first->fileno);
|
||||
wc->now_deleting.data = ctx;
|
||||
uv_queue_work(wc->loop, &wc->now_deleting, delete_old_data, after_delete_old_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* Thin wrapper: initializes the data files of the instance and forwards the result. */
int init_rrd_files(struct rrdengine_instance *ctx)
{
    int rc = init_data_files(ctx);

    return rc;
}
|
||||
|
||||
void rrdeng_init_cmd_queue(struct rrdengine_worker_config* wc)
|
||||
{
|
||||
wc->cmd_queue.head = wc->cmd_queue.tail = 0;
|
||||
wc->queue_size = 0;
|
||||
assert(0 == uv_cond_init(&wc->cmd_cond));
|
||||
assert(0 == uv_mutex_init(&wc->cmd_mutex));
|
||||
}
|
||||
|
||||
void rrdeng_enq_cmd(struct rrdengine_worker_config* wc, struct rrdeng_cmd *cmd)
|
||||
{
|
||||
unsigned queue_size;
|
||||
|
||||
/* wait for free space in queue */
|
||||
uv_mutex_lock(&wc->cmd_mutex);
|
||||
while ((queue_size = wc->queue_size) == RRDENG_CMD_Q_MAX_SIZE) {
|
||||
uv_cond_wait(&wc->cmd_cond, &wc->cmd_mutex);
|
||||
}
|
||||
assert(queue_size < RRDENG_CMD_Q_MAX_SIZE);
|
||||
/* enqueue command */
|
||||
wc->cmd_queue.cmd_array[wc->cmd_queue.tail] = *cmd;
|
||||
wc->cmd_queue.tail = wc->cmd_queue.tail != RRDENG_CMD_Q_MAX_SIZE - 1 ?
|
||||
wc->cmd_queue.tail + 1 : 0;
|
||||
wc->queue_size = queue_size + 1;
|
||||
uv_mutex_unlock(&wc->cmd_mutex);
|
||||
|
||||
/* wake up event loop */
|
||||
assert(0 == uv_async_send(&wc->async));
|
||||
}
|
||||
|
||||
struct rrdeng_cmd rrdeng_deq_cmd(struct rrdengine_worker_config* wc)
|
||||
{
|
||||
struct rrdeng_cmd ret;
|
||||
unsigned queue_size;
|
||||
|
||||
uv_mutex_lock(&wc->cmd_mutex);
|
||||
queue_size = wc->queue_size;
|
||||
if (queue_size == 0) {
|
||||
ret.opcode = RRDENG_NOOP;
|
||||
} else {
|
||||
/* dequeue command */
|
||||
ret = wc->cmd_queue.cmd_array[wc->cmd_queue.head];
|
||||
if (queue_size == 1) {
|
||||
wc->cmd_queue.head = wc->cmd_queue.tail = 0;
|
||||
} else {
|
||||
wc->cmd_queue.head = wc->cmd_queue.head != RRDENG_CMD_Q_MAX_SIZE - 1 ?
|
||||
wc->cmd_queue.head + 1 : 0;
|
||||
}
|
||||
wc->queue_size = queue_size - 1;
|
||||
|
||||
/* wake up producers */
|
||||
uv_cond_signal(&wc->cmd_cond);
|
||||
}
|
||||
uv_mutex_unlock(&wc->cmd_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Async wake-up callback: stops the event loop run and refreshes the loop's cached time. */
void async_cb(uv_async_t *handle)
{
    uv_loop_t *event_loop = handle->loop;

    uv_stop(event_loop);
    uv_update_time(event_loop);
    debug(D_RRDENGINE, "%s called, active=%d.", __func__, uv_is_active((uv_handle_t *)handle));
}
|
||||
|
||||
/*
 * Periodic timer callback: stops the event loop run, re-checks the disk quota and,
 * unless a datafile deletion is in progress, flushes committed pages until either
 * nothing is left to flush or DATAFILE_IDEAL_IO_SIZE bytes have been submitted.
 */
void timer_cb(uv_timer_t* handle)
{
    struct rrdengine_worker_config* wc = handle->data;

    uv_stop(handle->loop);
    uv_update_time(handle->loop);
    rrdeng_test_quota(wc);
    debug(D_RRDENGINE, "%s: timeout reached.", __func__);
    if (likely(!wc->now_deleting.data)) {
        unsigned submitted, last_flush;

        /* There is free space so we can write to disk */
        debug(D_RRDENGINE, "Flushing pages to disk.");
        last_flush = do_flush_pages(wc, 0, NULL);
        submitted = last_flush;
        while (last_flush && (submitted < DATAFILE_IDEAL_IO_SIZE)) {
            last_flush = do_flush_pages(wc, 0, NULL);
            submitted += last_flush;
        }
    }
#ifdef NETDATA_INTERNAL_CHECKS
    {
        char buf[4096];
        debug(D_RRDENGINE, "%s", get_rrdeng_statistics(wc->ctx, buf, sizeof(buf)));
    }
#endif
}
|
||||
|
||||
/* Flushes dirty pages when timer expires */
|
||||
#define TIMER_PERIOD_MS (1000)
|
||||
|
||||
#define CMD_BATCH_SIZE (256)
|
||||
|
||||
void rrdeng_worker(void* arg)
|
||||
{
|
||||
struct rrdengine_worker_config* wc = arg;
|
||||
struct rrdengine_instance *ctx = wc->ctx;
|
||||
uv_loop_t* loop;
|
||||
int shutdown;
|
||||
enum rrdeng_opcode opcode;
|
||||
uv_timer_t timer_req;
|
||||
struct rrdeng_cmd cmd;
|
||||
|
||||
rrdeng_init_cmd_queue(wc);
|
||||
|
||||
loop = wc->loop = mallocz(sizeof(uv_loop_t));
|
||||
uv_loop_init(loop);
|
||||
loop->data = wc;
|
||||
|
||||
uv_async_init(wc->loop, &wc->async, async_cb);
|
||||
wc->async.data = wc;
|
||||
|
||||
wc->now_deleting.data = NULL;
|
||||
|
||||
/* dirty page flushing timer */
|
||||
uv_timer_init(loop, &timer_req);
|
||||
timer_req.data = wc;
|
||||
|
||||
/* wake up initialization thread */
|
||||
complete(&ctx->rrdengine_completion);
|
||||
|
||||
uv_timer_start(&timer_req, timer_cb, TIMER_PERIOD_MS, TIMER_PERIOD_MS);
|
||||
shutdown = 0;
|
||||
while (shutdown == 0 || uv_loop_alive(loop)) {
|
||||
uv_run(loop, UV_RUN_DEFAULT);
|
||||
/* wait for commands */
|
||||
do {
|
||||
cmd = rrdeng_deq_cmd(wc);
|
||||
opcode = cmd.opcode;
|
||||
|
||||
switch (opcode) {
|
||||
case RRDENG_NOOP:
|
||||
/* the command queue was empty, do nothing */
|
||||
break;
|
||||
case RRDENG_SHUTDOWN:
|
||||
shutdown = 1;
|
||||
if (unlikely(wc->now_deleting.data)) {
|
||||
/* postpone shutdown until after deletion */
|
||||
info("Postponing shutting RRD engine event loop down until after datafile deletion is finished.");
|
||||
rrdeng_enq_cmd(wc, &cmd);
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* uv_async_send after uv_close does not seem to crash in linux at the moment,
|
||||
* it is however undocumented behaviour and we need to be aware if this becomes
|
||||
* an issue in the future.
|
||||
*/
|
||||
uv_close((uv_handle_t *)&wc->async, NULL);
|
||||
assert(0 == uv_timer_stop(&timer_req));
|
||||
uv_close((uv_handle_t *)&timer_req, NULL);
|
||||
info("Shutting down RRD engine event loop.");
|
||||
while (do_flush_pages(wc, 1, NULL)) {
|
||||
; /* Force flushing of all commited pages. */
|
||||
}
|
||||
break;
|
||||
case RRDENG_READ_PAGE:
|
||||
do_read_extent(wc, &cmd.read_page.page_cache_descr, 1, 0);
|
||||
break;
|
||||
case RRDENG_READ_EXTENT:
|
||||
do_read_extent(wc, cmd.read_extent.page_cache_descr, cmd.read_extent.page_count, 1);
|
||||
break;
|
||||
case RRDENG_COMMIT_PAGE:
|
||||
do_commit_transaction(wc, STORE_DATA, NULL);
|
||||
break;
|
||||
case RRDENG_FLUSH_PAGES: {
|
||||
unsigned total_bytes, bytes_written;
|
||||
|
||||
/* First I/O should be enough to call completion */
|
||||
bytes_written = do_flush_pages(wc, 1, cmd.completion);
|
||||
for (total_bytes = bytes_written ;
|
||||
bytes_written && (total_bytes < DATAFILE_IDEAL_IO_SIZE) ;
|
||||
total_bytes += bytes_written) {
|
||||
bytes_written = do_flush_pages(wc, 1, NULL);
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
debug(D_RRDENGINE, "%s: default.", __func__);
|
||||
break;
|
||||
}
|
||||
} while (opcode != RRDENG_NOOP);
|
||||
}
|
||||
/* cleanup operations of the event loop */
|
||||
wal_flush_transaction_buffer(wc);
|
||||
uv_run(loop, UV_RUN_DEFAULT);
|
||||
|
||||
info("Shutting down RRD engine event loop complete.");
|
||||
/* TODO: don't let the API block by waiting to enqueue commands */
|
||||
uv_cond_destroy(&wc->cmd_cond);
|
||||
/* uv_mutex_destroy(&wc->cmd_mutex); */
|
||||
assert(0 == uv_loop_close(loop));
|
||||
free(loop);
|
||||
}
|
||||
|
||||
|
||||
#define NR_PAGES (256)
|
||||
static void basic_functional_test(struct rrdengine_instance *ctx)
|
||||
{
|
||||
int i, j, failed_validations;
|
||||
uuid_t uuid[NR_PAGES];
|
||||
void *buf;
|
||||
struct rrdeng_page_cache_descr *handle[NR_PAGES];
|
||||
char uuid_str[37];
|
||||
char backup[NR_PAGES][37 * 100]; /* backup storage for page data verification */
|
||||
|
||||
for (i = 0 ; i < NR_PAGES ; ++i) {
|
||||
uuid_generate(uuid[i]);
|
||||
uuid_unparse_lower(uuid[i], uuid_str);
|
||||
// fprintf(stderr, "Generated uuid[%d]=%s\n", i, uuid_str);
|
||||
buf = rrdeng_create_page(&uuid[i], &handle[i]);
|
||||
/* Each page contains 10 times its own UUID stringified */
|
||||
for (j = 0 ; j < 100 ; ++j) {
|
||||
strcpy(buf + 37 * j, uuid_str);
|
||||
strcpy(backup[i] + 37 * j, uuid_str);
|
||||
}
|
||||
rrdeng_commit_page(ctx, handle[i], (Word_t)i);
|
||||
}
|
||||
fprintf(stderr, "\n********** CREATED %d METRIC PAGES ***********\n\n", NR_PAGES);
|
||||
failed_validations = 0;
|
||||
for (i = 0 ; i < NR_PAGES ; ++i) {
|
||||
buf = rrdeng_get_latest_page(ctx, &uuid[i], (void **)&handle[i]);
|
||||
if (NULL == buf) {
|
||||
++failed_validations;
|
||||
fprintf(stderr, "Page %d was LOST.\n", i);
|
||||
}
|
||||
if (memcmp(backup[i], buf, 37 * 100)) {
|
||||
++failed_validations;
|
||||
fprintf(stderr, "Page %d data comparison with backup FAILED validation.\n", i);
|
||||
}
|
||||
rrdeng_put_page(ctx, handle[i]);
|
||||
}
|
||||
fprintf(stderr, "\n********** CORRECTLY VALIDATED %d/%d METRIC PAGES ***********\n\n",
|
||||
NR_PAGES - failed_validations, NR_PAGES);
|
||||
|
||||
}
|
||||
/* C entry point for development purposes
|
||||
* make "LDFLAGS=-errdengine_main"
|
||||
*/
|
||||
void rrdengine_main(void)
|
||||
{
|
||||
int ret;
|
||||
struct rrdengine_instance *ctx;
|
||||
|
||||
ret = rrdeng_init(&ctx, "/tmp", RRDENG_MIN_PAGE_CACHE_SIZE_MB, RRDENG_MIN_DISK_SPACE_MB);
|
||||
if (ret) {
|
||||
exit(ret);
|
||||
}
|
||||
basic_functional_test(ctx);
|
||||
|
||||
rrdeng_exit(ctx);
|
||||
fprintf(stderr, "Hello world!");
|
||||
exit(0);
|
||||
}
|
171
database/engine/rrdengine.h
Normal file
171
database/engine/rrdengine.h
Normal file
|
@ -0,0 +1,171 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_RRDENGINE_H
|
||||
#define NETDATA_RRDENGINE_H
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#include <aio.h>
|
||||
#include <uv.h>
|
||||
#include <assert.h>
|
||||
#include <lz4.h>
|
||||
#include <Judy.h>
|
||||
#include <openssl/sha.h>
|
||||
#include <openssl/evp.h>
|
||||
#include <stdint.h>
|
||||
#include "../rrd.h"
|
||||
#include "rrddiskprotocol.h"
|
||||
#include "rrdenginelib.h"
|
||||
#include "datafile.h"
|
||||
#include "journalfile.h"
|
||||
#include "rrdengineapi.h"
|
||||
#include "pagecache.h"
|
||||
|
||||
#ifdef NETDATA_RRD_INTERNALS
|
||||
|
||||
#endif /* NETDATA_RRD_INTERNALS */
|
||||
|
||||
/* Forward declarations */
|
||||
struct rrdengine_instance;
|
||||
|
||||
#define MAX_PAGES_PER_EXTENT (64) /* TODO: can go higher only when journal supports bigger than 4KiB transactions */
|
||||
|
||||
#define RRDENG_FILE_NUMBER_SCAN_TMPL "%1u-%10u"
|
||||
#define RRDENG_FILE_NUMBER_PRINT_TMPL "%1.1u-%10.10u"
|
||||
|
||||
|
||||
/* Initialization state of an engine instance. */
typedef enum {
    RRDENGINE_STATUS_UNINITIALIZED = 0, /* zero value */
    RRDENGINE_STATUS_INITIALIZING,
    RRDENGINE_STATUS_INITIALIZED
} rrdengine_state_t;
|
||||
|
||||
/* Opcodes of the commands that can be queued to the engine worker. */
enum rrdeng_opcode {
    /* can be used to return empty status or flush the command queue */
    RRDENG_NOOP = 0,

    RRDENG_READ_PAGE,   /* read a single page */
    RRDENG_READ_EXTENT, /* read a set of pages as one extent */
    RRDENG_COMMIT_PAGE, /* commit a transaction */
    RRDENG_FLUSH_PAGES, /* force flushing of committed pages */
    RRDENG_SHUTDOWN,    /* stop the worker event loop */

    RRDENG_MAX_OPCODE
};
|
||||
|
||||
struct rrdeng_cmd {
|
||||
enum rrdeng_opcode opcode;
|
||||
union {
|
||||
struct rrdeng_read_page {
|
||||
struct rrdeng_page_cache_descr *page_cache_descr;
|
||||
} read_page;
|
||||
struct rrdeng_read_extent {
|
||||
struct rrdeng_page_cache_descr *page_cache_descr[MAX_PAGES_PER_EXTENT];
|
||||
int page_count;
|
||||
} read_extent;
|
||||
struct completion *completion;
|
||||
};
|
||||
};
|
||||
|
||||
#define RRDENG_CMD_Q_MAX_SIZE (2048)
|
||||
|
||||
struct rrdeng_cmdqueue {
|
||||
unsigned head, tail;
|
||||
struct rrdeng_cmd cmd_array[RRDENG_CMD_Q_MAX_SIZE];
|
||||
};
|
||||
|
||||
struct extent_io_descriptor {
|
||||
uv_fs_t req;
|
||||
uv_buf_t iov;
|
||||
void *buf;
|
||||
uint64_t pos;
|
||||
unsigned bytes;
|
||||
struct completion *completion;
|
||||
unsigned descr_count;
|
||||
int release_descr;
|
||||
struct rrdeng_page_cache_descr *descr_array[MAX_PAGES_PER_EXTENT];
|
||||
Word_t descr_commit_idx_array[MAX_PAGES_PER_EXTENT];
|
||||
};
|
||||
|
||||
struct generic_io_descriptor {
|
||||
uv_fs_t req;
|
||||
uv_buf_t iov;
|
||||
void *buf;
|
||||
uint64_t pos;
|
||||
unsigned bytes;
|
||||
struct completion *completion;
|
||||
};
|
||||
|
||||
struct rrdengine_worker_config {
|
||||
struct rrdengine_instance *ctx;
|
||||
|
||||
uv_thread_t thread;
|
||||
uv_loop_t* loop;
|
||||
uv_async_t async;
|
||||
uv_work_t now_deleting;
|
||||
|
||||
/* FIFO command queue */
|
||||
uv_mutex_t cmd_mutex;
|
||||
uv_cond_t cmd_cond;
|
||||
volatile unsigned queue_size;
|
||||
struct rrdeng_cmdqueue cmd_queue;
|
||||
};
|
||||
|
||||
/*
|
||||
* Debug statistics not used by code logic.
|
||||
* They only describe operations since DB engine instance load time.
|
||||
*/
|
||||
struct rrdengine_statistics {
|
||||
rrdeng_stats_t metric_API_producers;
|
||||
rrdeng_stats_t metric_API_consumers;
|
||||
rrdeng_stats_t pg_cache_insertions;
|
||||
rrdeng_stats_t pg_cache_deletions;
|
||||
rrdeng_stats_t pg_cache_hits;
|
||||
rrdeng_stats_t pg_cache_misses;
|
||||
rrdeng_stats_t pg_cache_backfills;
|
||||
rrdeng_stats_t pg_cache_evictions;
|
||||
rrdeng_stats_t before_decompress_bytes;
|
||||
rrdeng_stats_t after_decompress_bytes;
|
||||
rrdeng_stats_t before_compress_bytes;
|
||||
rrdeng_stats_t after_compress_bytes;
|
||||
rrdeng_stats_t io_write_bytes;
|
||||
rrdeng_stats_t io_write_requests;
|
||||
rrdeng_stats_t io_read_bytes;
|
||||
rrdeng_stats_t io_read_requests;
|
||||
rrdeng_stats_t io_write_extent_bytes;
|
||||
rrdeng_stats_t io_write_extents;
|
||||
rrdeng_stats_t io_read_extent_bytes;
|
||||
rrdeng_stats_t io_read_extents;
|
||||
rrdeng_stats_t datafile_creations;
|
||||
rrdeng_stats_t datafile_deletions;
|
||||
rrdeng_stats_t journalfile_creations;
|
||||
rrdeng_stats_t journalfile_deletions;
|
||||
};
|
||||
|
||||
struct rrdengine_instance {
|
||||
rrdengine_state_t rrdengine_state;
|
||||
struct rrdengine_worker_config worker_config;
|
||||
struct completion rrdengine_completion;
|
||||
struct page_cache pg_cache;
|
||||
uint8_t global_compress_alg;
|
||||
struct transaction_commit_log commit_log;
|
||||
struct rrdengine_datafile_list datafiles;
|
||||
char dbfiles_path[FILENAME_MAX+1];
|
||||
uint64_t disk_space;
|
||||
uint64_t max_disk_space;
|
||||
unsigned long max_cache_pages;
|
||||
unsigned long cache_pages_low_watermark;
|
||||
|
||||
struct rrdengine_statistics stats;
|
||||
};
|
||||
|
||||
extern void sanity_check(void);
|
||||
extern int init_rrd_files(struct rrdengine_instance *ctx);
|
||||
extern void rrdeng_test_quota(struct rrdengine_worker_config* wc);
|
||||
extern void rrdeng_worker(void* arg);
|
||||
extern void rrdeng_enq_cmd(struct rrdengine_worker_config* wc, struct rrdeng_cmd *cmd);
|
||||
extern struct rrdeng_cmd rrdeng_deq_cmd(struct rrdengine_worker_config* wc);
|
||||
|
||||
#endif /* NETDATA_RRDENGINE_H */
|
484
database/engine/rrdengineapi.c
Normal file
484
database/engine/rrdengineapi.c
Normal file
|
@ -0,0 +1,484 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#include "rrdengine.h"
|
||||
|
||||
/* Default global database instance */
|
||||
static struct rrdengine_instance default_global_ctx;
|
||||
|
||||
int default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
|
||||
int default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB;
|
||||
|
||||
/*
|
||||
* Gets a handle for storing metrics to the database.
|
||||
 * The handle must be released with rrdeng_store_metric_finalize().
|
||||
*/
|
||||
void rrdeng_store_metric_init(RRDDIM *rd)
{
    struct rrdeng_collect_handle *handle;
    struct page_cache *pg_cache;
    struct rrdengine_instance *ctx;
    uuid_t temp_id;
    Pvoid_t *PValue;
    struct pg_cache_page_index *page_index;
    EVP_MD_CTX *evpctx;
    unsigned char hash_value[EVP_MAX_MD_SIZE];
    unsigned int hash_len;

    //&default_global_ctx; TODO: test this use case or remove it?

    ctx = rd->rrdset->rrdhost->rrdeng_ctx;
    pg_cache = &ctx->pg_cache;
    handle = &rd->state->handle.rrdeng;
    handle->ctx = ctx;

    /* The metric UUID is the leading sizeof(uuid_t) bytes of SHA-256(dimension id, chart id). */
    evpctx = EVP_MD_CTX_create();
    EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
    EVP_DigestUpdate(evpctx, rd->id, strlen(rd->id));
    EVP_DigestUpdate(evpctx, rd->rrdset->id, strlen(rd->rrdset->id));
    EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
    EVP_MD_CTX_destroy(evpctx);
    assert(hash_len > sizeof(temp_id));
    memcpy(&temp_id, hash_value, sizeof(temp_id));

    handle->descr = NULL;
    handle->prev_descr = NULL;

    /* Fast path: the metric is already indexed, a read lock is enough. */
    uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
    PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &temp_id, sizeof(uuid_t));
    if (likely(NULL != PValue)) {
        page_index = *PValue;
    }
    uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
    if (NULL == PValue) {
        /* First time we see the UUID */
        uv_rwlock_wrlock(&pg_cache->metrics_index.lock);
        PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, &temp_id, sizeof(uuid_t), PJE0);
        if (NULL == *PValue) {
            *PValue = page_index = create_page_index(&temp_id);
        } else {
            /*
             * Another thread indexed the same UUID after the read lock was dropped;
             * reuse its page index instead of replacing it.
             */
            page_index = *PValue;
        }
        uv_rwlock_wrunlock(&pg_cache->metrics_index.lock);
    }
    rd->state->rrdeng_uuid = &page_index->id;
    handle->page_index = page_index;
}
|
||||
|
||||
/*
 * Appends one collected value to the page currently being filled for the dimension.
 * When there is no current page, or the value does not fit in it, the current page
 * is committed (or discarded when empty) and a new page is started first.
 */
void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, storage_number number)
{
    struct rrdeng_collect_handle *collect = &rd->state->handle.rrdeng;
    struct rrdengine_instance *ctx = collect->ctx;
    struct page_cache *pg_cache = &ctx->pg_cache;
    struct rrdeng_page_cache_descr *descr = collect->descr;
    storage_number *values;
    int need_new_page;

    need_new_page = (NULL == descr) || (descr->page_length + sizeof(number) > RRDENG_BLOCK_SIZE);
    if (unlikely(need_new_page)) {
        if (NULL != descr) {
            descr->handle = NULL;
            if (0 == descr->page_length) {
                /* nothing was stored in the page, drop it */
                free(descr->page);
                free(descr);
                collect->descr = NULL;
            } else {
#ifdef NETDATA_INTERNAL_CHECKS
                rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1);
#endif
                /* added 1 extra reference to keep 2 dirty pages pinned per metric, expected refcnt = 2 */
                ++descr->refcnt;
                rrdeng_commit_page(ctx, descr, collect->page_correlation_id);
                if (collect->prev_descr) {
                    /* unpin old second page */
                    pg_cache_put(collect->prev_descr);
                }
                collect->prev_descr = descr;
            }
        }
        values = rrdeng_create_page(&collect->page_index->id, &descr);
        assert(values);
        collect->prev_descr = collect->descr;
        collect->descr = descr;
        descr->handle = collect;
        /* reserve the next correlation id for the new page */
        uv_rwlock_wrlock(&pg_cache->commited_page_index.lock);
        collect->page_correlation_id = pg_cache->commited_page_index.latest_corr_id++;
        uv_rwlock_wrunlock(&pg_cache->commited_page_index.lock);
    }
    values = descr->page;

    /* store the value in the first free slot and extend the page */
    values[descr->page_length / sizeof(number)] = number;
    descr->end_time = point_in_time;
    descr->page_length += sizeof(number);
    if (likely(INVALID_TIME != descr->start_time)) {
        pg_cache_add_new_metric_time(collect->page_index, descr);
    } else {
        /* first value of the page: set its start time and insert it in the page cache */
        descr->start_time = point_in_time;

#ifdef NETDATA_INTERNAL_CHECKS
        rrd_stat_atomic_add(&ctx->stats.metric_API_producers, 1);
#endif
        pg_cache_insert(ctx, collect->page_index, descr);
    }
}
|
||||
|
||||
/*
|
||||
* Releases the database reference from the handle for storing metrics.
|
||||
*/
|
||||
void rrdeng_store_metric_finalize(RRDDIM *rd)
{
    struct rrdeng_collect_handle *collect = &rd->state->handle.rrdeng;
    struct rrdengine_instance *ctx = collect->ctx;
    struct rrdeng_page_cache_descr *last_page = collect->descr;

    if (NULL == last_page) {
        /* nothing is being collected */
        return;
    }
    last_page->handle = NULL;
    if (0 == last_page->page_length) {
        /* the page never received a value, drop it */
        free(last_page->page);
        free(last_page);
        return;
    }
#ifdef NETDATA_INTERNAL_CHECKS
    rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1);
#endif
    rrdeng_commit_page(ctx, last_page, collect->page_correlation_id);
    if (collect->prev_descr) {
        /* unpin old second page */
        pg_cache_put(collect->prev_descr);
    }
}
|
||||
|
||||
/*
|
||||
* Gets a handle for loading metrics from the database.
|
||||
 * The handle must be released with rrdeng_load_metric_finalize().
|
||||
*/
|
||||
void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_handle, time_t start_time, time_t end_time)
{
    struct rrdengine_instance *ctx = rd->rrdset->rrdhost->rrdeng_ctx;
    struct rrdeng_query_handle *query = &rrdimm_handle->rrdeng;

    /* remember the queried time range */
    rrdimm_handle->start_time = start_time;
    rrdimm_handle->end_time = end_time;

    /* the query starts at start_time and advances by the chart's update interval */
    query->now = start_time;
    query->dt = rd->rrdset->update_every;
    query->ctx = ctx;
    query->descr = NULL;
    query->page_index = pg_cache_preload(ctx, rd->state->rrdeng_uuid,
                                         start_time * USEC_PER_SEC, end_time * USEC_PER_SEC);
}
|
||||
|
||||
storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle)
|
||||
{
|
||||
struct rrdeng_query_handle *handle;
|
||||
struct rrdengine_instance *ctx;
|
||||
struct rrdeng_page_cache_descr *descr;
|
||||
storage_number *page, ret;
|
||||
unsigned position;
|
||||
usec_t point_in_time;
|
||||
|
||||
handle = &rrdimm_handle->rrdeng;
|
||||
if (unlikely(INVALID_TIME == handle->now)) {
|
||||
return SN_EMPTY_SLOT;
|
||||
}
|
||||
ctx = handle->ctx;
|
||||
point_in_time = handle->now * USEC_PER_SEC;
|
||||
descr = handle->descr;
|
||||
|
||||
if (unlikely(NULL == handle->page_index)) {
|
||||
ret = SN_EMPTY_SLOT;
|
||||
goto out;
|
||||
}
|
||||
if (unlikely(NULL == descr ||
|
||||
point_in_time < descr->start_time ||
|
||||
point_in_time > descr->end_time)) {
|
||||
if (descr) {
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
|
||||
#endif
|
||||
pg_cache_put(descr);
|
||||
handle->descr = NULL;
|
||||
}
|
||||
descr = pg_cache_lookup(ctx, handle->page_index, &handle->page_index->id, point_in_time);
|
||||
if (NULL == descr) {
|
||||
ret = SN_EMPTY_SLOT;
|
||||
goto out;
|
||||
}
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, 1);
|
||||
#endif
|
||||
handle->descr = descr;
|
||||
}
|
||||
if (unlikely(INVALID_TIME == descr->start_time ||
|
||||
INVALID_TIME == descr->end_time)) {
|
||||
ret = SN_EMPTY_SLOT;
|
||||
goto out;
|
||||
}
|
||||
page = descr->page;
|
||||
if (unlikely(descr->start_time == descr->end_time)) {
|
||||
ret = page[0];
|
||||
goto out;
|
||||
}
|
||||
position = ((uint64_t)(point_in_time - descr->start_time)) * (descr->page_length / sizeof(storage_number)) /
|
||||
(descr->end_time - descr->start_time + 1);
|
||||
ret = page[position];
|
||||
|
||||
out:
|
||||
handle->now += handle->dt;
|
||||
if (unlikely(handle->now > rrdimm_handle->end_time)) {
|
||||
handle->now = INVALID_TIME;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle)
|
||||
{
|
||||
struct rrdeng_query_handle *handle;
|
||||
|
||||
handle = &rrdimm_handle->rrdeng;
|
||||
return (INVALID_TIME == handle->now);
|
||||
}
|
||||
|
||||
/*
|
||||
* Releases the database reference from the handle for loading metrics.
|
||||
*/
|
||||
void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle)
|
||||
{
|
||||
struct rrdeng_query_handle *handle;
|
||||
struct rrdengine_instance *ctx;
|
||||
struct rrdeng_page_cache_descr *descr;
|
||||
|
||||
handle = &rrdimm_handle->rrdeng;
|
||||
ctx = handle->ctx;
|
||||
descr = handle->descr;
|
||||
if (descr) {
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
|
||||
#endif
|
||||
pg_cache_put(descr);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Returns the latest time of the dimension's page index, converted from
 * microseconds to seconds.
 */
time_t rrdeng_metric_latest_time(RRDDIM *rd)
{
    struct pg_cache_page_index *index = rd->state->handle.rrdeng.page_index;

    return index->latest_time / USEC_PER_SEC;
}
|
||||
/*
 * Returns the oldest time of the dimension's page index, converted from
 * microseconds to seconds.
 */
time_t rrdeng_metric_oldest_time(RRDDIM *rd)
{
    struct pg_cache_page_index *index = rd->state->handle.rrdeng.page_index;

    return index->oldest_time / USEC_PER_SEC;
}
|
||||
|
||||
/* Also gets a reference for the page */
|
||||
void *rrdeng_create_page(uuid_t *id, struct rrdeng_page_cache_descr **ret_descr)
|
||||
{
|
||||
struct rrdeng_page_cache_descr *descr;
|
||||
void *page;
|
||||
int ret;
|
||||
|
||||
/* TODO: check maximum number of pages in page cache limit */
|
||||
|
||||
page = mallocz(RRDENG_BLOCK_SIZE); /*TODO: add page size */
|
||||
descr = pg_cache_create_descr();
|
||||
descr->page = page;
|
||||
descr->id = id; /* TODO: add page type: metric, log, something? */
|
||||
descr->flags = RRD_PAGE_DIRTY /*| RRD_PAGE_LOCKED */ | RRD_PAGE_POPULATED /* | BEING_COLLECTED */;
|
||||
descr->refcnt = 1;
|
||||
|
||||
debug(D_RRDENGINE, "-----------------\nCreated new page:\n-----------------");
|
||||
if(unlikely(debug_flags & D_RRDENGINE))
|
||||
print_page_cache_descr(descr);
|
||||
*ret_descr = descr;
|
||||
return page;
|
||||
}
|
||||
|
||||
/* The page must not be empty */
|
||||
void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_cache_descr *descr,
|
||||
Word_t page_correlation_id)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
Pvoid_t *PValue;
|
||||
|
||||
if (unlikely(NULL == descr)) {
|
||||
debug(D_RRDENGINE, "%s: page descriptor is NULL, page has already been force-commited.", __func__);
|
||||
return;
|
||||
}
|
||||
assert(descr->page_length);
|
||||
|
||||
uv_rwlock_wrlock(&pg_cache->commited_page_index.lock);
|
||||
PValue = JudyLIns(&pg_cache->commited_page_index.JudyL_array, page_correlation_id, PJE0);
|
||||
*PValue = descr;
|
||||
++pg_cache->commited_page_index.nr_commited_pages;
|
||||
uv_rwlock_wrunlock(&pg_cache->commited_page_index.lock);
|
||||
|
||||
pg_cache_put(descr);
|
||||
}
|
||||
|
||||
/* Gets a reference for the page */
|
||||
void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle)
|
||||
{
|
||||
struct rrdeng_page_cache_descr *descr;
|
||||
|
||||
debug(D_RRDENGINE, "----------------------\nReading existing page:\n----------------------");
|
||||
descr = pg_cache_lookup(ctx, NULL, id, INVALID_TIME);
|
||||
if (NULL == descr) {
|
||||
*handle = NULL;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
*handle = descr;
|
||||
|
||||
return descr->page;
|
||||
}
|
||||
|
||||
/* Gets a reference for the page */
|
||||
void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time, void **handle)
|
||||
{
|
||||
struct rrdeng_page_cache_descr *descr;
|
||||
|
||||
debug(D_RRDENGINE, "----------------------\nReading existing page:\n----------------------");
|
||||
descr = pg_cache_lookup(ctx, NULL, id, point_in_time);
|
||||
if (NULL == descr) {
|
||||
*handle = NULL;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
*handle = descr;
|
||||
|
||||
return descr->page;
|
||||
}
|
||||
|
||||
void rrdeng_get_27_statistics(struct rrdengine_instance *ctx, unsigned long long *array)
|
||||
{
|
||||
struct page_cache *pg_cache = &ctx->pg_cache;
|
||||
|
||||
array[0] = (uint64_t)ctx->stats.metric_API_producers;
|
||||
array[1] = (uint64_t)ctx->stats.metric_API_consumers;
|
||||
array[2] = (uint64_t)pg_cache->page_descriptors;
|
||||
array[3] = (uint64_t)pg_cache->populated_pages;
|
||||
array[4] = (uint64_t)pg_cache->commited_page_index.nr_commited_pages;
|
||||
array[5] = (uint64_t)ctx->stats.pg_cache_insertions;
|
||||
array[6] = (uint64_t)ctx->stats.pg_cache_deletions;
|
||||
array[7] = (uint64_t)ctx->stats.pg_cache_hits;
|
||||
array[8] = (uint64_t)ctx->stats.pg_cache_misses;
|
||||
array[9] = (uint64_t)ctx->stats.pg_cache_backfills;
|
||||
array[10] = (uint64_t)ctx->stats.pg_cache_evictions;
|
||||
array[11] = (uint64_t)ctx->stats.before_compress_bytes;
|
||||
array[12] = (uint64_t)ctx->stats.after_compress_bytes;
|
||||
array[13] = (uint64_t)ctx->stats.before_decompress_bytes;
|
||||
array[14] = (uint64_t)ctx->stats.after_decompress_bytes;
|
||||
array[15] = (uint64_t)ctx->stats.io_write_bytes;
|
||||
array[16] = (uint64_t)ctx->stats.io_write_requests;
|
||||
array[17] = (uint64_t)ctx->stats.io_read_bytes;
|
||||
array[18] = (uint64_t)ctx->stats.io_read_requests;
|
||||
array[19] = (uint64_t)ctx->stats.io_write_extent_bytes;
|
||||
array[20] = (uint64_t)ctx->stats.io_write_extents;
|
||||
array[21] = (uint64_t)ctx->stats.io_read_extent_bytes;
|
||||
array[22] = (uint64_t)ctx->stats.io_read_extents;
|
||||
array[23] = (uint64_t)ctx->stats.datafile_creations;
|
||||
array[24] = (uint64_t)ctx->stats.datafile_deletions;
|
||||
array[25] = (uint64_t)ctx->stats.journalfile_creations;
|
||||
array[26] = (uint64_t)ctx->stats.journalfile_deletions;
|
||||
}
|
||||
|
||||
/*
 * Releases reference to page.
 * handle is the page descriptor pointer; ctx is not used.
 */
void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle)
{
    struct rrdeng_page_cache_descr *page_descr = handle;

    (void)ctx;
    pg_cache_put(page_descr);
}
|
||||
|
||||
/*
|
||||
* Returns 0 on success, 1 on error
|
||||
*/
|
||||
int rrdeng_init(struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb, unsigned disk_space_mb)
|
||||
{
|
||||
struct rrdengine_instance *ctx;
|
||||
int error;
|
||||
|
||||
sanity_check();
|
||||
if (NULL == ctxp) {
|
||||
/* for testing */
|
||||
ctx = &default_global_ctx;
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
} else {
|
||||
*ctxp = ctx = callocz(1, sizeof(*ctx));
|
||||
}
|
||||
if (ctx->rrdengine_state != RRDENGINE_STATUS_UNINITIALIZED) {
|
||||
return 1;
|
||||
}
|
||||
ctx->rrdengine_state = RRDENGINE_STATUS_INITIALIZING;
|
||||
ctx->global_compress_alg = RRD_LZ4;
|
||||
if (page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB)
|
||||
page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
|
||||
ctx->max_cache_pages = page_cache_mb * (1048576LU / RRDENG_BLOCK_SIZE);
|
||||
/* try to keep 5% of the page cache free */
|
||||
ctx->cache_pages_low_watermark = (ctx->max_cache_pages * 95LLU) / 100;
|
||||
if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB)
|
||||
disk_space_mb = RRDENG_MIN_DISK_SPACE_MB;
|
||||
ctx->max_disk_space = disk_space_mb * 1048576LLU;
|
||||
strncpyz(ctx->dbfiles_path, dbfiles_path, sizeof(ctx->dbfiles_path) - 1);
|
||||
ctx->dbfiles_path[sizeof(ctx->dbfiles_path) - 1] = '\0';
|
||||
|
||||
memset(&ctx->worker_config, 0, sizeof(ctx->worker_config));
|
||||
ctx->worker_config.ctx = ctx;
|
||||
init_page_cache(ctx);
|
||||
init_commit_log(ctx);
|
||||
error = init_rrd_files(ctx);
|
||||
if (error) {
|
||||
ctx->rrdengine_state = RRDENGINE_STATUS_UNINITIALIZED;
|
||||
if (ctx != &default_global_ctx) {
|
||||
freez(ctx);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
init_completion(&ctx->rrdengine_completion);
|
||||
assert(0 == uv_thread_create(&ctx->worker_config.thread, rrdeng_worker, &ctx->worker_config));
|
||||
/* wait for worker thread to initialize */
|
||||
wait_for_completion(&ctx->rrdengine_completion);
|
||||
destroy_completion(&ctx->rrdengine_completion);
|
||||
|
||||
ctx->rrdengine_state = RRDENGINE_STATUS_INITIALIZED;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns 0 on success, 1 on error
|
||||
*/
|
||||
int rrdeng_exit(struct rrdengine_instance *ctx)
|
||||
{
|
||||
struct rrdeng_cmd cmd;
|
||||
|
||||
if (NULL == ctx) {
|
||||
/* TODO: move to per host basis */
|
||||
ctx = &default_global_ctx;
|
||||
}
|
||||
if (ctx->rrdengine_state != RRDENGINE_STATUS_INITIALIZED) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* TODO: add page to page cache */
|
||||
cmd.opcode = RRDENG_SHUTDOWN;
|
||||
rrdeng_enq_cmd(&ctx->worker_config, &cmd);
|
||||
|
||||
assert(0 == uv_thread_join(&ctx->worker_config.thread));
|
||||
|
||||
if (ctx != &default_global_ctx) {
|
||||
freez(ctx);
|
||||
}
|
||||
return 0;
|
||||
}
|
37
database/engine/rrdengineapi.h
Normal file
37
database/engine/rrdengineapi.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_RRDENGINEAPI_H
|
||||
#define NETDATA_RRDENGINEAPI_H
|
||||
|
||||
#include "rrdengine.h"
|
||||
|
||||
#define RRDENG_MIN_PAGE_CACHE_SIZE_MB (32)
|
||||
#define RRDENG_MIN_DISK_SPACE_MB (256)
|
||||
extern int default_rrdeng_page_cache_mb;
|
||||
extern int default_rrdeng_disk_quota_mb;
|
||||
|
||||
extern void *rrdeng_create_page(uuid_t *id, struct rrdeng_page_cache_descr **ret_descr);
|
||||
extern void rrdeng_commit_page(struct rrdengine_instance *ctx, struct rrdeng_page_cache_descr *descr,
|
||||
Word_t page_correlation_id);
|
||||
extern void *rrdeng_get_latest_page(struct rrdengine_instance *ctx, uuid_t *id, void **handle);
|
||||
extern void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_in_time, void **handle);
|
||||
extern void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle);
|
||||
extern void rrdeng_store_metric_init(RRDDIM *rd);
|
||||
extern void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, storage_number number);
|
||||
extern void rrdeng_store_metric_finalize(RRDDIM *rd);
|
||||
extern void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_handle,
|
||||
time_t start_time, time_t end_time);
|
||||
extern storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle);
|
||||
extern int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle);
|
||||
extern void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle);
|
||||
extern time_t rrdeng_metric_latest_time(RRDDIM *rd);
|
||||
extern time_t rrdeng_metric_oldest_time(RRDDIM *rd);
|
||||
extern void rrdeng_get_27_statistics(struct rrdengine_instance *ctx, unsigned long long *array);
|
||||
|
||||
/* must call once before using anything */
|
||||
extern int rrdeng_init(struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb,
|
||||
unsigned disk_space_mb);
|
||||
|
||||
extern int rrdeng_exit(struct rrdengine_instance *ctx);
|
||||
|
||||
#endif /* NETDATA_RRDENGINEAPI_H */
|
116
database/engine/rrdenginelib.c
Normal file
116
database/engine/rrdenginelib.c
Normal file
|
@ -0,0 +1,116 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#include "rrdengine.h"
|
||||
|
||||
void print_page_cache_descr(struct rrdeng_page_cache_descr *page_cache_descr)
|
||||
{
|
||||
char uuid_str[37];
|
||||
char str[512];
|
||||
int pos = 0;
|
||||
|
||||
uuid_unparse_lower(*page_cache_descr->id, uuid_str);
|
||||
pos += snprintfz(str, 512 - pos, "page(%p) id=%s\n"
|
||||
"--->len:%"PRIu32" time:%"PRIu64"->%"PRIu64" xt_offset:",
|
||||
page_cache_descr->page, uuid_str,
|
||||
page_cache_descr->page_length,
|
||||
(uint64_t)page_cache_descr->start_time,
|
||||
(uint64_t)page_cache_descr->end_time);
|
||||
if (!page_cache_descr->extent) {
|
||||
pos += snprintfz(str + pos, 512 - pos, "N/A");
|
||||
} else {
|
||||
pos += snprintfz(str + pos, 512 - pos, "%"PRIu64, page_cache_descr->extent->offset);
|
||||
}
|
||||
snprintfz(str + pos, 512 - pos, " flags:0x%2.2lX refcnt:%u\n\n", page_cache_descr->flags, page_cache_descr->refcnt);
|
||||
fputs(str, stderr);
|
||||
}
|
||||
|
||||
/*
 * Checks that an open file is a regular file of at least min_size bytes.
 *
 * file      - open libuv file handle to inspect.
 * file_size - receives the size of the file on success.
 * min_size  - smallest acceptable file size in bytes.
 *
 * Returns 0 on success, UV_EINVAL when the file is not a regular file or is
 * shorter than min_size. A failing uv_fs_fstat() is reported through fatal().
 */
int check_file_properties(uv_file file, uint64_t *file_size, size_t min_size)
{
    int ret;
    uv_fs_t req;
    uv_stat_t* s;

    ret = uv_fs_fstat(NULL, &req, file, NULL);
    if (ret < 0) {
        fatal("uv_fs_fstat: %s\n", uv_strerror(ret));
    }
    assert(req.result == 0);
    s = req.ptr;
    /*
     * The file type is a multi-bit field: it has to be masked with S_IFMT and
     * compared for equality. Testing the S_IFREG bit alone also accepts other
     * types whose encoding shares that bit (e.g. sockets).
     */
    if ((s->st_mode & S_IFMT) != S_IFREG) {
        error("Not a regular file.\n");
        uv_fs_req_cleanup(&req);
        return UV_EINVAL;
    }
    if (s->st_size < min_size) {
        error("File length is too short.\n");
        uv_fs_req_cleanup(&req);
        return UV_EINVAL;
    }
    *file_size = s->st_size;
    uv_fs_req_cleanup(&req);

    return 0;
}
|
||||
|
||||
char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t size)
|
||||
{
|
||||
struct page_cache *pg_cache;
|
||||
|
||||
pg_cache = &ctx->pg_cache;
|
||||
snprintfz(str, size,
|
||||
"metric_API_producers: %ld\n"
|
||||
"metric_API_consumers: %ld\n"
|
||||
"page_cache_total_pages: %ld\n"
|
||||
"page_cache_populated_pages: %ld\n"
|
||||
"page_cache_commited_pages: %ld\n"
|
||||
"page_cache_insertions: %ld\n"
|
||||
"page_cache_deletions: %ld\n"
|
||||
"page_cache_hits: %ld\n"
|
||||
"page_cache_misses: %ld\n"
|
||||
"page_cache_backfills: %ld\n"
|
||||
"page_cache_evictions: %ld\n"
|
||||
"compress_before_bytes: %ld\n"
|
||||
"compress_after_bytes: %ld\n"
|
||||
"decompress_before_bytes: %ld\n"
|
||||
"decompress_after_bytes: %ld\n"
|
||||
"io_write_bytes: %ld\n"
|
||||
"io_write_requests: %ld\n"
|
||||
"io_read_bytes: %ld\n"
|
||||
"io_read_requests: %ld\n"
|
||||
"io_write_extent_bytes: %ld\n"
|
||||
"io_write_extents: %ld\n"
|
||||
"io_read_extent_bytes: %ld\n"
|
||||
"io_read_extents: %ld\n"
|
||||
"datafile_creations: %ld\n"
|
||||
"datafile_deletions: %ld\n"
|
||||
"journalfile_creations: %ld\n"
|
||||
"journalfile_deletions: %ld\n",
|
||||
(long)ctx->stats.metric_API_producers,
|
||||
(long)ctx->stats.metric_API_consumers,
|
||||
(long)pg_cache->page_descriptors,
|
||||
(long)pg_cache->populated_pages,
|
||||
(long)pg_cache->commited_page_index.nr_commited_pages,
|
||||
(long)ctx->stats.pg_cache_insertions,
|
||||
(long)ctx->stats.pg_cache_deletions,
|
||||
(long)ctx->stats.pg_cache_hits,
|
||||
(long)ctx->stats.pg_cache_misses,
|
||||
(long)ctx->stats.pg_cache_backfills,
|
||||
(long)ctx->stats.pg_cache_evictions,
|
||||
(long)ctx->stats.before_compress_bytes,
|
||||
(long)ctx->stats.after_compress_bytes,
|
||||
(long)ctx->stats.before_decompress_bytes,
|
||||
(long)ctx->stats.after_decompress_bytes,
|
||||
(long)ctx->stats.io_write_bytes,
|
||||
(long)ctx->stats.io_write_requests,
|
||||
(long)ctx->stats.io_read_bytes,
|
||||
(long)ctx->stats.io_read_requests,
|
||||
(long)ctx->stats.io_write_extent_bytes,
|
||||
(long)ctx->stats.io_write_extents,
|
||||
(long)ctx->stats.io_read_extent_bytes,
|
||||
(long)ctx->stats.io_read_extents,
|
||||
(long)ctx->stats.datafile_creations,
|
||||
(long)ctx->stats.datafile_deletions,
|
||||
(long)ctx->stats.journalfile_creations,
|
||||
(long)ctx->stats.journalfile_deletions
|
||||
);
|
||||
return str;
|
||||
}
|
84
database/engine/rrdenginelib.h
Normal file
84
database/engine/rrdenginelib.h
Normal file
|
@ -0,0 +1,84 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_RRDENGINELIB_H
|
||||
#define NETDATA_RRDENGINELIB_H
|
||||
|
||||
#include "rrdengine.h"
|
||||
|
||||
/* Forward declarations */
|
||||
struct rrdeng_page_cache_descr;
|
||||
|
||||
#define STR_HELPER(x) #x
|
||||
#define STR(x) STR_HELPER(x)
|
||||
|
||||
/* Taken from linux kernel */
|
||||
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
|
||||
|
||||
#define ALIGN_BYTES_FLOOR(x) (((x) / RRDENG_BLOCK_SIZE) * RRDENG_BLOCK_SIZE)
|
||||
#define ALIGN_BYTES_CEILING(x) ((((x) + RRDENG_BLOCK_SIZE - 1) / RRDENG_BLOCK_SIZE) * RRDENG_BLOCK_SIZE)
|
||||
|
||||
typedef uintptr_t rrdeng_stats_t;
|
||||
|
||||
#ifdef __ATOMIC_RELAXED
|
||||
#define rrd_stat_atomic_add(p, n) do {(void) __atomic_fetch_add(p, n, __ATOMIC_RELAXED);} while(0)
|
||||
#else
|
||||
#define rrd_stat_atomic_add(p, n) do {(void) __sync_fetch_and_add(p, n);} while(0)
|
||||
#endif
|
||||
|
||||
#ifndef O_DIRECT
|
||||
/* Workaround for OS X */
|
||||
#define O_DIRECT (0)
|
||||
#endif
|
||||
|
||||
/*
 * One-shot completion object: wait_for_completion() blocks until another
 * thread calls complete(). Set up with init_completion() and torn down with
 * destroy_completion().
 */
struct completion {
|
||||
/* held while 'completed' is read or written */
uv_mutex_t mutex;
|
||||
/* waited on by wait_for_completion(), broadcast by complete() */
uv_cond_t cond;
|
||||
/* 0 after init_completion(), set to 1 by complete() */
volatile unsigned completed;
|
||||
};
|
||||
|
||||
static inline void init_completion(struct completion *p)
|
||||
{
|
||||
p->completed = 0;
|
||||
assert(0 == uv_cond_init(&p->cond));
|
||||
assert(0 == uv_mutex_init(&p->mutex));
|
||||
}
|
||||
|
||||
static inline void destroy_completion(struct completion *p)
|
||||
{
|
||||
uv_cond_destroy(&p->cond);
|
||||
uv_mutex_destroy(&p->mutex);
|
||||
}
|
||||
|
||||
static inline void wait_for_completion(struct completion *p)
|
||||
{
|
||||
uv_mutex_lock(&p->mutex);
|
||||
while (0 == p->completed) {
|
||||
uv_cond_wait(&p->cond, &p->mutex);
|
||||
}
|
||||
assert(1 == p->completed);
|
||||
uv_mutex_unlock(&p->mutex);
|
||||
}
|
||||
|
||||
static inline void complete(struct completion *p)
|
||||
{
|
||||
uv_mutex_lock(&p->mutex);
|
||||
p->completed = 1;
|
||||
uv_mutex_unlock(&p->mutex);
|
||||
uv_cond_broadcast(&p->cond);
|
||||
}
|
||||
|
||||
/*
 * Compares the 32-bit value stored at crcp with crc.
 * Returns 0 when they are equal, non-zero otherwise.
 */
static inline int crc32cmp(void *crcp, uLong crc)
{
    uint32_t *stored = (uint32_t *)crcp;

    return (*stored != crc);
}
|
||||
|
||||
/*
 * Stores crc as a 32-bit value at the location crcp points to.
 */
static inline void crc32set(void *crcp, uLong crc)
{
    uint32_t *target = (uint32_t *)crcp;

    *target = crc;
}
|
||||
|
||||
extern void print_page_cache_descr(struct rrdeng_page_cache_descr *page_cache_descr);
|
||||
extern int check_file_properties(uv_file file, uint64_t *file_size, size_t min_size);
|
||||
extern char *get_rrdeng_statistics(struct rrdengine_instance *ctx, char *str, size_t size);
|
||||
|
||||
#endif /* NETDATA_RRDENGINELIB_H */
|
|
@ -38,6 +38,9 @@ inline const char *rrd_memory_mode_name(RRD_MEMORY_MODE id) {
|
|||
|
||||
case RRD_MEMORY_MODE_ALLOC:
|
||||
return RRD_MEMORY_MODE_ALLOC_NAME;
|
||||
|
||||
case RRD_MEMORY_MODE_DBENGINE:
|
||||
return RRD_MEMORY_MODE_DBENGINE_NAME;
|
||||
}
|
||||
|
||||
return RRD_MEMORY_MODE_SAVE_NAME;
|
||||
|
@ -56,6 +59,9 @@ RRD_MEMORY_MODE rrd_memory_mode_id(const char *name) {
|
|||
else if(unlikely(!strcmp(name, RRD_MEMORY_MODE_ALLOC_NAME)))
|
||||
return RRD_MEMORY_MODE_ALLOC;
|
||||
|
||||
else if(unlikely(!strcmp(name, RRD_MEMORY_MODE_DBENGINE_NAME)))
|
||||
return RRD_MEMORY_MODE_DBENGINE;
|
||||
|
||||
return RRD_MEMORY_MODE_SAVE;
|
||||
}
|
||||
|
||||
|
@ -140,7 +146,8 @@ char *rrdset_cache_dir(RRDHOST *host, const char *id, const char *config_section
|
|||
snprintfz(n, FILENAME_MAX, "%s/%s", host->cache_dir, b);
|
||||
ret = config_get(config_section, "cache directory", n);
|
||||
|
||||
if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
|
||||
if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE ||
|
||||
host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
int r = mkdir(ret, 0775);
|
||||
if(r != 0 && errno != EEXIST)
|
||||
error("Cannot create directory '%s'", ret);
|
||||
|
|
148
database/rrd.h
148
database/rrd.h
|
@ -14,6 +14,14 @@ typedef struct rrdcalc RRDCALC;
|
|||
typedef struct rrdcalctemplate RRDCALCTEMPLATE;
|
||||
typedef struct alarm_entry ALARM_ENTRY;
|
||||
|
||||
// forward declarations
|
||||
struct rrddim_volatile;
|
||||
#ifdef ENABLE_DBENGINE
|
||||
struct rrdeng_page_cache_descr;
|
||||
struct rrdengine_instance;
|
||||
struct pg_cache_page_index;
|
||||
#endif
|
||||
|
||||
#include "../daemon/common.h"
|
||||
#include "web/api/queries/query.h"
|
||||
#include "rrdvar.h"
|
||||
|
@ -66,7 +74,8 @@ typedef enum rrd_memory_mode {
|
|||
RRD_MEMORY_MODE_RAM = 1,
|
||||
RRD_MEMORY_MODE_MAP = 2,
|
||||
RRD_MEMORY_MODE_SAVE = 3,
|
||||
RRD_MEMORY_MODE_ALLOC = 4
|
||||
RRD_MEMORY_MODE_ALLOC = 4,
|
||||
RRD_MEMORY_MODE_DBENGINE = 5
|
||||
} RRD_MEMORY_MODE;
|
||||
|
||||
#define RRD_MEMORY_MODE_NONE_NAME "none"
|
||||
|
@ -74,6 +83,7 @@ typedef enum rrd_memory_mode {
|
|||
#define RRD_MEMORY_MODE_MAP_NAME "map"
|
||||
#define RRD_MEMORY_MODE_SAVE_NAME "save"
|
||||
#define RRD_MEMORY_MODE_ALLOC_NAME "alloc"
|
||||
#define RRD_MEMORY_MODE_DBENGINE_NAME "dbengine"
|
||||
|
||||
extern RRD_MEMORY_MODE default_rrd_memory_mode;
|
||||
|
||||
|
@ -178,7 +188,8 @@ struct rrddim {
|
|||
char *cache_filename; // the filename we load/save from/to this set
|
||||
|
||||
size_t collections_counter; // the number of times we added values to this rrdim
|
||||
size_t unused[9];
|
||||
struct rrddim_volatile *state; // volatile state that is not persistently stored
|
||||
size_t unused[8];
|
||||
|
||||
collected_number collected_value_max; // the absolute maximum of the collected value
|
||||
|
||||
|
@ -226,6 +237,90 @@ struct rrddim {
|
|||
storage_number values[]; // the array of values - THIS HAS TO BE THE LAST MEMBER
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// iterator state for RRD dimension data collection
|
||||
// Per-dimension collection state. Only one of the members is meaningful for
// a given dimension; the database engine member exists only when
// ENABLE_DBENGINE is defined.
union rrddim_collect_handle {
|
||||
struct {
|
||||
long slot;
|
||||
long entries;
|
||||
} slotted; // state the legacy code uses
|
||||
#ifdef ENABLE_DBENGINE
|
||||
struct rrdeng_collect_handle {
|
||||
struct rrdeng_page_cache_descr *descr, *prev_descr;
|
||||
// NOTE(review): presumably the key passed to rrdeng_commit_page() - confirm
unsigned long page_correlation_id;
|
||||
struct rrdengine_instance *ctx;
|
||||
// read by rrdeng_metric_latest_time() and rrdeng_metric_oldest_time()
struct pg_cache_page_index *page_index;
|
||||
} rrdeng; // state the database engine uses
|
||||
#endif
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// iterator state for RRD dimension data queries
|
||||
// State of one data query over a single dimension. The common fields are
// followed by a union holding the state of whichever storage back-end serves
// the query.
struct rrddim_query_handle {
|
||||
RRDDIM *rd;
|
||||
time_t start_time;
|
||||
time_t end_time;
|
||||
union {
|
||||
struct {
|
||||
// slot to read on the next call of the legacy next_metric function
long slot;
|
||||
// slot computed from the end time of the query
long last_slot;
|
||||
// set to 1 once the slot equal to last_slot has been read
uint8_t finished;
|
||||
} slotted; // state the legacy code uses
|
||||
#ifdef ENABLE_DBENGINE
|
||||
struct rrdeng_query_handle {
|
||||
// page currently referenced by the query, NULL when none is held
struct rrdeng_page_cache_descr *descr;
|
||||
struct rrdengine_instance *ctx;
|
||||
struct pg_cache_page_index *page_index;
|
||||
// current position of the query in seconds; INVALID_TIME once finished
time_t now; //TODO: remove now to implement next point iteration
|
||||
// amount added to 'now' after every value that is loaded
time_t dt; //TODO: remove dt to implement next point iteration
|
||||
} rrdeng; // state the database engine uses
|
||||
#endif
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// volatile state per RRD dimension
|
||||
// Run-time only state of a dimension: the collection handle plus the function
// tables that route data collection and data queries to the storage back-end
// selected when the dimension is created.
struct rrddim_volatile {
|
||||
#ifdef ENABLE_DBENGINE
|
||||
uuid_t *rrdeng_uuid; // database engine metric UUID
|
||||
#endif
|
||||
union rrddim_collect_handle handle;
|
||||
// ------------------------------------------------------------------------
|
||||
// function pointers that handle data collection
|
||||
struct rrddim_collect_ops {
|
||||
// an initialization function to run before starting collection
|
||||
void (*init)(RRDDIM *rd);
|
||||
|
||||
// run this to store each metric into the database
|
||||
void (*store_metric)(RRDDIM *rd, usec_t point_in_time, storage_number number);
|
||||
|
||||
// a finalization function to run after collection is over
|
||||
void (*finalize)(RRDDIM *rd);
|
||||
} collect_ops;
|
||||
|
||||
// function pointers that handle database queries
|
||||
struct rrddim_query_ops {
|
||||
// run this before starting a series of next_metric() database queries
|
||||
void (*init)(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time);
|
||||
|
||||
// run this to load each metric number from the database
|
||||
storage_number (*next_metric)(struct rrddim_query_handle *handle);
|
||||
|
||||
// run this to test if the series of next_metric() database queries is finished
|
||||
int (*is_finished)(struct rrddim_query_handle *handle);
|
||||
|
||||
// run this after finishing a series of next_metric() database queries
|
||||
void (*finalize)(struct rrddim_query_handle *handle);
|
||||
|
||||
// get the timestamp of the last entry of this metric
|
||||
time_t (*latest_time)(RRDDIM *rd);
|
||||
|
||||
// get the timestamp of the first entry of this metric
|
||||
time_t (*oldest_time)(RRDDIM *rd);
|
||||
} query_ops;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// these loop macros make sure the linked list is accessed with the right lock
|
||||
|
||||
|
@ -528,6 +623,10 @@ struct rrdhost {
|
|||
|
||||
int rrd_update_every; // the update frequency of the host
|
||||
long rrd_history_entries; // the number of history entries for the host's charts
|
||||
#ifdef ENABLE_DBENGINE
|
||||
unsigned page_cache_mb; // Database Engine page cache size in MiB
|
||||
unsigned disk_space_mb; // Database Engine disk space quota in MiB
|
||||
#endif
|
||||
RRD_MEMORY_MODE rrd_memory_mode; // the memory more for the charts of this host
|
||||
|
||||
char *cache_dir; // the directory to save RRD cache files
|
||||
|
@ -620,6 +719,10 @@ struct rrdhost {
|
|||
avl_tree_lock rrdfamily_root_index; // the host's chart families index
|
||||
avl_tree_lock rrdvar_root_index; // the host's chart variables index
|
||||
|
||||
#ifdef ENABLE_DBENGINE
|
||||
struct rrdengine_instance *rrdeng_ctx; // DB engine instance for this host
|
||||
#endif
|
||||
|
||||
struct rrdhost *next;
|
||||
};
|
||||
extern RRDHOST *localhost;
|
||||
|
@ -771,10 +874,41 @@ extern void rrdset_isnot_obsolete(RRDSET *st);
|
|||
#define rrdset_duration(st) ((time_t)( (((st)->counter >= ((unsigned long)(st)->entries))?(unsigned long)(st)->entries:(st)->counter) * (st)->update_every ))
|
||||
|
||||
// get the timestamp of the last entry in the round robin database
|
||||
#define rrdset_last_entry_t(st) ((time_t)(((st)->last_updated.tv_sec)))
|
||||
// For database engine charts this is the maximum latest_time() over all the
// dimensions of the chart (0 when it has no dimensions); for every other
// memory mode it is the second the chart was last updated.
// The chart read lock is only tried: the dimensions are walked even when it
// could not be taken, and it is released only if it was acquired here.
static inline time_t rrdset_last_entry_t(RRDSET *st) {
    if (st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE)
        return (time_t)st->last_updated.tv_sec;

    RRDDIM *rd;
    time_t newest = 0;
    int lock_rc = netdata_rwlock_tryrdlock(&st->rrdset_rwlock);

    rrddim_foreach_read(rd, st) {
        newest = MAX(newest, rd->state->query_ops.latest_time(rd));
    }
    if(0 == lock_rc) netdata_rwlock_unlock(&st->rrdset_rwlock);

    return newest;
}
|
||||
|
||||
// get the timestamp of first entry in the round robin database
|
||||
#define rrdset_first_entry_t(st) ((time_t)(rrdset_last_entry_t(st) - rrdset_duration(st)))
|
||||
// For database engine charts this is the minimum oldest_time() over all the
// dimensions of the chart (0 when no dimension lowered the LONG_MAX starting
// value); for every other memory mode it is the last entry time minus the
// duration of the chart.
// The chart read lock is only tried: the dimensions are walked even when it
// could not be taken, and it is released only if it was acquired here.
static inline time_t rrdset_first_entry_t(RRDSET *st) {
    if (st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE)
        return (time_t)(rrdset_last_entry_t(st) - rrdset_duration(st));

    RRDDIM *rd;
    time_t oldest = LONG_MAX;
    int lock_rc = netdata_rwlock_tryrdlock(&st->rrdset_rwlock);

    rrddim_foreach_read(rd, st) {
        oldest = MIN(oldest, rd->state->query_ops.oldest_time(rd));
    }
    if(0 == lock_rc) netdata_rwlock_unlock(&st->rrdset_rwlock);

    if (unlikely(LONG_MAX == oldest))
        return 0;

    return oldest;
}
|
||||
|
||||
// get the last slot updated in the round robin database
|
||||
#define rrdset_last_slot(st) ((size_t)(((st)->current_entry == 0) ? (st)->entries - 1 : (st)->current_entry - 1))
|
||||
|
@ -914,5 +1048,11 @@ extern void rrdhost_cleanup_obsolete_charts(RRDHOST *host);
|
|||
|
||||
#endif /* NETDATA_RRD_INTERNALS */
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRD DB engine declarations
|
||||
|
||||
#ifdef ENABLE_DBENGINE
|
||||
#include "database/engine/rrdengineapi.h"
|
||||
#endif
|
||||
|
||||
#endif /* NETDATA_RRD_H */
|
||||
|
|
|
@ -89,6 +89,69 @@ inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor)
|
|||
return 1;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDDIM legacy data collection functions
|
||||
|
||||
// Legacy collection start: marks the value at the chart's current entry as
// an empty slot.
static void rrddim_collect_init(RRDDIM *rd) {
    long entry = rd->rrdset->current_entry;

    rd->values[entry] = SN_EMPTY_SLOT; // pack_storage_number(0, SN_NOT_EXISTS);
}
|
||||
// Legacy collection store: writes the number at the chart's current entry.
// point_in_time is not used by this back-end.
static void rrddim_collect_store_metric(RRDDIM *rd, usec_t point_in_time, storage_number number) {
    long entry = rd->rrdset->current_entry;

    (void)point_in_time;
    rd->values[entry] = number;
}
|
||||
// Legacy collection end: there is nothing to release, so this does nothing.
static void rrddim_collect_finalize(RRDDIM *rd) {
    (void)rd;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDDIM legacy database query functions
|
||||
|
||||
// Legacy query start: records the dimension and the time range in the handle
// and converts both ends of the range to slots of the chart.
static void rrddim_query_init(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time) {
    RRDSET *chart = rd->rrdset;

    handle->rd = rd;
    handle->start_time = start_time;
    handle->end_time = end_time;

    handle->slotted.slot = rrdset_time2slot(chart, start_time);
    handle->slotted.last_slot = rrdset_time2slot(chart, end_time);
    handle->slotted.finished = 0;
}
|
||||
|
||||
static storage_number rrddim_query_next_metric(struct rrddim_query_handle *handle) {
|
||||
RRDDIM *rd = handle->rd;
|
||||
long entries = rd->rrdset->entries;
|
||||
long slot = handle->slotted.slot;
|
||||
|
||||
if (unlikely(handle->slotted.slot == handle->slotted.last_slot))
|
||||
handle->slotted.finished = 1;
|
||||
storage_number n = rd->values[slot++];
|
||||
|
||||
if(unlikely(slot >= entries)) slot = 0;
|
||||
handle->slotted.slot = slot;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
static int rrddim_query_is_finished(struct rrddim_query_handle *handle) {
|
||||
return handle->slotted.finished;
|
||||
}
|
||||
|
||||
// Legacy query end: there is nothing to release, so this does nothing.
static void rrddim_query_finalize(struct rrddim_query_handle *handle) {
    (void)handle;
}
|
||||
|
||||
// Legacy back-end: the latest time of a dimension is the last entry time of
// the chart it belongs to.
static time_t rrddim_query_latest_time(RRDDIM *rd) {
    RRDSET *chart = rd->rrdset;

    return rrdset_last_entry_t(chart);
}
|
||||
|
||||
// Legacy back-end: the oldest time of a dimension is the first entry time of
// the chart it belongs to.
static time_t rrddim_query_oldest_time(RRDDIM *rd) {
    RRDSET *chart = rd->rrdset;

    return rrdset_first_entry_t(chart);
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDDIM create a dimension
|
||||
|
||||
|
@ -123,9 +186,10 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
|
|||
rrdset_strncpyz_name(filename, id, FILENAME_MAX);
|
||||
snprintfz(fullfilename, FILENAME_MAX, "%s/%s.db", st->cache_dir, filename);
|
||||
|
||||
if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP || memory_mode == RRD_MEMORY_MODE_RAM) {
|
||||
if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP ||
|
||||
memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
rd = (RRDDIM *)mymmap(
|
||||
(memory_mode == RRD_MEMORY_MODE_RAM)?NULL:fullfilename
|
||||
(memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE)?NULL:fullfilename
|
||||
, size
|
||||
, ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE)
|
||||
, 1
|
||||
|
@ -146,7 +210,7 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
|
|||
struct timeval now;
|
||||
now_realtime_timeval(&now);
|
||||
|
||||
if(memory_mode == RRD_MEMORY_MODE_RAM) {
|
||||
if(memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
memset(rd, 0, size);
|
||||
}
|
||||
else {
|
||||
|
@ -243,11 +307,34 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
|
|||
rd->collected_volume = 0;
|
||||
rd->stored_volume = 0;
|
||||
rd->last_stored_value = 0;
|
||||
rd->values[st->current_entry] = SN_EMPTY_SLOT; // pack_storage_number(0, SN_NOT_EXISTS);
|
||||
rd->last_collected_time.tv_sec = 0;
|
||||
rd->last_collected_time.tv_usec = 0;
|
||||
rd->rrdset = st;
|
||||
|
||||
rd->state = mallocz(sizeof(*rd->state));
|
||||
if(memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
#ifdef ENABLE_DBENGINE
|
||||
rd->state->collect_ops.init = rrdeng_store_metric_init;
|
||||
rd->state->collect_ops.store_metric = rrdeng_store_metric_next;
|
||||
rd->state->collect_ops.finalize = rrdeng_store_metric_finalize;
|
||||
rd->state->query_ops.init = rrdeng_load_metric_init;
|
||||
rd->state->query_ops.next_metric = rrdeng_load_metric_next;
|
||||
rd->state->query_ops.is_finished = rrdeng_load_metric_is_finished;
|
||||
rd->state->query_ops.finalize = rrdeng_load_metric_finalize;
|
||||
rd->state->query_ops.latest_time = rrdeng_metric_latest_time;
|
||||
rd->state->query_ops.oldest_time = rrdeng_metric_oldest_time;
|
||||
#endif
|
||||
} else {
|
||||
rd->state->collect_ops.init = rrddim_collect_init;
|
||||
rd->state->collect_ops.store_metric = rrddim_collect_store_metric;
|
||||
rd->state->collect_ops.finalize = rrddim_collect_finalize;
|
||||
rd->state->query_ops.init = rrddim_query_init;
|
||||
rd->state->query_ops.next_metric = rrddim_query_next_metric;
|
||||
rd->state->query_ops.is_finished = rrddim_query_is_finished;
|
||||
rd->state->query_ops.finalize = rrddim_query_finalize;
|
||||
rd->state->query_ops.latest_time = rrddim_query_latest_time;
|
||||
rd->state->query_ops.oldest_time = rrddim_query_oldest_time;
|
||||
}
|
||||
rd->state->collect_ops.init(rd);
|
||||
// append this dimension
|
||||
if(!st->dimensions)
|
||||
st->dimensions = rd;
|
||||
|
@ -294,6 +381,9 @@ void rrddim_free(RRDSET *st, RRDDIM *rd)
|
|||
{
|
||||
debug(D_RRD_CALLS, "rrddim_free() %s.%s", st->name, rd->name);
|
||||
|
||||
rd->state->collect_ops.finalize(rd);
|
||||
freez(rd->state);
|
||||
|
||||
if(rd == st->dimensions)
|
||||
st->dimensions = rd->next;
|
||||
else {
|
||||
|
@ -319,6 +409,7 @@ void rrddim_free(RRDSET *st, RRDDIM *rd)
|
|||
case RRD_MEMORY_MODE_SAVE:
|
||||
case RRD_MEMORY_MODE_MAP:
|
||||
case RRD_MEMORY_MODE_RAM:
|
||||
case RRD_MEMORY_MODE_DBENGINE:
|
||||
debug(D_RRD_CALLS, "Unmapping dimension '%s'.", rd->name);
|
||||
freez((void *)rd->id);
|
||||
freez(rd->cache_filename);
|
||||
|
|
|
@ -134,6 +134,10 @@ RRDHOST *rrdhost_create(const char *hostname,
|
|||
host->rrd_update_every = (update_every > 0)?update_every:1;
|
||||
host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries);
|
||||
host->rrd_memory_mode = memory_mode;
|
||||
#ifdef ENABLE_DBENGINE
|
||||
host->page_cache_mb = default_rrdeng_page_cache_mb;
|
||||
host->disk_space_mb = default_rrdeng_disk_quota_mb;
|
||||
#endif
|
||||
host->health_enabled = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled;
|
||||
host->rrdpush_send_enabled = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) ? 1 : 0;
|
||||
host->rrdpush_send_destination = (host->rrdpush_send_enabled)?strdupz(rrdpush_destination):NULL;
|
||||
|
@ -205,7 +209,8 @@ RRDHOST *rrdhost_create(const char *hostname,
|
|||
snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid);
|
||||
host->cache_dir = strdupz(filename);
|
||||
|
||||
if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) {
|
||||
if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE ||
|
||||
host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
int r = mkdir(host->cache_dir, 0775);
|
||||
if(r != 0 && errno != EEXIST)
|
||||
error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir);
|
||||
|
@ -221,6 +226,30 @@ RRDHOST *rrdhost_create(const char *hostname,
|
|||
}
|
||||
|
||||
}
|
||||
if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
#ifdef ENABLE_DBENGINE
|
||||
char dbenginepath[FILENAME_MAX + 1];
|
||||
int ret;
|
||||
|
||||
snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", host->cache_dir);
|
||||
ret = mkdir(dbenginepath, 0775);
|
||||
if(ret != 0 && errno != EEXIST)
|
||||
error("Host '%s': cannot create directory '%s'", host->hostname, dbenginepath);
|
||||
else
|
||||
ret = rrdeng_init(&host->rrdeng_ctx, dbenginepath, host->page_cache_mb, host->disk_space_mb);
|
||||
if(ret) {
|
||||
error("Host '%s': cannot initialize host with machine guid '%s'. Failed to initialize DB engine at '%s'.",
|
||||
host->hostname, host->machine_guid, host->cache_dir);
|
||||
rrdhost_free(host);
|
||||
host = NULL;
|
||||
//rrd_hosts_available++; //TODO: maybe we want this?
|
||||
|
||||
return host;
|
||||
}
|
||||
#else
|
||||
fatal("RRD_MEMORY_MODE_DBENGINE is not supported in this platform.");
|
||||
#endif
|
||||
}
|
||||
|
||||
if(host->health_enabled) {
|
||||
snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir);
|
||||
|
@ -569,6 +598,12 @@ void rrdhost_free(RRDHOST *host) {
|
|||
|
||||
health_alarm_log_free(host);
|
||||
|
||||
if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
#ifdef ENABLE_DBENGINE
|
||||
rrdeng_exit(host->rrdeng_ctx);
|
||||
#endif
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// remove it from the indexes
|
||||
|
||||
|
|
|
@ -363,6 +363,7 @@ void rrdset_free(RRDSET *st) {
|
|||
case RRD_MEMORY_MODE_SAVE:
|
||||
case RRD_MEMORY_MODE_MAP:
|
||||
case RRD_MEMORY_MODE_RAM:
|
||||
case RRD_MEMORY_MODE_DBENGINE:
|
||||
debug(D_RRD_CALLS, "Unmapping stats '%s'.", st->name);
|
||||
munmap(st, st->memsize);
|
||||
break;
|
||||
|
@ -541,6 +542,9 @@ RRDSET *rrdset_create_custom(
|
|||
int enabled = config_get_boolean(config_section, "enabled", 1);
|
||||
if(!enabled) entries = 5;
|
||||
|
||||
if(memory_mode == RRD_MEMORY_MODE_DBENGINE)
|
||||
entries = config_set_number(config_section, "history", 5);
|
||||
|
||||
unsigned long size = sizeof(RRDSET);
|
||||
char *cache_dir = rrdset_cache_dir(host, fullid, config_section);
|
||||
|
||||
|
@ -552,9 +556,10 @@ RRDSET *rrdset_create_custom(
|
|||
debug(D_RRD_CALLS, "Creating RRD_STATS for '%s.%s'.", type, id);
|
||||
|
||||
snprintfz(fullfilename, FILENAME_MAX, "%s/main.db", cache_dir);
|
||||
if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP || memory_mode == RRD_MEMORY_MODE_RAM) {
|
||||
if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP ||
|
||||
memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
st = (RRDSET *) mymmap(
|
||||
(memory_mode == RRD_MEMORY_MODE_RAM)?NULL:fullfilename
|
||||
(memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE)?NULL:fullfilename
|
||||
, size
|
||||
, ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE)
|
||||
, 0
|
||||
|
@ -585,7 +590,7 @@ RRDSET *rrdset_create_custom(
|
|||
st->alarms = NULL;
|
||||
st->flags = 0x00000000;
|
||||
|
||||
if(memory_mode == RRD_MEMORY_MODE_RAM) {
|
||||
if(memory_mode == RRD_MEMORY_MODE_RAM || memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
memset(st, 0, size);
|
||||
}
|
||||
else {
|
||||
|
@ -631,7 +636,10 @@ RRDSET *rrdset_create_custom(
|
|||
|
||||
if(unlikely(!st)) {
|
||||
st = callocz(1, size);
|
||||
st->rrd_memory_mode = (memory_mode == RRD_MEMORY_MODE_NONE) ? RRD_MEMORY_MODE_NONE : RRD_MEMORY_MODE_ALLOC;
|
||||
if (memory_mode == RRD_MEMORY_MODE_DBENGINE)
|
||||
st->rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
|
||||
else
|
||||
st->rrd_memory_mode = (memory_mode == RRD_MEMORY_MODE_NONE) ? RRD_MEMORY_MODE_NONE : RRD_MEMORY_MODE_ALLOC;
|
||||
}
|
||||
|
||||
st->plugin_name = plugin?strdupz(plugin):NULL;
|
||||
|
@ -1052,12 +1060,14 @@ static inline size_t rrdset_done_interpolate(
|
|||
}
|
||||
|
||||
if(unlikely(!store_this_entry)) {
|
||||
rd->values[current_entry] = SN_EMPTY_SLOT; //pack_storage_number(0, SN_NOT_EXISTS);
|
||||
rd->state->collect_ops.store_metric(rd, next_store_ut, SN_EMPTY_SLOT); //pack_storage_number(0, SN_NOT_EXISTS)
|
||||
// rd->values[current_entry] = SN_EMPTY_SLOT; //pack_storage_number(0, SN_NOT_EXISTS);
|
||||
continue;
|
||||
}
|
||||
|
||||
if(likely(rd->updated && rd->collections_counter > 1 && iterations < st->gap_when_lost_iterations_above)) {
|
||||
rd->values[current_entry] = pack_storage_number(new_value, storage_flags );
|
||||
rd->state->collect_ops.store_metric(rd, next_store_ut, pack_storage_number(new_value, storage_flags));
|
||||
// rd->values[current_entry] = pack_storage_number(new_value, storage_flags );
|
||||
rd->last_stored_value = new_value;
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
|
@ -1079,7 +1089,8 @@ static inline size_t rrdset_done_interpolate(
|
|||
);
|
||||
#endif
|
||||
|
||||
rd->values[current_entry] = SN_EMPTY_SLOT; // pack_storage_number(0, SN_NOT_EXISTS);
|
||||
// rd->values[current_entry] = SN_EMPTY_SLOT; // pack_storage_number(0, SN_NOT_EXISTS);
|
||||
rd->state->collect_ops.store_metric(rd, next_store_ut, SN_EMPTY_SLOT); //pack_storage_number(0, SN_NOT_EXISTS)
|
||||
rd->last_stored_value = NAN;
|
||||
}
|
||||
|
||||
|
@ -1119,11 +1130,16 @@ static inline size_t rrdset_done_interpolate(
|
|||
// reset the storage flags for the next point, if any;
|
||||
storage_flags = SN_EXISTS;
|
||||
|
||||
counter++;
|
||||
current_entry = ((current_entry + 1) >= st->entries) ? 0 : current_entry + 1;
|
||||
st->counter = ++counter;
|
||||
st->current_entry = current_entry = ((current_entry + 1) >= st->entries) ? 0 : current_entry + 1;
|
||||
|
||||
st->last_updated.tv_sec = (time_t) (last_ut / USEC_PER_SEC);
|
||||
st->last_updated.tv_usec = 0;
|
||||
|
||||
last_stored_ut = next_store_ut;
|
||||
}
|
||||
|
||||
/*
|
||||
st->counter = counter;
|
||||
st->current_entry = current_entry;
|
||||
|
||||
|
@ -1131,6 +1147,7 @@ static inline size_t rrdset_done_interpolate(
|
|||
st->last_updated.tv_sec = (time_t) (last_ut / USEC_PER_SEC);
|
||||
st->last_updated.tv_usec = 0;
|
||||
}
|
||||
*/
|
||||
|
||||
return stored_entries;
|
||||
}
|
||||
|
@ -1201,7 +1218,8 @@ void rrdset_done(RRDSET *st) {
|
|||
}
|
||||
|
||||
// check if the chart took too long to be updated
|
||||
if(unlikely(st->usec_since_last_update > st->entries * update_every_ut)) {
|
||||
if(unlikely(st->usec_since_last_update > st->entries * update_every_ut &&
|
||||
st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE)) {
|
||||
info("host '%s', chart %s: took too long to be updated (counter #%zu, update #%zu, %0.3" LONG_DOUBLE_MODIFIER " secs). Resetting it.", st->rrdhost->hostname, st->name, st->counter, st->counter_done, (LONG_DOUBLE)st->usec_since_last_update / USEC_PER_SEC);
|
||||
rrdset_reset(st);
|
||||
st->usec_since_last_update = update_every_ut;
|
||||
|
@ -1242,7 +1260,8 @@ void rrdset_done(RRDSET *st) {
|
|||
}
|
||||
|
||||
// check if we will re-write the entire data set
|
||||
if(unlikely(dt_usec(&st->last_collected_time, &st->last_updated) > st->entries * update_every_ut)) {
|
||||
if(unlikely(dt_usec(&st->last_collected_time, &st->last_updated) > st->entries * update_every_ut &&
|
||||
st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE)) {
|
||||
info("%s: too old data (last updated at %ld.%ld, last collected at %ld.%ld). Resetting it. Will not store the next entry.", st->name, st->last_updated.tv_sec, st->last_updated.tv_usec, st->last_collected_time.tv_sec, st->last_collected_time.tv_usec);
|
||||
rrdset_reset(st);
|
||||
rrdset_init_last_updated_time(st);
|
||||
|
@ -1266,11 +1285,17 @@ void rrdset_done(RRDSET *st) {
|
|||
// if we have not collected metrics this session (st->counter_done == 0)
|
||||
// and we have collected metrics for this chart in the past (st->counter != 0)
|
||||
// fill the gap (the chart has been just loaded from disk)
|
||||
if(unlikely(st->counter)) {
|
||||
if(unlikely(st->counter) && st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) {
|
||||
rrdset_done_fill_the_gap(st);
|
||||
last_stored_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec;
|
||||
next_store_ut = (st->last_updated.tv_sec + st->update_every) * USEC_PER_SEC;
|
||||
}
|
||||
if (st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
// set a fake last_updated to jump to current time
|
||||
rrdset_init_last_updated_time(st);
|
||||
last_stored_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec;
|
||||
next_store_ut = (st->last_updated.tv_sec + st->update_every) * USEC_PER_SEC;
|
||||
}
|
||||
|
||||
if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST))) {
|
||||
store_this_entry = 1;
|
||||
|
|
|
@ -202,6 +202,9 @@
|
|||
#endif
|
||||
#define abs(x) (((x) < 0)? (-(x)) : (x))
|
||||
|
||||
#define MIN(a,b) (((a)<(b))?(a):(b))
|
||||
#define MAX(a,b) (((a)>(b))?(a):(b))
|
||||
|
||||
#define GUID_LEN 36
|
||||
|
||||
extern void netdata_fix_chart_id(char *s);
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#define D_STATSD 0x0000000010000000
|
||||
#define D_POLLFD 0x0000000020000000
|
||||
#define D_STREAM 0x0000000040000000
|
||||
#define D_RRDENGINE 0x0000000100000000
|
||||
#define D_SYSTEM 0x8000000000000000
|
||||
|
||||
//#define DEBUG (D_WEB_CLIENT_ACCESS|D_LISTENER|D_RRD_STATS)
|
||||
|
|
|
@ -191,13 +191,13 @@ This is how to do it by hand:
|
|||
|
||||
```sh
|
||||
# Debian / Ubuntu
|
||||
apt-get install zlib1g-dev uuid-dev libmnl-dev gcc make git autoconf autoconf-archive autogen automake pkg-config curl
|
||||
apt-get install zlib1g-dev uuid-dev libuv1-dev liblz4-dev libjudy-dev libssl-dev libmnl-dev gcc make git autoconf autoconf-archive autogen automake pkg-config curl
|
||||
|
||||
# Fedora
|
||||
dnf install zlib-devel libuuid-devel libmnl-devel gcc make git autoconf autoconf-archive autogen automake pkgconfig curl findutils
|
||||
dnf install zlib-devel libuuid-devel libuv-devel lz4-devel Judy-devel openssl-devel libmnl-devel gcc make git autoconf autoconf-archive autogen automake pkgconfig curl findutils
|
||||
|
||||
# CentOS / Red Hat Enterprise Linux
|
||||
yum install autoconf automake curl gcc git libmnl-devel libuuid-devel lm_sensors make MySQL-python nc pkgconfig python python-psycopg2 PyYAML zlib-devel
|
||||
yum install autoconf automake curl gcc git libmnl-devel libuuid-devel openssl-devel libuv-devel lz4-devel Judy-devel lm_sensors make MySQL-python nc pkgconfig python python-psycopg2 PyYAML zlib-devel
|
||||
|
||||
```
|
||||
|
||||
|
@ -234,6 +234,17 @@ package|description
|
|||
|
||||
*Netdata will greatly benefit if you have the above packages installed, but it will still work without them.*
|
||||
|
||||
Netdata DB engine can be enabled when these are installed (they are optional):
|
||||
|
||||
|package|description|
|
||||
|:-----:|-----------|
|
||||
|`libuv`|multi-platform support library with a focus on asynchronous I/O|
|
||||
|`liblz4`|Extremely Fast Compression algorithm|
|
||||
|`Judy`|General purpose dynamic array|
|
||||
|`openssl`|Cryptography and SSL/TLS Toolkit|
|
||||
|
||||
*Netdata will still work without the above packages, but the DB engine (`memory mode = dbengine`) will not be available.*
|
||||
|
||||
---
|
||||
|
||||
### Install Netdata
|
||||
|
|
|
@ -73,7 +73,7 @@ These are options that affect the operation of netdata in this area:
|
|||
|
||||
```
|
||||
[global]
|
||||
memory mode = none | ram | save | map
|
||||
memory mode = none | ram | save | map | dbengine
|
||||
```
|
||||
|
||||
`[global].memory mode = none` disables the database at this host. This also disables health
|
||||
|
@ -170,6 +170,10 @@ the unique id the netdata generating the metrics (i.e. the netdata that original
|
|||
them `/var/lib/netdata/registry/netdata.unique.id`). So, metrics for netdata `A` that pass through
|
||||
any number of other netdata, will have the same `MACHINE_GUID`.
|
||||
|
||||
You can also use `default memory mode = dbengine` for an API key or `memory mode = dbengine` for
|
||||
a single host. The additional `page cache size` and `dbengine disk space` configuration options
|
||||
are inherited from the global netdata configuration.
|
||||
|
||||
##### allow from
|
||||
|
||||
`allow from` settings are [netdata simple patterns](../libnetdata/simple_pattern): string matches
|
||||
|
|
|
@ -103,10 +103,11 @@
|
|||
# You can also set it per host below.
|
||||
# If you don't set it here, the memory mode of netdata.conf will be used.
|
||||
# Valid modes:
|
||||
# save save on exit, load on start
|
||||
# map like swap (continuously syncing to disks - you need SSD)
|
||||
# ram keep it in RAM, don't touch the disk
|
||||
# none no database at all (use this on headless proxies)
|
||||
# save save on exit, load on start
|
||||
# map like swap (continuously syncing to disks - you need SSD)
|
||||
# ram keep it in RAM, don't touch the disk
|
||||
# none no database at all (use this on headless proxies)
|
||||
# dbengine database engine: page cache in RAM, data files on disk
|
||||
default memory mode = ram
|
||||
|
||||
# Shall we enable health monitoring for the hosts using this API key?
|
||||
|
@ -167,7 +168,7 @@
|
|||
# The number of entries in the database
|
||||
history = 3600
|
||||
|
||||
# The memory mode of the database: save | map | ram | none
|
||||
# The memory mode of the database: save | map | ram | none | dbengine
|
||||
memory mode = save
|
||||
|
||||
# Health / alarms control: yes | no | auto
|
||||
|
|
|
@ -96,12 +96,19 @@ void rrdr_json_wrapper_begin(RRDR *r, BUFFER *wb, uint32_t format, RRDR_OPTIONS
|
|||
if(i) buffer_strcat(wb, ", ");
|
||||
i++;
|
||||
|
||||
calculated_number value = rd->last_stored_value;
|
||||
if (NAN == value)
|
||||
buffer_strcat(wb, "null");
|
||||
else
|
||||
buffer_rrd_value(wb, value);
|
||||
/*
|
||||
storage_number n = rd->values[rrdset_last_slot(r->st)];
|
||||
|
||||
if(!does_storage_number_exist(n))
|
||||
buffer_strcat(wb, "null");
|
||||
else
|
||||
buffer_rrd_value(wb, unpack_storage_number(n));
|
||||
*/
|
||||
}
|
||||
if(!i) {
|
||||
rows = 0;
|
||||
|
|
|
@ -7,6 +7,9 @@
|
|||
void rrdset2json(RRDSET *st, BUFFER *wb, size_t *dimensions_count, size_t *memory_used) {
|
||||
rrdset_rdlock(st);
|
||||
|
||||
time_t first_entry_t = rrdset_first_entry_t(st);
|
||||
time_t last_entry_t = rrdset_last_entry_t(st);
|
||||
|
||||
buffer_sprintf(wb,
|
||||
"\t\t{\n"
|
||||
"\t\t\t\"id\": \"%s\",\n"
|
||||
|
@ -40,9 +43,9 @@ void rrdset2json(RRDSET *st, BUFFER *wb, size_t *dimensions_count, size_t *memor
|
|||
, st->units
|
||||
, st->name
|
||||
, rrdset_type_name(st->chart_type)
|
||||
, st->entries * st->update_every
|
||||
, rrdset_first_entry_t(st)
|
||||
, rrdset_last_entry_t(st)
|
||||
, last_entry_t - first_entry_t + st->update_every//st->entries * st->update_every
|
||||
, first_entry_t//rrdset_first_entry_t(st)
|
||||
, last_entry_t//rrdset_last_entry_t(st)
|
||||
, st->update_every
|
||||
);
|
||||
|
||||
|
|
|
@ -381,13 +381,9 @@ static inline void do_dimension(
|
|||
, long points_wanted
|
||||
, RRDDIM *rd
|
||||
, long dim_id_in_rrdr
|
||||
, long after_slot
|
||||
, long before_slot
|
||||
, time_t after_wanted
|
||||
, time_t before_wanted
|
||||
){
|
||||
(void) before_slot;
|
||||
|
||||
RRDSET *st = r->st;
|
||||
|
||||
time_t
|
||||
|
@ -397,21 +393,22 @@ static inline void do_dimension(
|
|||
min_date = 0;
|
||||
|
||||
long
|
||||
slot = after_slot,
|
||||
group_size = r->group,
|
||||
points_added = 0,
|
||||
values_in_group = 0,
|
||||
values_in_group_non_zero = 0,
|
||||
rrdr_line = -1,
|
||||
entries = st->entries;
|
||||
rrdr_line = -1;
|
||||
|
||||
RRDR_VALUE_FLAGS
|
||||
group_value_flags = RRDR_VALUE_NOTHING;
|
||||
|
||||
struct rrddim_query_handle handle;
|
||||
uint8_t initialized_query;
|
||||
|
||||
calculated_number min = r->min, max = r->max;
|
||||
size_t db_points_read = 0;
|
||||
for( ; points_added < points_wanted ; now += dt, slot++ ) {
|
||||
if(unlikely(slot >= entries)) slot = 0;
|
||||
|
||||
for(initialized_query = 0 ; points_added < points_wanted ; now += dt) {
|
||||
|
||||
// make sure we return data in the proper time range
|
||||
if(unlikely(now > before_wanted)) {
|
||||
|
@ -427,8 +424,23 @@ static inline void do_dimension(
|
|||
continue;
|
||||
}
|
||||
|
||||
if (unlikely(!initialized_query)) {
|
||||
rd->state->query_ops.init(rd, &handle, now, before_wanted);
|
||||
initialized_query = 1;
|
||||
}
|
||||
// read the value from the database
|
||||
storage_number n = rd->values[slot];
|
||||
//storage_number n = rd->values[slot];
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
if (rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
#ifdef ENABLE_DBENGINE
|
||||
if (now != handle.rrdeng.now)
|
||||
error("INTERNAL CHECK: Unaligned query for %s, database time: %ld, expected time: %ld", rd->id, (long)handle.rrdeng.now, (long)now);
|
||||
#endif
|
||||
} else if (rrdset_time2slot(st, now) != (long unsigned)handle.slotted.slot) {
|
||||
error("INTERNAL CHECK: Unaligned query for %s, database slot: %lu, expected slot: %lu", rd->id, (long unsigned)handle.slotted.slot, rrdset_time2slot(st, now));
|
||||
}
|
||||
#endif
|
||||
storage_number n = rd->state->query_ops.next_metric(&handle);
|
||||
calculated_number value = NAN;
|
||||
if(likely(does_storage_number_exist(n))) {
|
||||
|
||||
|
@ -485,6 +497,8 @@ static inline void do_dimension(
|
|||
values_in_group_non_zero = 0;
|
||||
}
|
||||
}
|
||||
if (likely(initialized_query))
|
||||
rd->state->query_ops.finalize(&handle);
|
||||
|
||||
r->internal.db_points_read += db_points_read;
|
||||
r->internal.result_points_generated += points_added;
|
||||
|
@ -517,15 +531,15 @@ static void rrd2rrdr_log_request_response_metdata(RRDR *r
|
|||
, time_t before_requested
|
||||
, long points_requested
|
||||
, long points_wanted
|
||||
, size_t after_slot
|
||||
, size_t before_slot
|
||||
//, size_t after_slot
|
||||
//, size_t before_slot
|
||||
, const char *msg
|
||||
) {
|
||||
info("INTERNAL ERROR: rrd2rrdr() on %s update every %d with %s grouping %s (group: %ld, resampling_time: %ld, resampling_group: %ld), "
|
||||
"after (got: %zu, want: %zu, req: %zu, db: %zu), "
|
||||
"before (got: %zu, want: %zu, req: %zu, db: %zu), "
|
||||
"duration (got: %zu, want: %zu, req: %zu, db: %zu), "
|
||||
"slot (after: %zu, before: %zu, delta: %zu), "
|
||||
//"slot (after: %zu, before: %zu, delta: %zu), "
|
||||
"points (got: %ld, want: %ld, req: %ld, db: %ld), "
|
||||
"%s"
|
||||
, r->st->name
|
||||
|
@ -557,9 +571,11 @@ static void rrd2rrdr_log_request_response_metdata(RRDR *r
|
|||
, (size_t)((rrdset_last_entry_t(r->st) - rrdset_first_entry_t(r->st)) + r->st->update_every)
|
||||
|
||||
// slot
|
||||
/*
|
||||
, after_slot
|
||||
, before_slot
|
||||
, (after_slot > before_slot) ? (r->st->entries - after_slot + before_slot) : (before_slot - after_slot)
|
||||
*/
|
||||
|
||||
// points
|
||||
, r->rows
|
||||
|
@ -721,7 +737,7 @@ RRDR *rrd2rrdr(
|
|||
|
||||
before_wanted = last_entry_t - (last_entry_t % ( ((aligned)?group:1) * st->update_every ));
|
||||
}
|
||||
size_t before_slot = rrdset_time2slot(st, before_wanted);
|
||||
//size_t before_slot = rrdset_time2slot(st, before_wanted);
|
||||
|
||||
// we need to estimate the number of points, for having
|
||||
// an integer number of values per point
|
||||
|
@ -743,7 +759,7 @@ RRDR *rrd2rrdr(
|
|||
after_wanted = first_entry_t - (first_entry_t % ( ((aligned)?group:1) * st->update_every )) + ( ((aligned)?group:1) * st->update_every );
|
||||
}
|
||||
}
|
||||
size_t after_slot = rrdset_time2slot(st, after_wanted);
|
||||
//size_t after_slot = rrdset_time2slot(st, after_wanted);
|
||||
|
||||
// check if they are reversed
|
||||
if(unlikely(after_wanted > before_wanted)) {
|
||||
|
@ -779,11 +795,13 @@ RRDR *rrd2rrdr(
|
|||
if(before_wanted > last_entry_t)
|
||||
error("INTERNAL CHECK: before_wanted %u is too big, maximum %u", (uint32_t)before_wanted, (uint32_t)last_entry_t);
|
||||
|
||||
/*
|
||||
if(before_slot >= (size_t)st->entries)
|
||||
error("INTERNAL CHECK: before_slot is invalid %zu, expected 0 to %ld", before_slot, st->entries - 1);
|
||||
|
||||
if(after_slot >= (size_t)st->entries)
|
||||
error("INTERNAL CHECK: after_slot is invalid %zu, expected 0 to %ld", after_slot, st->entries - 1);
|
||||
*/
|
||||
|
||||
if(points_wanted > (before_wanted - after_wanted) / group / st->update_every + 1)
|
||||
error("INTERNAL CHECK: points_wanted %ld is more than points %ld", points_wanted, (before_wanted - after_wanted) / group / st->update_every + 1);
|
||||
|
@ -900,8 +918,6 @@ RRDR *rrd2rrdr(
|
|||
, points_wanted
|
||||
, rd
|
||||
, c
|
||||
, after_slot
|
||||
, before_slot
|
||||
, after_wanted
|
||||
, before_wanted
|
||||
);
|
||||
|
@ -947,27 +963,27 @@ RRDR *rrd2rrdr(
|
|||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
|
||||
if(r->internal.log)
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, after_slot, before_slot, r->internal.log);
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, /*after_slot, before_slot,*/ r->internal.log);
|
||||
|
||||
if(r->rows != points_wanted)
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, after_slot, before_slot, "got 'points' is not wanted 'points'");
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, /*after_slot, before_slot,*/ "got 'points' is not wanted 'points'");
|
||||
|
||||
if(aligned && (r->before % group) != 0)
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, after_slot, before_slot, "'before' is not aligned but alignment is required");
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, /*after_slot, before_slot,*/ "'before' is not aligned but alignment is required");
|
||||
|
||||
// 'after' should not be aligned, since we start inside the first group
|
||||
//if(aligned && (r->after % group) != 0)
|
||||
// rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, after_slot, before_slot, "'after' is not aligned but alignment is required");
|
||||
|
||||
if(r->before != before_requested)
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, after_slot, before_slot, "chart is not aligned to requested 'before'");
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, /*after_slot, before_slot,*/ "chart is not aligned to requested 'before'");
|
||||
|
||||
if(r->before != before_wanted)
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, after_slot, before_slot, "got 'before' is not wanted 'before'");
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, /*after_slot, before_slot,*/ "got 'before' is not wanted 'before'");
|
||||
|
||||
// reported 'after' varies, depending on group
|
||||
if(r->after != after_wanted)
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, after_slot, before_slot, "got 'after' is not wanted 'after'");
|
||||
rrd2rrdr_log_request_response_metdata(r, group_method, aligned, group, resampling_time_requested, resampling_group, after_wanted, after_requested, before_wanted, before_requested, points_requested, points_wanted, /*after_slot, before_slot,*/ "got 'after' is not wanted 'after'");
|
||||
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue