0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-30 23:50:04 +00:00
netdata_netdata/src/database/sqlite/sqlite_metadata.c
Stelios Fragkakis de88c30dca
Metadata event loop code cleanup ()
* Remove query counter

* Refactor metadata worker structure to use stack-allocated event loop and improve initialization checks
Code cleanup

* Refactor metadata_queue_load_host_context to remove unnecessary parameter
2025-03-21 20:39:58 +02:00

3022 lines
103 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "sqlite_metadata.h"
#include "database/sqlite/vendored/sqlite3recover.h"
#include "health/health-alert-entry.h"
#include "sqlite_db_migration.h"
#define DB_METADATA_VERSION 18
// Declare a local 64-byte character buffer named `var_name` and format the
// difference (end - start), cast to int64_t, into it with duration_snprintf()
// using `unit`.
// The expansion is a declaration followed by a call, so it introduces
// `var_name` into the enclosing scope and cannot be used as an expression.
#define COMPUTE_DURATION(var_name, unit, start, end) \
char var_name[64]; \
duration_snprintf(var_name, sizeof(var_name), \
(int64_t)((end) - (start)), unit, true)
extern long long def_journal_size_limit;
// NULL-terminated list of schema statements (tables, triggers, indexes) for the
// metadata database. It is handed to init_database_batch() by
// sql_init_meta_database(). Every statement uses IF NOT EXISTS.
const char *database_config[] = {
// Hosts known to this agent
"CREATE TABLE IF NOT EXISTS host(host_id BLOB PRIMARY KEY, hostname TEXT NOT NULL, "
"registry_hostname TEXT NOT NULL default 'unknown', update_every INT NOT NULL default 1, "
"os TEXT NOT NULL default 'unknown', timezone TEXT NOT NULL default 'unknown', tags TEXT NOT NULL default '',"
"hops INT NOT NULL DEFAULT 0,"
"memory_mode INT DEFAULT 0, abbrev_timezone TEXT DEFAULT '', utc_offset INT NOT NULL DEFAULT 0,"
"program_name TEXT NOT NULL DEFAULT 'unknown', program_version TEXT NOT NULL DEFAULT 'unknown', "
"entries INT NOT NULL DEFAULT 0,"
"health_enabled INT NOT NULL DEFAULT 0, last_connected INT NOT NULL DEFAULT 0)",
// Charts, dimensions and chart labels
"CREATE TABLE IF NOT EXISTS chart(chart_id blob PRIMARY KEY, host_id blob, type text, id text, name text, "
"family text, context text, title text, unit text, plugin text, module text, priority int, update_every int, "
"chart_type int, memory_mode int, history_entries)",
"CREATE TABLE IF NOT EXISTS dimension(dim_id blob PRIMARY KEY, chart_id blob, id text, name text, "
"multiplier int, divisor int , algorithm int, options text)",
"CREATE TABLE IF NOT EXISTS metadata_migration(filename text, file_size, date_created int)",
"CREATE TABLE IF NOT EXISTS chart_label(chart_id blob, source_type int, label_key text, "
"label_value text, date_created int, PRIMARY KEY (chart_id, label_key))",
// Deleting a chart removes its labels
"CREATE TRIGGER IF NOT EXISTS del_chart_label AFTER DELETE ON chart "
"BEGIN DELETE FROM chart_label WHERE chart_id = old.chart_id; END",
// Deleting the last dimension of a chart removes the chart itself
"CREATE TRIGGER IF NOT EXISTS del_chart "
"AFTER DELETE ON dimension "
"FOR EACH ROW "
"BEGIN"
" DELETE FROM chart WHERE chart_id = OLD.chart_id "
" AND NOT EXISTS (SELECT 1 FROM dimension WHERE chart_id = OLD.chart_id);"
"END",
// Per host claim id / node id
"CREATE TABLE IF NOT EXISTS node_instance (host_id blob PRIMARY KEY, claim_id, node_id, date_created)",
// Alert configuration, keyed by hash
"CREATE TABLE IF NOT EXISTS alert_hash(hash_id blob PRIMARY KEY, date_updated int, alarm text, template text, "
"on_key text, class text, component text, type text, os text, hosts text, lookup text, "
"every text, units text, calc text, families text, plugin text, module text, charts text, green text, "
"red text, warn text, crit text, exec text, to_key text, info text, delay text, options text, "
"repeat text, host_labels text, p_db_lookup_dimensions text, p_db_lookup_method text, p_db_lookup_options int, "
"p_db_lookup_after int, p_db_lookup_before int, p_update_every int, source text, chart_labels text, "
"summary text, time_group_condition INT, time_group_value DOUBLE, dims_group INT, data_source INT)",
// Host system information (key/value) and host labels
"CREATE TABLE IF NOT EXISTS host_info(host_id blob, system_key text NOT NULL, system_value text NOT NULL, "
"date_created INT, PRIMARY KEY(host_id, system_key))",
"CREATE TABLE IF NOT EXISTS host_label(host_id blob, source_type int, label_key text NOT NULL, "
"label_value text NOT NULL, date_created INT, PRIMARY KEY (host_id, label_key))",
// Inserting a host creates its node_instance row when one does not exist yet
"CREATE TRIGGER IF NOT EXISTS ins_host AFTER INSERT ON host BEGIN INSERT INTO node_instance (host_id, date_created)"
" SELECT new.host_id, unixepoch() WHERE new.host_id NOT IN (SELECT host_id FROM node_instance); END",
// Health log: one row per (host_id, alarm_id) plus a detail table
"CREATE TABLE IF NOT EXISTS health_log (health_log_id INTEGER PRIMARY KEY, host_id blob, alarm_id int, "
"config_hash_id blob, name text, chart text, family text, recipient text, units text, exec text, "
"chart_context text, last_transition_id blob, chart_name text, UNIQUE (host_id, alarm_id))",
"CREATE TABLE IF NOT EXISTS health_log_detail (health_log_id int, unique_id int, alarm_id int, alarm_event_id int, "
"updated_by_id int, updates_id int, when_key int, duration int, non_clear_duration int, "
"flags int, exec_run_timestamp int, delay_up_to_timestamp int, "
"info text, exec_code int, new_status real, old_status real, delay int, "
"new_value double, old_value double, last_repeat int, transition_id blob, global_id int, summary text)",
// Indexes
"CREATE INDEX IF NOT EXISTS ind_d2 on dimension (chart_id)",
"CREATE INDEX IF NOT EXISTS ind_c3 on chart (host_id)",
"CREATE INDEX IF NOT EXISTS health_log_ind_1 ON health_log (host_id)",
"CREATE INDEX IF NOT EXISTS health_log_d_ind_2 ON health_log_detail (global_id)",
"CREATE INDEX IF NOT EXISTS health_log_d_ind_3 ON health_log_detail (transition_id)",
"CREATE INDEX IF NOT EXISTS health_log_d_ind_9 ON health_log_detail (unique_id DESC, health_log_id)",
"CREATE INDEX IF NOT EXISTS health_log_d_ind_6 on health_log_detail (health_log_id, when_key)",
"CREATE INDEX IF NOT EXISTS health_log_d_ind_7 on health_log_detail (alarm_id)",
"CREATE INDEX IF NOT EXISTS health_log_d_ind_8 on health_log_detail (new_status, updated_by_id)",
// Agent events and alert / ACLK queues
"CREATE TABLE IF NOT EXISTS agent_event_log (id INTEGER PRIMARY KEY, version TEXT, event_type INT, value, date_created INT)",
"CREATE INDEX IF NOT EXISTS idx_agent_event_log1 on agent_event_log (event_type)",
"CREATE TABLE IF NOT EXISTS alert_queue "
" (host_id BLOB, health_log_id INT, unique_id INT, alarm_id INT, status INT, date_scheduled INT, "
" UNIQUE(host_id, health_log_id, alarm_id))",
"CREATE INDEX IF NOT EXISTS ind_alert_queue1 ON alert_queue(host_id, date_scheduled)",
"CREATE TABLE IF NOT EXISTS alert_version (health_log_id INTEGER PRIMARY KEY, unique_id INT, status INT, "
"version INT, date_submitted INT)",
"CREATE TABLE IF NOT EXISTS aclk_queue (sequence_id INTEGER PRIMARY KEY, host_id blob, health_log_id INT, "
"unique_id INT, date_created INT, UNIQUE(host_id, health_log_id))",
// Contexts scheduled for metadata cleanup, one row per (host_id, context)
"CREATE TABLE IF NOT EXISTS ctx_metadata_cleanup (id INTEGER PRIMARY KEY, host_id BLOB, context TEXT NOT NULL, date_created INT NOT NULL, "
"UNIQUE (host_id, context))",
NULL
};
// NULL-terminated list of housekeeping statements, handed to
// init_database_batch() by sql_init_meta_database() after the schema batch.
const char *database_cleanup[] = {
// Remove hosts without charts, then rows that reference hosts that no longer exist
"DELETE FROM host WHERE host_id NOT IN (SELECT host_id FROM chart)",
"DELETE FROM node_instance WHERE host_id NOT IN (SELECT host_id FROM host)",
"DELETE FROM host_info WHERE host_id NOT IN (SELECT host_id FROM host)",
"DELETE FROM host_label WHERE host_id NOT IN (SELECT host_id FROM host)",
"DELETE FROM ctx_metadata_cleanup WHERE host_id NOT IN (SELECT host_id FROM host)",
// Drop a trigger and indexes that database_config does not create
// (presumably left over from older schema versions -- TODO confirm)
"DROP TRIGGER IF EXISTS tr_dim_del",
"DROP INDEX IF EXISTS ind_d1",
"DROP INDEX IF EXISTS ind_c1",
"DROP INDEX IF EXISTS ind_c2",
"DROP INDEX IF EXISTS alert_hash_index",
"DROP INDEX IF EXISTS health_log_d_ind_4",
"DROP INDEX IF EXISTS health_log_d_ind_1",
"DROP INDEX IF EXISTS health_log_d_ind_5",
NULL
};
sqlite3 *db_meta = NULL;
// SQL statements

// Upsert the claim id of a host in node_instance
#define SQL_STORE_CLAIM_ID \
"INSERT INTO node_instance " \
"(host_id, claim_id, date_created) VALUES (@host_id, @claim_id, UNIXEPOCH()) " \
"ON CONFLICT(host_id) DO UPDATE SET claim_id = excluded.claim_id"
#define SQL_DELETE_HOST_LABELS "DELETE FROM host_label WHERE host_id = @uuid"
// Statement prefixes for multi-row label inserts; the VALUES tuples are appended
// using STORE_HOST_OR_CHART_LABEL_VALUE (a printf-style format, not a bound statement)
#define STORE_HOST_LABEL \
"INSERT INTO host_label (host_id, source_type, label_key, label_value, date_created) VALUES "
#define STORE_CHART_LABEL \
"INSERT INTO chart_label (chart_id, source_type, label_key, label_value, date_created) VALUES "
#define STORE_HOST_OR_CHART_LABEL_VALUE "(u2h('%s'), %d,'%s','%s', unixepoch())"
#define DELETE_DIMENSION_UUID "DELETE FROM dimension WHERE dim_id = @uuid"
// INSERT OR REPLACE of a complete host row
#define SQL_STORE_HOST_INFO \
"INSERT OR REPLACE INTO host (host_id, hostname, registry_hostname, update_every, os, timezone, tags, hops, " \
"memory_mode, abbrev_timezone, utc_offset, program_name, program_version, entries, health_enabled, last_connected) " \
"VALUES (@host_id, @hostname, @registry_hostname, @update_every, @os, @timezone, @tags, @hops, " \
"@memory_mode, @abbrev_tz, @utc_offset, @prog_name, @prog_version, @entries, @health_enabled, @last_connected)"
// Upsert of a chart row, keyed by chart_id
#define SQL_STORE_CHART \
"INSERT INTO chart (chart_id, host_id, type, id, name, family, context, title, unit, plugin, module, priority, " \
"update_every, chart_type, memory_mode, history_entries) " \
"values (@chart_id, @host_id, @type, @id, @name, @family, @context, @title, @unit, @plugin, @module, @priority, " \
"@update_every, @chart_type, @memory_mode, @history_entries) " \
"ON CONFLICT(chart_id) DO UPDATE SET type=excluded.type, id=excluded.id, name=excluded.name, " \
"family=excluded.family, context=excluded.context, title=excluded.title, unit=excluded.unit, " \
"plugin=excluded.plugin, module=excluded.module, priority=excluded.priority, update_every=excluded.update_every, " \
"chart_type=excluded.chart_type, memory_mode = excluded.memory_mode, history_entries = excluded.history_entries"
// Upsert of a dimension row, keyed by dim_id
#define SQL_STORE_DIMENSION \
"INSERT INTO dimension (dim_id, chart_id, id, name, multiplier, divisor , algorithm, options) " \
"VALUES (@dim_id, @chart_id, @id, @name, @multiplier, @divisor, @algorithm, @options) " \
"ON CONFLICT(dim_id) DO UPDATE SET id=excluded.id, name=excluded.name, multiplier=excluded.multiplier, " \
"divisor=excluded.divisor, algorithm=excluded.algorithm, options=excluded.options"
// Scans that resume after a given rowid
#define SELECT_DIMENSION_LIST "SELECT dim_id, rowid FROM dimension WHERE rowid > @row_id"
#define SELECT_CHART_LIST "SELECT chart_id, rowid FROM chart WHERE rowid > @row_id"
#define SELECT_CHART_LABEL_LIST "SELECT chart_id, rowid FROM chart_label WHERE rowid > @row_id"
#define SQL_STORE_HOST_SYSTEM_INFO_VALUES \
"INSERT OR REPLACE INTO host_info (host_id, system_key, system_value, date_created) VALUES " \
"(@uuid, @name, @value, UNIXEPOCH())"
// Mark every host with hops = 0 other than @host_id as hops = 1
#define CONVERT_EXISTING_LOCALHOST "UPDATE host SET hops = 1 WHERE hops = 0 AND host_id <> @host_id"
#define DELETE_MISSING_NODE_INSTANCES "DELETE FROM node_instance WHERE host_id NOT IN (SELECT host_id FROM host)"
#define METADATA_MAINTENANCE_FIRST_CHECK (1800) // Maintenance first run after agent startup in seconds
#define METADATA_MAINTENANCE_REPEAT (60) // Repeat if last run for dimensions, charts, labels needs more work
#define METADATA_MAINTENANCE_CTX_CLEAN_REPEAT (300) // Repeat interval for the context cleanup (NOTE(review): by its name; usage is not visible here)
#define METADATA_HEALTH_LOG_INTERVAL (3600) // Repeat maintenance for health
#define METADATA_LABEL_CHECK_INTERVAL (3600) // Repeat maintenance for labels
#define METADATA_RUNTIME_THRESHOLD (5) // Run time threshold for cleanup task
#define METADATA_HOST_CHECK_FIRST_CHECK (5) // First check for pending metadata
#define METADATA_HOST_CHECK_INTERVAL (5) // Repeat check for pending metadata
#define METADATA_MAX_BATCH_SIZE (512) // Maximum commands to execute before running the event loop
#define DATABASE_VACUUM_FREQUENCY_SECONDS (60)
#define DATABASE_FREE_PAGES_THRESHOLD_PC (5) // Percentage of free pages to trigger vacuum
#define DATABASE_FREE_PAGES_VACUUM_PC (10) // Percentage of free pages to vacuum
// Opcodes stored in struct metadata_cmd.opcode.
// METADATA_DATABASE_NOOP is explicitly 0; the remaining values follow in
// declaration order, so new opcodes must be added before the final sentinel.
enum metadata_opcode {
METADATA_DATABASE_NOOP = 0,
METADATA_DEL_DIMENSION,
METADATA_STORE_CLAIM_ID,
METADATA_SCAN_HOSTS,
METADATA_LOAD_HOST_CONTEXT,
METADATA_DELETE_HOST_CHART_LABELS,
METADATA_ADD_HOST_AE,
METADATA_DEL_HOST_AE,
METADATA_ADD_CTX_CLEANUP,
METADATA_EXECUTE_STORE_STATEMENT,
METADATA_MAINTENANCE,
METADATA_SYNC_SHUTDOWN,
METADATA_UNITTEST,
// leave this last
// we need it to check for worker utilization
METADATA_MAX_ENUMERATIONS_DEFINED
};
// Number of opaque parameter slots carried by each command
#define MAX_PARAM_LIST (2)
// A single command for the metadata worker.
struct metadata_cmd {
enum metadata_opcode opcode;            // what to do
struct completion *completion;          // optional completion object (presumably signalled when the command is done -- TODO confirm)
const void *param[MAX_PARAM_LIST];      // opcode specific arguments
struct metadata_cmd *prev, *next;       // links to neighbouring commands in the queue
};
// Bit flags kept in struct metadata_wc.flags; read and written atomically
// through the metadata_flag_check/set/clear macros.
typedef enum {
METADATA_FLAG_PROCESSING = (1 << 0), // store or cleanup
METADATA_FLAG_SHUTDOWN = (1 << 1), // Shutting down
} METADATA_FLAG;
// State of the metadata worker. A single instance, metasync_worker, is defined
// together with the type.
struct metadata_wc {
uv_thread_t thread;                     // libuv thread handle
uv_loop_t loop;                         // libuv event loop, embedded in the structure
uv_async_t async;                       // libuv async handle
uv_timer_t timer_req;                   // libuv timer handle
time_t metadata_check_after;
Pvoid_t ae_DelJudyL;                    // JudyL array (NOTE(review): purpose is not visible in this part of the file)
METADATA_FLAG flags;                    // METADATA_FLAG_* bits, accessed with the metadata_flag_* macros
bool initialized;
SPINLOCK cmd_queue_lock;                // presumably protects cmd_base -- TODO confirm against the enqueue/dequeue code
struct completion start_stop_complete;
struct completion *scan_complete;
/* FIFO command queue */
struct metadata_cmd *cmd_base;
ARAL *ar;
} metasync_worker;
// Atomic (sequentially consistent) helpers for the `flags` member of the
// structure pointed to by `target_flags`:
//   check - returns the bits of `flag` that are currently set (0 when none)
//   set   - ORs `flag` into flags and yields the new value
//   clear - ANDs flags with ~flag and yields the new value
#define metadata_flag_check(target_flags, flag) (__atomic_load_n(&((target_flags)->flags), __ATOMIC_SEQ_CST) & (flag))
#define metadata_flag_set(target_flags, flag) __atomic_or_fetch(&((target_flags)->flags), (flag), __ATOMIC_SEQ_CST)
#define metadata_flag_clear(target_flags, flag) __atomic_and_fetch(&((target_flags)->flags), ~(flag), __ATOMIC_SEQ_CST)
//
// For unittest
//
// Bookkeeping shared with the unit test code.
// NOTE(review): the users of this structure are outside this part of the file;
// the field notes below are derived from the names only.
struct thread_unittest {
int join;               // presumably tells the test threads to stop
unsigned added;         // presumably the number of commands queued
unsigned processed;     // presumably the number of commands executed
unsigned *done;
};
/*
 * Read a per-connection status counter of the metadata database.
 *
 * op: one of the verbs accepted by sqlite3_db_status()
 *
 * Returns the current value of the counter, or 0 when the database is not
 * available or the counter could not be read.
 */
int sql_metadata_cache_stats(int op)
{
    if (!REQUIRE_DB(db_meta))
        return 0;

    // sqlite3_db_status() does not write the outputs when it fails (e.g. an
    // unknown op), so start from zero instead of returning stack garbage
    int current = 0, highwater = 0;
    (void) sqlite3_db_status(db_meta, op, &current, &highwater, 0);

    return current;
}
/*
 * Set (or clear) the node id of a host and propagate the change.
 *
 * host:    the host to update; nothing happens when NULL
 * node_id: the new node id; when NULL the host node id is reset to UUID_ZERO
 *          and nothing else is done
 */
static inline void set_host_node_id(RRDHOST *host, nd_uuid_t *node_id)
{
    if (unlikely(!host))
        return;

    if (unlikely(!node_id)) {
        host->node_id = UUID_ZERO;
        return;
    }

    uuid_copy(host->node_id.uuid, *node_id);

    // Keep the textual node id of the ACLK configuration in sync,
    // creating the configuration when the host does not have one yet
    struct aclk_sync_cfg_t *aclk_cfg = host->aclk_config;
    if (aclk_cfg)
        uuid_unparse_lower(*node_id, aclk_cfg->node_id);
    else
        create_aclk_config(host, &host->host_id.uuid, node_id);

    stream_receiver_send_node_and_claim_id_to_child(host);
    stream_path_node_id_updated(host);
}
// A (host, context) pair.
// NOTE(review): by its name this is the payload of a context cleanup request;
// the code that uses it is not visible in this part of the file.
struct host_ctx_cleanup_s {
nd_uuid_t host_uuid;    // UUID of the host
STRING *context;        // context name
};
#define CTX_DELETE_CONTEXT_META_CLEANUP_ITEM "DELETE FROM ctx_metadata_cleanup WHERE host_id = @host_id AND context = @context"
/*
 * Remove one (host, context) entry from ctx_metadata_cleanup.
 *
 * res:       statement slot owned by the caller. It is prepared here on first
 *            use and only reset (not finalized) afterwards, so it can be
 *            reused for the next call.
 * host_uuid: host the entry belongs to
 * context:   context name of the entry
 */
static void ctx_delete_metadata_cleanup_context(sqlite3_stmt **res, nd_uuid_t *host_uuid, const char *context)
{
    if (!*res && !PREPARE_STATEMENT(db_meta, CTX_DELETE_CONTEXT_META_CLEANUP_ITEM, res))
        return;

    // bind_idx tracks the parameter being bound; it is zeroed once all binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int bind_idx = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(*res, ++bind_idx, host_uuid, sizeof(*host_uuid), SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++bind_idx, context, -1, SQLITE_STATIC));
    bind_idx = 0;

    int step_rc = sqlite3_step_monitored(*res);
    if (step_rc != SQLITE_DONE)
        error_report("Failed to delete context check entry, rc = %d", step_rc);

done:
    REPORT_BIND_FAIL(*res, bind_idx);
    SQLITE_RESET(*res);
}
#define CTX_GET_CONTEXT_META_CLEANUP_LIST "SELECT context FROM ctx_metadata_cleanup WHERE host_id = @host_id"
/*
 * Collect the contexts stored in ctx_metadata_cleanup for a host and hand
 * them to a callback.
 *
 * host_uuid:  host to look up; nothing happens when NULL
 * cleanup_cb: called once, and only when at least one context was found, with
 *             a JudyL array whose indexes are STRING pointers of the contexts
 * data:       passed to cleanup_cb unchanged
 *
 * The JudyL array and the STRING references it holds are released here after
 * the callback returns, so the callback must not keep them.
 */
static void ctx_get_context_list_to_cleanup(nd_uuid_t *host_uuid, void (*cleanup_cb)(Pvoid_t JudyL, void *data), void *data)
{
if (unlikely(!host_uuid))
return;
sqlite3_stmt *res = NULL;
if (!PREPARE_STATEMENT(db_meta, CTX_GET_CONTEXT_META_CLEANUP_LIST, &res))
return;
// param tracks the parameter being bound; it is zeroed once the binds
// succeeded so that REPORT_BIND_FAIL() has nothing to report
int param = 0;
SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, host_uuid, sizeof(*host_uuid), SQLITE_STATIC));
param = 0;
const char *context;
Pvoid_t CTX_JudyL = NULL;
Pvoid_t *Pvalue;
while (sqlite3_step_monitored(res) == SQLITE_ROW) {
context = (char *) sqlite3_column_text(res, 0);
STRING *ctx = string_strdupz(context);
// The STRING pointer itself is the JudyL index
// NOTE(review): the result of JudyLIns() is dereferenced without a NULL/PJERR check
Pvalue = JudyLIns(&CTX_JudyL, (Word_t) ctx, PJE0);
if (*Pvalue)
// slot already populated: this index was inserted before, so drop the reference just taken
string_freez(ctx);
else
*(int *)Pvalue = 1;
}
if (CTX_JudyL) {
cleanup_cb(CTX_JudyL, data);
// Release the one reference kept for every index of the array
bool first = true;
Word_t Index = 0;
while ((Pvalue = JudyLFirstThenNext(CTX_JudyL, &Index, &first))) {
STRING *ctx = (STRING *) Index;
string_freez(ctx);
}
}
(void)JudyLFreeArray(&CTX_JudyL, PJE0);
done:
REPORT_BIND_FAIL(res, param);
SQLITE_FINALIZE(res);
}
// Insert a (host_id, context) row, or refresh date_created when the row exists.
// The statement used to end with a stray "; END": sqlite3_prepare_v2() only
// compiles the first statement of the text, so the tail was silently ignored.
#define SQL_SCHEDULE_HOST_CTX_CLEANUP \
    "INSERT INTO ctx_metadata_cleanup (host_id, context, date_created) " \
    "VALUES (@host_id, @context, UNIXEPOCH()) ON CONFLICT DO UPDATE SET date_created = excluded.date_created"
/*
 * Schedule context cleanup for host
 *
 * res:     statement slot owned by the caller. It is prepared here on first
 *          use and only reset (not finalized) afterwards, so it can be reused.
 * host_id: host the context belongs to
 * context: context name to schedule
 */
static void sql_schedule_host_ctx_cleanup(sqlite3_stmt **res, nd_uuid_t *host_id, const char *context)
{
    if (!*res) {
        if (!PREPARE_STATEMENT(db_meta, SQL_SCHEDULE_HOST_CTX_CLEANUP, res))
            return;
    }

    // param tracks the parameter being bound; it is zeroed once all binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int param = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(*res, ++param, host_id, sizeof(*host_id), SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, context, -1, SQLITE_STATIC));
    param = 0;

    int rc = execute_insert(*res);
    if (rc != SQLITE_DONE)
        error_report("Failed to store host context check data, rc = %d", rc);

done:
    REPORT_BIND_FAIL(*res, param);
    SQLITE_RESET(*res);
}
#define SQL_SET_HOST_LABEL \
    "INSERT INTO host_label (host_id, source_type, label_key, label_value, date_created) " \
    "VALUES (@host_id, @source_type, @label_key, @label_value, UNIXEPOCH()) ON CONFLICT (host_id, label_key) " \
    " DO UPDATE SET source_type = excluded.source_type, label_value=excluded.label_value, date_created=UNIXEPOCH()"
/*
 * Store (insert or update) a single host label with source RRDLABEL_SRC_AUTO.
 *
 * host_id:     host the label belongs to
 * label_key:   label name
 * label_value: label value
 *
 * Returns true when the label was stored. Returns false when an argument is
 * NULL, or when the statement could not be prepared, bound or executed.
 */
bool sql_set_host_label(nd_uuid_t *host_id, const char *label_key, const char *label_value)
{
    sqlite3_stmt *res = NULL;
    bool status = false;

    if (!label_key || !label_value || !host_id)
        return false;

    // This used to "return 1", which a bool function reports as success
    if (!PREPARE_STATEMENT(db_meta, SQL_SET_HOST_LABEL, &res))
        return false;

    // param tracks the parameter being bound; it is zeroed once all binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int param = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, host_id, sizeof(*host_id), SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(res, ++param, RRDLABEL_SRC_AUTO));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(res, ++param, label_key, -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(res, ++param, label_value, -1, SQLITE_STATIC));
    param = 0;

    int rc = execute_insert(res);
    status = (rc == SQLITE_DONE);
    if (false == status)
        error_report("Failed to store host label, rc = %d", rc);

done:
    REPORT_BIND_FAIL(res, param);
    SQLITE_FINALIZE(res);
    return status;
}
#define SQL_UPDATE_NODE_ID "UPDATE node_instance SET node_id = @node_id WHERE host_id = @host_id"
/*
 * Assign a node id to a host, both in memory and in the metadata database.
 *
 * host_id: UUID of the host to update
 * node_id: node id to assign
 *
 * The in-memory host (when found by its GUID) is updated first, under the
 * rrd write lock; the node_instance row is updated afterwards.
 */
void sql_update_node_id(nd_uuid_t *host_id, nd_uuid_t *node_id)
{
    char host_guid[GUID_LEN + 1];
    uuid_unparse_lower(*host_id, host_guid);

    rrd_wrlock();
    RRDHOST *host = rrdhost_find_by_guid(host_guid);
    if (likely(host))
        set_host_node_id(host, node_id);
    rrd_wrunlock();

    if (!REQUIRE_DB(db_meta))
        return;

    sqlite3_stmt *stmt = NULL;
    if (!PREPARE_STATEMENT(db_meta, SQL_UPDATE_NODE_ID, &stmt))
        return;

    // bind_idx tracks the parameter being bound; it is zeroed once all binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int bind_idx = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, node_id, sizeof(*node_id), SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, host_id, sizeof(*host_id), SQLITE_STATIC));
    bind_idx = 0;

    int step_rc = sqlite3_step_monitored(stmt);
    if (unlikely(step_rc != SQLITE_DONE))
        error_report("Failed to store node instance information, rc = %d", step_rc);

done:
    REPORT_BIND_FAIL(stmt, bind_idx);
    SQLITE_FINALIZE(stmt);
}
#define SQL_SELECT_NODE_ID "SELECT node_id FROM node_instance WHERE host_id = @host_id AND node_id IS NOT NULL"
/*
 * Look up the node id stored for a host.
 *
 * host_id: host to look up
 * node_id: receives the node id; may be NULL to only test for existence
 *
 * Returns 0 when a node id was found (and copied, when node_id is given),
 * -1 when there is none, the stored value is not a valid UUID blob, or the
 * query failed, and 1 when the database is not available or the statement
 * could not be prepared.
 */
int get_node_id(nd_uuid_t *host_id, nd_uuid_t *node_id)
{
    sqlite3_stmt *res = NULL;
    int found = 0;

    if (!REQUIRE_DB(db_meta))
        return 1;

    if (!PREPARE_STATEMENT(db_meta, SQL_SELECT_NODE_ID, &res))
        return 1;

    // param tracks the parameter being bound; it is zeroed once the binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int param = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, host_id, sizeof(*host_id), SQLITE_STATIC));
    param = 0;

    if (likely(sqlite3_step_monitored(res) == SQLITE_ROW)) {
        found = 1;
        if (node_id) {
            // Only copy a blob of exactly UUID size (same check as sql_load_node_id).
            // A zero-length blob yields a NULL pointer and a short one would be
            // read past its end.
            const void *blob = sqlite3_column_blob(res, 0);
            if (likely(blob && sqlite3_column_bytes(res, 0) == sizeof(nd_uuid_t)))
                uuid_copy(*node_id, *((nd_uuid_t *) blob));
            else
                found = 0;
        }
    }

done:
    REPORT_BIND_FAIL(res, param);
    SQLITE_FINALIZE(res);
    return found ? 0 : -1;
}
#define SQL_INVALIDATE_NODE_INSTANCES \
    "UPDATE node_instance SET node_id = NULL WHERE EXISTS " \
    "(SELECT host_id FROM node_instance WHERE host_id = @host_id AND (@claim_id IS NULL OR claim_id <> @claim_id))"
/*
 * Run SQL_INVALIDATE_NODE_INSTANCES for a host / claim id pair.
 *
 * host_id:  bound to @host_id
 * claim_id: bound to @claim_id; SQL NULL is bound when this is NULL
 */
void invalidate_node_instances(nd_uuid_t *host_id, nd_uuid_t *claim_id)
{
    if (!REQUIRE_DB(db_meta))
        return;

    sqlite3_stmt *stmt = NULL;
    if (!PREPARE_STATEMENT(db_meta, SQL_INVALIDATE_NODE_INSTANCES, &stmt))
        return;

    // bind_idx tracks the parameter being bound; it is zeroed once all binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int bind_idx = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, host_id, sizeof(*host_id), SQLITE_STATIC));
    if (!claim_id) {
        SQLITE_BIND_FAIL(done, sqlite3_bind_null(stmt, ++bind_idx));
    }
    else {
        SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, claim_id, sizeof(*claim_id), SQLITE_STATIC));
    }
    bind_idx = 0;

    int exec_rc = execute_insert(stmt);
    if (unlikely(exec_rc != SQLITE_DONE))
        error_report("Failed to invalidate node instance information, rc = %d", exec_rc);

done:
    REPORT_BIND_FAIL(stmt, bind_idx);
    SQLITE_FINALIZE(stmt);
}
#define SQL_GET_HOST_NODE_ID "SELECT node_id FROM node_instance WHERE host_id = @host_id"
/*
 * Load the node id of a host from the metadata database into the host.
 *
 * When the host has a node_instance row, its node id is applied with
 * set_host_node_id(); a stored value that is not exactly UUID sized clears
 * the node id of the host. Without a row the host is left untouched.
 */
void sql_load_node_id(RRDHOST *host)
{
    if (!REQUIRE_DB(db_meta))
        return;

    sqlite3_stmt *stmt = NULL;
    if (!PREPARE_STATEMENT(db_meta, SQL_GET_HOST_NODE_ID, &stmt))
        return;

    // bind_idx tracks the parameter being bound; it is zeroed once the binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int bind_idx = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC));
    bind_idx = 0;

    if (likely(sqlite3_step_monitored(stmt) == SQLITE_ROW)) {
        nd_uuid_t *stored_node_id = NULL;
        if (likely(sqlite3_column_bytes(stmt, 0) == sizeof(nd_uuid_t)))
            stored_node_id = (nd_uuid_t *) sqlite3_column_blob(stmt, 0);

        set_host_node_id(host, stored_node_id);
    }

done:
    REPORT_BIND_FAIL(stmt, bind_idx);
    SQLITE_FINALIZE(stmt);
}
#define SELECT_HOST_INFO "SELECT system_key, system_value FROM host_info WHERE host_id = @host_id"
/*
 * Populate a system info structure from the host_info rows of a host.
 *
 * host_id:     host whose key/value pairs are read
 * system_info: receives every pair through rrdhost_system_info_set_by_name()
 */
void sql_build_host_system_info(nd_uuid_t *host_id, struct rrdhost_system_info *system_info)
{
    sqlite3_stmt *stmt = NULL;
    if (!PREPARE_STATEMENT(db_meta, SELECT_HOST_INFO, &stmt))
        return;

    // bind_idx tracks the parameter being bound; it is zeroed once the binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int bind_idx = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, host_id, sizeof(*host_id), SQLITE_STATIC));
    bind_idx = 0;

    while (sqlite3_step_monitored(stmt) == SQLITE_ROW) {
        char *key = (char *) sqlite3_column_text(stmt, 0);
        char *value = (char *) sqlite3_column_text(stmt, 1);
        rrdhost_system_info_set_by_name(system_info, key, value);
    }

done:
    REPORT_BIND_FAIL(stmt, bind_idx);
    SQLITE_FINALIZE(stmt);
}
#define SELECT_HOST_LABELS "SELECT label_key, label_value, source_type FROM host_label WHERE host_id = @host_id " \
    "AND label_key IS NOT NULL AND label_value IS NOT NULL"
/*
 * Load the labels stored for a host.
 *
 * Returns a label set created with rrdlabels_create() that holds every stored
 * label (possibly none), or NULL when the statement could not be prepared or
 * its parameter could not be bound.
 */
RRDLABELS *sql_load_host_labels(nd_uuid_t *host_id)
{
    sqlite3_stmt *stmt = NULL;
    if (!PREPARE_STATEMENT(db_meta, SELECT_HOST_LABELS, &stmt))
        return NULL;

    RRDLABELS *host_labels = NULL;

    // bind_idx tracks the parameter being bound; it is zeroed once the binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int bind_idx = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, host_id, sizeof(*host_id), SQLITE_STATIC));
    bind_idx = 0;

    host_labels = rrdlabels_create();
    while (sqlite3_step_monitored(stmt) == SQLITE_ROW) {
        const char *key = (const char *) sqlite3_column_text(stmt, 0);
        const char *value = (const char *) sqlite3_column_text(stmt, 1);
        rrdlabels_add(host_labels, key, value, sqlite3_column_int(stmt, 2));
    }

done:
    REPORT_BIND_FAIL(stmt, bind_idx);
    SQLITE_FINALIZE(stmt);
    return host_labels;
}
/*
 * Execute a statement that takes a single UUID blob parameter.
 *
 * sql:  statement text with one parameter
 * uuid: value bound to that parameter
 *
 * Returns SQLITE_OK (0) when the statement ran to completion, 1 otherwise.
 */
static int exec_statement_with_uuid(const char *sql, nd_uuid_t *uuid)
{
    sqlite3_stmt *stmt = NULL;
    if (!PREPARE_STATEMENT(db_meta, sql, &stmt)) {
        error_report("Failed to prepare statement %s", sql);
        return 1;
    }

    int status = 1;

    // bind_idx tracks the parameter being bound; it is zeroed once the binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int bind_idx = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, uuid, sizeof(*uuid), SQLITE_STATIC));
    bind_idx = 0;

    int exec_rc = execute_insert(stmt);
    if (likely(exec_rc == SQLITE_DONE))
        status = SQLITE_OK;
    else
        error_report("Failed to execute %s, rc = %d", sql, exec_rc);

done:
    REPORT_BIND_FAIL(stmt, bind_idx);
    SQLITE_FINALIZE(stmt);
    return status;
}
/*
 * Try to recover a database file with the SQLite recover extension.
 *
 * sqlite_database:     path of the database to recover
 * new_sqlite_database: path the recovered copy is written to
 *
 * When the recovery finishes cleanly the recovered file is renamed over the
 * original one. Every outcome is logged; nothing is returned to the caller.
 */
static void recover_database(const char *sqlite_database, const char *new_sqlite_database)
{
    sqlite3 *database = NULL;

    int rc = sqlite3_open(sqlite_database, &database);
    if (rc != SQLITE_OK) {
        // A handle is normally allocated even when the open fails and has to be
        // released; sqlite3_close(NULL) is a harmless no-op
        (void) sqlite3_close(database);
        return;
    }

    netdata_log_info("Recover %s", sqlite_database);
    netdata_log_info("     to %s", new_sqlite_database);

    // This will remove the -shm and -wal files when we close the database
    (void) db_execute(database, "select count(*) from sqlite_master limit 0");

    sqlite3_recover *recover = sqlite3_recover_init(database, "main", new_sqlite_database);
    if (!recover) {
        (void) sqlite3_close(database);
        return;
    }

    rc = sqlite3_recover_run(recover);
    if (rc == SQLITE_OK)
        netdata_log_info("Recover complete");
    else
        netdata_log_error("Recover encountered an error but the database may be usable");

    // The recover handle has to be finished before the connection is closed
    rc = sqlite3_recover_finish(recover);
    (void) sqlite3_close(database);

    if (rc != SQLITE_OK) {
        netdata_log_error("Recover failed to free resources");
        return;
    }

    if (rename(new_sqlite_database, sqlite_database) != 0) {
        netdata_log_error("Failed to rename %s to %s", new_sqlite_database, sqlite_database);
        return;
    }

    netdata_log_info("Renamed %s", new_sqlite_database);
    netdata_log_info("     to %s", sqlite_database);
}
/*
 * SQL function: convert the textual form of a UUID to a 16 byte blob.
 *
 * It is registered as "u2h" by sql_init_meta_database(). The result is SQL
 * NULL when the function is not called with exactly one argument, when the
 * argument is NULL, or when it cannot be parsed as a UUID.
 */
static void sqlite_uuid_parse(sqlite3_context *context, int argc, sqlite3_value **argv)
{
    if (argc != 1) {
        sqlite3_result_null(context);
        return;
    }

    // sqlite3_value_text() returns NULL for an SQL NULL argument (and on out
    // of memory); it must not reach uuid_parse()
    const char *uuid_str = (const char *) sqlite3_value_text(argv[0]);

    nd_uuid_t uuid;
    if (!uuid_str || uuid_parse(uuid_str, uuid) == -1) {
        sqlite3_result_null(context);
        return;
    }

    // SQLITE_TRANSIENT: SQLite makes its own copy of the stack buffer
    sqlite3_result_blob(context, &uuid, sizeof(nd_uuid_t), SQLITE_TRANSIENT);
}
/*
 * SQL function: current realtime clock in microseconds.
 *
 * It takes exactly one integer argument; when that argument is non-zero the
 * function sleeps for one nanosecond before reading the clock. The result is
 * SQL NULL when it is called with a different number of arguments.
 */
void sqlite_now_usec(sqlite3_context *context, int argc, sqlite3_value **argv)
{
    if (argc == 1) {
        if (sqlite3_value_int(argv[0]) != 0) {
            struct timespec pause = {.tv_sec = 0, .tv_nsec = 1};
            nanosleep(&pause, NULL);
        }

        sqlite3_result_int64(context, (sqlite_int64) now_realtime_usec());
        return;
    }

    sqlite3_result_null(context);
}
/*
 * SQL function: a freshly generated random UUID, returned as a blob.
 * The arguments are ignored.
 */
void sqlite_uuid_random(sqlite3_context *context, int argc, sqlite3_value **argv)
{
    (void) argv;
    (void) argc;

    nd_uuid_t random_uuid;
    uuid_generate_random(random_uuid);

    // SQLITE_TRANSIENT: SQLite makes its own copy of the stack buffer
    sqlite3_result_blob(context, &random_uuid, sizeof(nd_uuid_t), SQLITE_TRANSIENT);
}
/*
 * Size of the write-ahead log of a database in the cache directory.
 *
 * database_file: file name of the database, without directory
 *
 * Returns the size in bytes of "<cache dir>/<database_file>-wal", or -1 when
 * the file cannot be examined.
 */
static int64_t sql_get_wal_size(const char *database_file)
{
    char wal_path[FILENAME_MAX + 1];
    snprintfz(wal_path, sizeof(wal_path) - 1, "%s/%s-wal", netdata_configured_cache_dir, database_file);

    // No loop and no callback: the request is executed synchronously
    uv_fs_t stat_req;
    int64_t wal_bytes = -1;
    if (uv_fs_stat(NULL, &stat_req, wal_path, NULL) >= 0)
        wal_bytes = (int64_t) stat_req.statbuf.st_size;

    uv_fs_req_cleanup(&stat_req);
    return wal_bytes;
}
#define SQLITE_METADATA_WAL_LIMIT_X (10)
/*
 * Check the size of the metadata database write-ahead log.
 *
 * Returns false when the WAL file of netdata-meta.db is larger than
 * SQLITE_METADATA_WAL_LIMIT_X times def_journal_size_limit, true otherwise
 * (including when the size cannot be determined).
 */
bool sql_metadata_wal_size_acceptable()
{
    int64_t wal_bytes = sql_get_wal_size("netdata-meta.db");

    return !(wal_bytes > SQLITE_METADATA_WAL_LIMIT_X * def_journal_size_limit);
}
// Init
/*
 * Initialize the SQLite metadata database
 *
 * rebuild: DB_CHECK_* bits that request a one-shot action
 *            DB_CHECK_RECOVER       - run the recovery and return 0 without
 *                                     opening the database
 *            DB_CHECK_RECLAIM_SPACE - run VACUUM, close the database, return 1
 *            DB_CHECK_ANALYZE       - run ANALYZE, close the database, return 1
 * memory:  when non-zero an in-memory database is used; the on-disk marker
 *          files are not examined and no migration is performed
 *
 * Marker files in the cache directory (removed when found):
 *   .netdata-meta.db.recover - a recovery of netdata-meta.db is attempted
 *   .netdata-meta.db.delete  - netdata-meta.db is renamed to netdata-meta.bad
 *
 * Return 0 on success. On every non-zero return the database handle has been
 * closed and db_meta is NULL.
 */
int sql_init_meta_database(db_check_action_type_t rebuild, int memory)
{
    char *err_msg = NULL;
    char sqlite_database[FILENAME_MAX + 1];
    int rc;

    if (likely(!memory)) {
        snprintfz(sqlite_database, sizeof(sqlite_database) - 1, "%s/.netdata-meta.db.recover", netdata_configured_cache_dir);
        rc = unlink(sqlite_database);
        snprintfz(sqlite_database, FILENAME_MAX, "%s/netdata-meta.db", netdata_configured_cache_dir);

        // Recover when the marker file existed or when explicitly requested
        if (rc == 0 || (rebuild & DB_CHECK_RECOVER)) {
            char new_sqlite_database[FILENAME_MAX + 1];
            snprintfz(new_sqlite_database, sizeof(new_sqlite_database) - 1, "%s/netdata-meta-recover.db", netdata_configured_cache_dir);
            recover_database(sqlite_database, new_sqlite_database);
            if (rebuild & DB_CHECK_RECOVER)
                return 0;
        }

        snprintfz(sqlite_database, sizeof(sqlite_database) - 1, "%s/.netdata-meta.db.delete", netdata_configured_cache_dir);
        rc = unlink(sqlite_database);
        snprintfz(sqlite_database, FILENAME_MAX, "%s/netdata-meta.db", netdata_configured_cache_dir);

        // The delete marker existed: move the database out of the way
        if (rc == 0) {
            char new_sqlite_database[FILENAME_MAX + 1];
            snprintfz(new_sqlite_database, sizeof(new_sqlite_database) - 1, "%s/netdata-meta.bad", netdata_configured_cache_dir);
            rc = rename(sqlite_database, new_sqlite_database);
            if (rc)
                error_report("Failed to rename %s to %s", sqlite_database, new_sqlite_database);
        }
        // note: sqlite_database contains the right name
    }
    else
        strncpyz(sqlite_database, ":memory:", sizeof(sqlite_database) - 1);

    rc = sqlite3_open(sqlite_database, &db_meta);
    if (rc != SQLITE_OK) {
        error_report("Failed to initialize database at %s, due to \"%s\"", sqlite_database, sqlite3_errstr(rc));

        char *error_str = get_database_extented_error(db_meta, 0, "meta_open");
        if (error_str)
            analytics_set_data_str(&analytics_data.netdata_fail_reason, error_str);
        freez(error_str);

        goto close_database;
    }

    if (rebuild & DB_CHECK_RECLAIM_SPACE) {
        netdata_log_info("Reclaiming space of %s", sqlite_database);
        rc = sqlite3_exec_monitored(db_meta, "VACUUM", 0, 0, &err_msg);
        if (rc != SQLITE_OK) {
            error_report("Failed to execute VACUUM rc = %d (%s)", rc, err_msg);
            sqlite3_free(err_msg);
        }
        else
            (void) db_execute(db_meta, "select count(*) from sqlite_master limit 0");

        // One-shot action: always release the handle and reset db_meta, so the
        // handle is not leaked on failure nor left dangling after the close
        goto close_database;
    }

    if (rebuild & DB_CHECK_ANALYZE) {
        errno_clear();
        netdata_log_info("Running ANALYZE on %s", sqlite_database);
        rc = sqlite3_exec_monitored(db_meta, "ANALYZE", 0, 0, &err_msg);
        if (rc != SQLITE_OK) {
            error_report("Failed to execute ANALYZE rc = %d (%s)", rc, err_msg);
            sqlite3_free(err_msg);
        }
        else
            (void) db_execute(db_meta, "select count(*) from sqlite_master limit 0");

        // One-shot action: see the VACUUM case above
        goto close_database;
    }

    errno_clear();
    netdata_log_info("SQLite database %s initialization", sqlite_database);

    // Application defined SQL functions; a failure to register is logged but not fatal
    rc = sqlite3_create_function(db_meta, "u2h", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, 0, sqlite_uuid_parse, 0, 0);
    if (unlikely(rc != SQLITE_OK))
        error_report("Failed to register internal u2h function");

    rc = sqlite3_create_function(db_meta, "now_usec", 1, SQLITE_ANY, 0, sqlite_now_usec, 0, 0);
    if (unlikely(rc != SQLITE_OK))
        error_report("Failed to register internal now_usec function");

    rc = sqlite3_create_function(db_meta, "uuid_random", 0, SQLITE_ANY, 0, sqlite_uuid_random, 0, 0);
    if (unlikely(rc != SQLITE_OK))
        error_report("Failed to register internal uuid_random function");

    int target_version = DB_METADATA_VERSION;
    if (likely(!memory))
        target_version = perform_database_migration(db_meta, DB_METADATA_VERSION);

    if (configure_sqlite_database(db_meta, target_version, "meta_config"))
        goto close_database;

    if (init_database_batch(db_meta, &database_config[0], "meta_init"))
        goto close_database;

    if (init_database_batch(db_meta, &database_cleanup[0], "meta_cleanup"))
        goto close_database;

    netdata_log_info("SQLite database initialization completed");
    return 0;

close_database:
    sqlite3_close(db_meta);
    db_meta = NULL;
    return 1;
}
// Metadata functions
// State shared with the label walkthrough callbacks that build a multi-row
// INSERT statement as text.
struct query_build {
BUFFER *sql;                    // statement text being assembled
int count;                      // number of VALUES tuples appended so far
char uuid_str[UUID_STR_LEN];    // textual UUID written into every tuple
};
/*
 * Label walkthrough callback: append one host label to the INSERT statement
 * assembled in a struct query_build (passed as data).
 *
 * The first label writes the STORE_HOST_LABEL prefix, later ones a separating
 * comma; the VALUES tuple follows. RRDLABEL_FLAG_INTERNAL is masked out of the
 * stored source type. Always returns 1.
 *
 * NOTE(review): name and value are interpolated into the SQL text unescaped;
 * this relies on labels never containing a single quote -- confirm against
 * the label sanitization.
 */
static int host_label_store_to_sql_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) {
    struct query_build *query = data;

    if (query->count)
        buffer_strcat(query->sql, ", ");
    else
        buffer_sprintf(query->sql, STORE_HOST_LABEL);

    int source_type = (int) (ls & ~(RRDLABEL_FLAG_INTERNAL));
    buffer_sprintf(query->sql, STORE_HOST_OR_CHART_LABEL_VALUE, query->uuid_str, source_type, name, value);

    query->count++;
    return 1;
}
/*
 * Label walkthrough callback: append one chart label to the INSERT statement
 * assembled in a struct query_build (passed as data).
 *
 * The first label writes the STORE_CHART_LABEL prefix, later ones a separating
 * comma; the VALUES tuple follows. RRDLABEL_FLAG_INTERNAL is masked out of the
 * stored source type. Always returns 1.
 *
 * NOTE(review): name and value are interpolated into the SQL text unescaped;
 * this relies on labels never containing a single quote -- confirm against
 * the label sanitization.
 */
static int chart_label_store_to_sql_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) {
    struct query_build *query = data;

    if (query->count)
        buffer_strcat(query->sql, ", ");
    else
        buffer_sprintf(query->sql, STORE_CHART_LABEL);

    int source_type = (int) (ls & ~(RRDLABEL_FLAG_INTERNAL));
    buffer_sprintf(query->sql, STORE_HOST_OR_CHART_LABEL_VALUE, query->uuid_str, source_type, name, value);

    query->count++;
    return 1;
}
/*
 * Store the labels of a chart when they changed since the last save.
 *
 * st:          chart whose labels are checked
 * work_buffer: buffer the INSERT statement is assembled in
 *              (NOTE(review): it is not flushed here, so the caller is
 *              expected to pass it empty -- confirm)
 *
 * Returns 0 when nothing had to be stored or the labels were stored, otherwise
 * the non-zero result of db_execute(). The saved version of the chart labels
 * is only advanced on success.
 */
static int check_and_update_chart_labels(RRDSET *st, BUFFER *work_buffer)
{
    size_t old_version = st->rrdlabels_last_saved_version;
    size_t new_version = rrdlabels_version(st->rrdlabels);

    if (new_version == old_version)
        return 0;

    struct query_build tmp = {.sql = work_buffer, .count = 0};
    uuid_unparse_lower(st->chart_uuid, tmp.uuid_str);
    rrdlabels_walkthrough_read(st->rrdlabels, chart_label_store_to_sql_callback, &tmp);

    // No label was emitted: the statement would consist of the ON CONFLICT
    // clause alone, fail to compile and be retried on every pass.
    // There is nothing to store, so just remember the version.
    if (!tmp.count) {
        st->rrdlabels_last_saved_version = new_version;
        return 0;
    }

    buffer_strcat(work_buffer, " ON CONFLICT (chart_id, label_key) DO UPDATE SET source_type = excluded.source_type, label_value=excluded.label_value, date_created=UNIXEPOCH()");

    int rc = db_execute(db_meta, buffer_tostring(work_buffer));
    if (likely(!rc))
        st->rrdlabels_last_saved_version = new_version;

    return rc;
}
// If the machine guid has changed, then existing one with hops 0 will be marked as hops 1 (child)
// Once that update succeeded, node instances without a matching host are removed.
void detect_machine_guid_change(nd_uuid_t *host_uuid)
{
    if (exec_statement_with_uuid(CONVERT_EXISTING_LOCALHOST, host_uuid))
        return;

    if (unlikely(db_execute(db_meta, DELETE_MISSING_NODE_INSTANCES)))
        error_report("Failed to remove deleted hosts from node instances");
}
/*
 * Store the claim id of a host in node_instance (insert or update).
 *
 * host_id:  host to update
 * claim_id: claim id to store; SQL NULL is stored when this is NULL
 *
 * Returns 0 on success, 1 on any failure.
 */
static int store_claim_id(nd_uuid_t *host_id, nd_uuid_t *claim_id)
{
    if (!REQUIRE_DB(db_meta))
        return 1;

    sqlite3_stmt *stmt = NULL;
    if (!PREPARE_STATEMENT(db_meta, SQL_STORE_CLAIM_ID, &stmt))
        return 1;

    // Stays 0 (which is not SQLITE_DONE) when a bind fails, so that is reported as a failure too
    int step_rc = 0;

    // bind_idx tracks the parameter being bound; it is zeroed once all binds
    // succeeded so that REPORT_BIND_FAIL() has nothing to report
    int bind_idx = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, host_id, sizeof(*host_id), SQLITE_STATIC));
    if (!claim_id) {
        SQLITE_BIND_FAIL(done, sqlite3_bind_null(stmt, ++bind_idx));
    }
    else {
        SQLITE_BIND_FAIL(done, sqlite3_bind_blob(stmt, ++bind_idx, claim_id, sizeof(*claim_id), SQLITE_STATIC));
    }
    bind_idx = 0;

    step_rc = sqlite3_step_monitored(stmt);
    if (unlikely(step_rc != SQLITE_DONE))
        error_report("Failed to store host claim id rc = %d", step_rc);

done:
    REPORT_BIND_FAIL(stmt, bind_idx);
    SQLITE_FINALIZE(stmt);
    return (step_rc == SQLITE_DONE) ? 0 : 1;
}
#define SQL_DELETE_DIMENSION_BY_ID "DELETE FROM dimension WHERE rowid = @dimension_row AND dim_id = @uuid"

/*
 * Delete one dimension row, matched on both its rowid and its uuid.
 *
 * *res caches the prepared statement: it is prepared on first use and only
 * reset here, so finalizing it is left to the caller.
 */
static void delete_dimension_by_rowid(sqlite3_stmt **res, int64_t dimension_id, nd_uuid_t *dim_uuid)
{
    if (!*res && !PREPARE_STATEMENT(db_meta, SQL_DELETE_DIMENSION_BY_ID, res))
        return;

    int step_rc;
    int param = 0;

    SQLITE_BIND_FAIL(done, sqlite3_bind_int64(*res, ++param, dimension_id));
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(*res, ++param, dim_uuid, sizeof(*dim_uuid), SQLITE_STATIC));

    // Binding succeeded: clear the index so that no bind failure is reported below
    param = 0;

    step_rc = sqlite3_step_monitored(*res);
    if (unlikely(step_rc != SQLITE_DONE))
        error_report("Failed to delete dimension id, rc = %d", step_rc);

done:
    REPORT_BIND_FAIL(*res, param);
    SQLITE_RESET(*res);
}
/*
 * Delete a dimension by uuid using a per-thread cached statement.
 *
 * action_res and flag are unused; they exist so that the function matches the
 * action callback signature expected by run_cleanup_loop().
 */
static void delete_dimension_uuid(nd_uuid_t *dimension_uuid, sqlite3_stmt **action_res __maybe_unused, bool flag __maybe_unused)
{
    static __thread sqlite3_stmt *res = NULL;

    if (!PREPARE_COMPILED_STATEMENT(db_meta, DELETE_DIMENSION_UUID, &res))
        return;

    int step_rc;
    int param = 0;

    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, dimension_uuid, sizeof(*dimension_uuid), SQLITE_STATIC));

    // Binding succeeded: clear the index so that no bind failure is reported below
    param = 0;

    step_rc = sqlite3_step_monitored(res);
    if (unlikely(step_rc != SQLITE_DONE))
        error_report("Failed to delete dimension uuid, rc = %d", step_rc);

done:
    REPORT_BIND_FAIL(res, param);
    SQLITE_RESET(res);
}
//
// Store host and host system info information in the database
//

/*
 * Store (upsert) the host row using a per-thread cached statement.
 *
 * Returns 0 on success and non-zero on failure: the statement could not be
 * prepared, a parameter could not be bound, or the step did not complete.
 */
static int store_host_metadata(RRDHOST *host)
{
    static __thread sqlite3_stmt *res = NULL;

    // Every other failure exit of this function returns non-zero; returning
    // false (0) here would report a store that never happened as a success.
    if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_STORE_HOST_INFO, &res))
        return 1;

    int param = 0;
    SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC));
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, rrdhost_hostname(host), 0));
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, rrdhost_registry_hostname(host), 1));
    SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_int(res, ++param, host->rrd_update_every));
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, rrdhost_os(host), 1));
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, rrdhost_timezone(host), 1));
    // An empty string is always bound at this position
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, "", 1));
    SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_int(res, ++param, rrdhost_ingestion_hops(host)));
    SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_int(res, ++param, host->rrd_memory_mode));
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, rrdhost_abbrev_timezone(host), 1));
    SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_int(res, ++param, host->utc_offset));
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, rrdhost_program_name(host), 1));
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, rrdhost_program_version(host), 1));
    SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_int64(res, ++param, host->rrd_history_entries));
    SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_int(res, ++param, (int)host->health.enabled));
    SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_int64(res, ++param, (sqlite3_int64) host->stream.snd.status.last_connected));

    int store_rc = sqlite3_step_monitored(res);
    if (unlikely(store_rc != SQLITE_DONE))
        error_report("Failed to store host %s, rc = %d", rrdhost_hostname(host), store_rc);

    SQLITE_RESET(res);
    return store_rc != SQLITE_DONE;

bind_fail:
    // param holds the index of the parameter that failed to bind
    REPORT_BIND_FAIL(res, param);
    SQLITE_RESET(res);
    return 1;
}
/*
 * Store one system info name/value pair for the host identified by uuid.
 * A NULL value is stored as the text "unknown".
 *
 * Returns 1 when the pair was stored and 0 otherwise. Note that this is the
 * opposite of the "0 means success" convention used by the store_* functions.
 */
static int add_host_sysinfo_key_value(const char *name, const char *value, nd_uuid_t *uuid)
{
    static __thread sqlite3_stmt *res = NULL;

    if (!REQUIRE_DB(db_meta) || !PREPARE_COMPILED_STATEMENT(db_meta, SQL_STORE_HOST_SYSTEM_INFO_VALUES, &res))
        return 0;

    const char *stored_value = value ? value : "unknown";
    int param = 0;

    SQLITE_BIND_FAIL(bind_fail, sqlite3_bind_blob(res, ++param, uuid, sizeof(*uuid), SQLITE_STATIC));
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, name, 0));
    SQLITE_BIND_FAIL(bind_fail, bind_text_null(res, ++param, stored_value, 0));

    int store_rc = sqlite3_step_monitored(res);
    if (unlikely(store_rc != SQLITE_DONE))
        error_report("Failed to store host info value %s, rc = %d", name, store_rc);

    SQLITE_RESET(res);
    return store_rc == SQLITE_DONE;

bind_fail:
    // param holds the index of the parameter that failed to bind
    REPORT_BIND_FAIL(res, param);
    SQLITE_RESET(res);
    return 0;
}
/*
 * Store all the system info values of a host.
 *
 * Returns false when the host has no system info or when the iterator reports
 * 24, and true otherwise.
 * NOTE(review): 24 is presumably the number of key/value pairs the iterator is
 * expected to store successfully - confirm against rrdhost_system_info_foreach().
 */
static bool store_host_systeminfo(RRDHOST *host)
{
    struct rrdhost_system_info *system_info = host->system_info;

    if (unlikely(!system_info)) {
        return false;
    }

    return (rrdhost_system_info_foreach(system_info, add_host_sysinfo_key_value, &host->host_id.uuid) != 24);
}
/*
 * Store a chart in the database
 *
 * *res caches the prepared statement: it is prepared on first use and only
 * reset here, so finalizing it is left to the caller.
 * An empty or missing chart name is stored as SQL NULL.
 *
 * Returns 0 on success, non-zero on prepare, bind or step failure.
 */
static int store_chart_metadata(RRDSET *st, sqlite3_stmt **res)
{
    if (!*res && !PREPARE_STATEMENT(db_meta, SQL_STORE_CHART, res))
        return 1;

    // Stays 1 (which is not SQLITE_DONE) when a bind fails, so the function reports failure
    int rc = 1;
    int param = 0;
    const char *name;

    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(*res, ++param, &st->chart_uuid, sizeof(st->chart_uuid), SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(*res, ++param, &st->rrdhost->host_id.uuid, sizeof(st->rrdhost->host_id.uuid), SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, string2str(st->parts.type), -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, string2str(st->parts.id), -1, SQLITE_STATIC));

    name = string2str(st->parts.name);
    if (name && *name) {
        SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, name, -1, SQLITE_STATIC));
    }
    else {
        SQLITE_BIND_FAIL(done, sqlite3_bind_null(*res, ++param));
    }

    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, rrdset_family(st), -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, rrdset_context(st), -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, rrdset_title(st), -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, rrdset_units(st), -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, rrdset_plugin_name(st), -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, rrdset_module_name(st), -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(*res, ++param, (int) st->priority));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(*res, ++param, st->update_every));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(*res, ++param, st->chart_type));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(*res, ++param, st->rrd_memory_mode));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(*res, ++param, (int) st->db.entries));

    // All parameters are bound: clear the index so that no bind failure is reported below
    param = 0;

    rc = sqlite3_step_monitored(*res);
    if (unlikely(rc != SQLITE_DONE))
        error_report("Failed to store chart, rc = %d", rc);

done:
    REPORT_BIND_FAIL(*res, param);
    SQLITE_RESET(*res);
    return rc != SQLITE_DONE;
}
/*
 * Store a dimension in the database.
 *
 * *res caches the prepared statement: it is prepared on first use and only
 * reset here, so finalizing it is left to the caller.
 * The options column is "hidden" for hidden dimensions and SQL NULL otherwise.
 *
 * Returns false on success, true on prepare, bind or step failure.
 */
static bool store_dimension_metadata(RRDDIM *rd, sqlite3_stmt **res)
{
    if (!*res && !PREPARE_STATEMENT(db_meta, SQL_STORE_DIMENSION, res))
        return true;

    // Stays 1 (which is not SQLITE_DONE) when a bind fails, so the function reports failure
    int rc = 1;
    int param = 0;
    nd_uuid_t *rd_uuid = uuidmap_uuid_ptr(rd->uuid);

    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(*res, ++param, rd_uuid, sizeof(*rd_uuid), SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(*res, ++param, &rd->rrdset->chart_uuid, sizeof(rd->rrdset->chart_uuid), SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, string2str(rd->id), -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, string2str(rd->name), -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(*res, ++param, (int) rd->multiplier));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(*res, ++param, (int ) rd->divisor));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(*res, ++param, rd->algorithm));

    if (rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN)) {
        SQLITE_BIND_FAIL(done, sqlite3_bind_text(*res, ++param, "hidden", -1, SQLITE_STATIC));
    }
    else {
        SQLITE_BIND_FAIL(done, sqlite3_bind_null(*res, ++param));
    }

    // All parameters are bound: clear the index so that no bind failure is reported below
    param = 0;

    rc = sqlite3_step_monitored(*res);
    if (unlikely(rc != SQLITE_DONE))
        error_report("Failed to store dimension, rc = %d", rc);

done:
    REPORT_BIND_FAIL(*res, param);
    SQLITE_RESET(*res);
    return (rc != SQLITE_DONE);
}
/*
 * Decide whether the metadata of a dimension may be deleted.
 *
 * A dimension can be deleted only when dbengine is compiled in and enabled and
 * no configured tier reports retention with a first time above zero for the uuid.
 * In every other case the answer is false.
 *
 * res and flag are unused; they exist so that the function matches the check
 * callback signature expected by run_cleanup_loop().
 */
static bool dimension_can_be_deleted(nd_uuid_t *dim_uuid __maybe_unused, sqlite3_stmt **res __maybe_unused, bool flag __maybe_unused)
{
#ifdef ENABLE_DBENGINE
    if (!dbengine_enabled)
        return false;

    for (size_t tier = 0; tier < nd_profile.storage_tiers; tier++) {
        if (!multidb_ctx[tier])
            continue;

        time_t first_time_t = 0, last_time_t = 0;
        if (rrdeng_metric_retention_by_uuid((void *) multidb_ctx[tier], dim_uuid, &first_time_t, &last_time_t) &&
            first_time_t > 0)
            return false; // this tier still has data for the dimension
    }

    return true;
#else
    return false;
#endif
}
/*
 * Walk the rows returned by res and delete the ones the check callback approves.
 *
 * res is expected to return a uuid blob in column 0 and a rowid in column 1 and
 * to take the starting rowid as its first parameter. For every row, *row_id is
 * updated, check_cb decides if the item can go and action_cb removes it.
 * check_stmt / action_stmt and check_flag / action_flag are passed through to
 * the callbacks untouched.
 *
 * The loop stops when the rows are exhausted, on shutdown, or when more than
 * METADATA_RUNTIME_THRESHOLD seconds have been spent. The counters are added to
 * *total_checked and *total_deleted. res is neither reset nor finalized here.
 *
 * Returns true when the loop did not run (shutdown requested or the bind failed)
 * or when it stopped because the time budget expired; false otherwise.
 */
static bool run_cleanup_loop(
    sqlite3_stmt *res,
    struct metadata_wc *wc,
    bool (*check_cb)(nd_uuid_t *, sqlite3_stmt **, bool),
    void (*action_cb)(nd_uuid_t *, sqlite3_stmt **, bool),
    uint32_t *total_checked,
    uint32_t *total_deleted,
    uint64_t *row_id,
    sqlite3_stmt **check_stmt,
    sqlite3_stmt **action_stmt,
    bool check_flag,
    bool action_flag)
{
    if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)))
        return true;

    int rc = sqlite3_bind_int64(res, 1, (sqlite3_int64) *row_id);
    if (unlikely(rc != SQLITE_OK))
        return true;

    time_t start_running = now_monotonic_sec();
    bool time_expired = false;

    uint32_t l_checked = 0;
    uint32_t l_deleted = 0;

    while (!time_expired && sqlite3_step_monitored(res) == SQLITE_ROW) {
        if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)))
            break;

        *row_id = sqlite3_column_int64(res, 1);

        // The callbacks treat the blob as a full nd_uuid_t, so a NULL or
        // wrongly sized value must never reach them. Such rows are skipped,
        // but still counted and the scan position still advances past them.
        const void *blob = sqlite3_column_blob(res, 0);
        bool valid_uuid = blob && sqlite3_column_bytes(res, 0) == (int) sizeof(nd_uuid_t);

        if (likely(valid_uuid)) {
            // The blob pointer stays usable across the callbacks because they
            // work with their own statements and res is not stepped meanwhile
            nd_uuid_t *uuid = (nd_uuid_t *) blob;

            if (check_cb(uuid, check_stmt, check_flag)) {
                action_cb(uuid, action_stmt, action_flag);
                l_deleted++;
            }
        }

        l_checked++;
        time_expired = ((now_monotonic_sec() - start_running) > METADATA_RUNTIME_THRESHOLD);
    }

    (*total_checked) += l_checked;
    (*total_deleted) += l_deleted;

    return time_expired;
}
#define SQL_CHECK_CHART_EXISTENCE_IN_DIMENSION "SELECT count(1) FROM dimension WHERE chart_id = @chart_id"
#define SQL_CHECK_CHART_EXISTENCE_IN_CHART "SELECT count(1) FROM chart WHERE chart_id = @chart_id"

/*
 * Check whether a chart uuid is no longer referenced.
 *
 * The reference is looked up in the dimension table when check_in_dimension is
 * true and in the chart table otherwise. When check_res is given it caches the
 * prepared statement (prepared on first use, reset here, finalized by the
 * caller); when it is NULL a statement is prepared and finalized in this call.
 *
 * Returns true only when the count query ran and returned 0. Any failure
 * (prepare, bind, step) yields false.
 */
static bool chart_can_be_deleted(nd_uuid_t *chart_uuid, sqlite3_stmt **check_res, bool check_in_dimension)
{
    sqlite3_stmt *res = check_res ? *check_res : NULL;

    if (!res) {
        const char *sql =
            check_in_dimension ? SQL_CHECK_CHART_EXISTENCE_IN_DIMENSION : SQL_CHECK_CHART_EXISTENCE_IN_CHART;

        if (!PREPARE_STATEMENT(db_meta, sql, &res))
            return false;

        if (check_res)
            *check_res = res;
    }

    // Assume the chart is referenced unless the query proves otherwise
    int result = 1;

    int rc = sqlite3_bind_blob(res, 1, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC);
    if (likely(rc == SQLITE_OK)) {
        rc = sqlite3_step_monitored(res);
        if (likely(rc == SQLITE_ROW))
            result = sqlite3_column_int(res, 0);
    }
    else {
        error_report("Failed to bind chart uuid parameter, rc = %d", rc);
    }

    if (check_res) {
        SQLITE_RESET(res);
    }
    else {
        SQLITE_FINALIZE(res);
    }

    return result == 0;
}
#define SQL_DELETE_CHART_BY_UUID "DELETE FROM chart WHERE chart_id = @chart_id"
#define SQL_DELETE_CHART_LABEL_BY_UUID "DELETE FROM chart_label WHERE chart_id = @chart_id"

/*
 * Delete a chart uuid from the chart_label table (label_only) or the chart table.
 *
 * When action_res is given it caches the prepared statement (prepared on first
 * use, reset here, finalized by the caller); when it is NULL a statement is
 * prepared and finalized in this call.
 */
static void delete_chart_uuid(nd_uuid_t *chart_uuid, sqlite3_stmt **action_res, bool label_only)
{
    sqlite3_stmt *res = action_res ? *action_res : NULL;

    if (!res) {
        const char *sql = label_only ? SQL_DELETE_CHART_LABEL_BY_UUID : SQL_DELETE_CHART_BY_UUID;

        if (!PREPARE_STATEMENT(db_meta, sql, &res))
            return;

        if (action_res)
            *action_res = res;
    }

    int rc = sqlite3_bind_blob(res, 1, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC);
    if (likely(rc == SQLITE_OK)) {
        rc = sqlite3_step_monitored(res);
        if (unlikely(rc != SQLITE_DONE))
            error_report("Failed to delete a chart uuid from the %s table, rc = %d", label_only ? "labels" : "chart", rc);
    }
    else {
        error_report("Failed to bind chart uuid parameter, rc = %d", rc);
    }

    if (action_res) {
        SQLITE_RESET(res);
    }
    else {
        SQLITE_FINALIZE(res);
    }
}
/*
 * Run a statement without parameters and return column 0 of its first row.
 * Returns 0 when the statement cannot be prepared or produces no row.
 */
static uint64_t get_rowid_from_statement(const char *sql)
{
    sqlite3_stmt *res = NULL;

    if (!PREPARE_STATEMENT(db_meta, sql, &res))
        return 0;

    uint64_t rowid = (sqlite3_step_monitored(res) == SQLITE_ROW) ? sqlite3_column_int64(res, 0) : 0;

    SQLITE_FINALIZE(res);
    return rowid;
}
#define SQL_GET_MAX_DIM_ROW_ID "SELECT MAX(rowid) FROM dimension"

/*
 * Incrementally verify the dimension table and delete dimensions without retention.
 *
 * The state is kept in function statics. On the first call the check is
 * scheduled METADATA_MAINTENANCE_FIRST_CHECK seconds ahead and the current
 * maximum rowid is recorded. Each later call that is due scans one slice via
 * run_cleanup_loop() and reschedules itself METADATA_MAINTENANCE_REPEAT
 * seconds ahead, until the recorded maximum rowid has been reached.
 *
 * Returns true when no slice was scanned in this call (not due yet, check
 * completed, or the list statement could not be prepared) and false when a
 * slice was scanned.
 */
static bool check_dimension_metadata(struct metadata_wc *wc)
{
    static time_t next_execution_t = 0;
    static uint64_t last_row_id = 0;
    static uint64_t max_row_id = 0;

    time_t now = now_realtime_sec();

    if (!next_execution_t) {
        next_execution_t = now + METADATA_MAINTENANCE_FIRST_CHECK;
        max_row_id = get_rowid_from_statement(SQL_GET_MAX_DIM_ROW_ID);
        // max_row_id is uint64_t: PRIu64 is the matching conversion on every platform
        nd_log(NDLS_DAEMON, NDLP_INFO, "Dimension metadata check has been scheduled to run (max id = %" PRIu64 ")", max_row_id);
    }

    if (next_execution_t && next_execution_t > now)
        return true;

    if (max_row_id && last_row_id >= max_row_id) {
        nd_log_daemon(NDLP_INFO, "Dimension metadata check completed");
        // For long running agents, check in a week
        // NOTE(review): neither last_row_id nor max_row_id is changed here, so a week
        // later this branch is taken again and no new scan starts - confirm the intent.
        next_execution_t = now + 604800;
        return true;
    }

    sqlite3_stmt *res = NULL;
    if (!PREPARE_STATEMENT(db_meta, SELECT_DIMENSION_LIST, &res))
        return true;

    uint32_t total_checked = 0;
    uint32_t total_deleted = 0;

    nd_log(NDLS_DAEMON, NDLP_DEBUG, "Checking dimensions starting after row %" PRIu64, last_row_id);

    worker_is_busy(UV_EVENT_DIMENSION_CLEANUP);

    // The callbacks used for dimensions need no cached statements and ignore the flags
    (void) run_cleanup_loop(
        res,
        wc,
        dimension_can_be_deleted,
        delete_dimension_uuid,
        &total_checked,
        &total_deleted,
        &last_row_id,
        NULL,
        NULL,
        false,
        false);

    now = now_realtime_sec();
    next_execution_t = now + METADATA_MAINTENANCE_REPEAT;

    nd_log_daemon(
        NDLP_DEBUG,
        "Dimensions checked %u, deleted %u. Checks will resume in %d seconds",
        total_checked,
        total_deleted,
        METADATA_MAINTENANCE_REPEAT);

    SQLITE_FINALIZE(res);
    worker_is_idle();
    return false;
}
#define SQL_GET_MAX_CHART_ROW_ID "SELECT MAX(rowid) FROM chart"

/*
 * Incrementally verify the chart table and delete the charts that
 * chart_can_be_deleted() approves (looked up in the dimension table).
 *
 * The state is kept in function statics. On the first call the check is
 * scheduled METADATA_MAINTENANCE_FIRST_CHECK seconds ahead and the current
 * maximum rowid is recorded. Each later call that is due scans one slice via
 * run_cleanup_loop() and reschedules itself METADATA_MAINTENANCE_REPEAT
 * seconds ahead. Once the recorded maximum rowid is reached the check is
 * marked completed and never runs again in this process.
 *
 * Returns true when no slice was scanned in this call and false otherwise.
 */
static bool check_chart_metadata(struct metadata_wc *wc)
{
    static time_t next_execution_t = 0;
    static uint64_t last_row_id = 0;
    static uint64_t max_row_id = 0;
    static bool check_completed = false;

    if (check_completed)
        return true;

    time_t now = now_realtime_sec();

    if (!next_execution_t) {
        next_execution_t = now + METADATA_MAINTENANCE_FIRST_CHECK;
        max_row_id = get_rowid_from_statement(SQL_GET_MAX_CHART_ROW_ID);
        // max_row_id is uint64_t: PRIu64 is the matching conversion on every platform
        nd_log(NDLS_DAEMON, NDLP_INFO, "Chart metadata check has been scheduled to run (max id = %" PRIu64 ")", max_row_id);
    }

    if (next_execution_t && next_execution_t > now)
        return true;

    if (max_row_id && last_row_id >= max_row_id) {
        nd_log(NDLS_DAEMON, NDLP_INFO, "Chart metadata check completed");
        check_completed = true;
        return true;
    }

    sqlite3_stmt *res = NULL;
    if (!PREPARE_STATEMENT(db_meta, SELECT_CHART_LIST, &res))
        return true;

    uint32_t total_checked = 0;
    uint32_t total_deleted = 0;

    nd_log(NDLS_DAEMON, NDLP_DEBUG, "Checking charts starting after row %" PRIu64, last_row_id);

    worker_is_busy(UV_EVENT_CHART_CLEANUP);

    // Statements cached by the callbacks for the duration of this slice
    sqlite3_stmt *check_res = NULL;
    sqlite3_stmt *action_res = NULL;

    // check flag true: look for the chart in the dimension table
    // action flag false: delete from the chart table
    (void)run_cleanup_loop(
        res,
        wc,
        chart_can_be_deleted,
        delete_chart_uuid,
        &total_checked,
        &total_deleted,
        &last_row_id,
        &check_res,
        &action_res,
        true,
        false);

    SQLITE_FINALIZE(check_res);
    SQLITE_FINALIZE(action_res);

    now = now_realtime_sec();
    next_execution_t = now + METADATA_MAINTENANCE_REPEAT;

    nd_log_daemon(
        NDLP_DEBUG,
        "Charts checked %u, deleted %u. Checks will resume in %d seconds",
        total_checked,
        total_deleted,
        METADATA_MAINTENANCE_REPEAT);

    SQLITE_FINALIZE(res);
    worker_is_idle();
    return false;
}
#define SQL_GET_MAX_CHART_LABEL_ROW_ID "SELECT MAX(rowid) FROM chart_label"

/*
 * Incrementally verify the chart_label table and delete the labels whose chart
 * chart_can_be_deleted() approves (looked up in the chart table).
 *
 * The state is kept in function statics. On the first call the check is
 * scheduled METADATA_MAINTENANCE_FIRST_CHECK seconds ahead and the current
 * maximum rowid is recorded. Each later call that is due scans one slice via
 * run_cleanup_loop() and reschedules itself METADATA_LABEL_CHECK_INTERVAL
 * seconds ahead. Once the recorded maximum rowid is reached the check is
 * marked completed and never runs again in this process.
 *
 * Returns true when no slice was scanned in this call and false otherwise.
 */
static bool check_label_metadata(struct metadata_wc *wc)
{
    static time_t next_execution_t = 0;
    static uint64_t last_row_id = 0;
    static uint64_t max_row_id = 0;
    static bool check_completed = false;

    if (check_completed)
        return true;

    time_t now = now_realtime_sec();

    if (!next_execution_t) {
        next_execution_t = now + METADATA_MAINTENANCE_FIRST_CHECK;
        max_row_id = get_rowid_from_statement(SQL_GET_MAX_CHART_LABEL_ROW_ID);
        // max_row_id is uint64_t: PRIu64 is the matching conversion on every platform
        nd_log(NDLS_DAEMON, NDLP_INFO, "Chart label metadata check has been scheduled to run (max id = %" PRIu64 ")", max_row_id);
    }

    if (next_execution_t && next_execution_t > now)
        return true;

    if (max_row_id && last_row_id >= max_row_id) {
        nd_log(NDLS_DAEMON, NDLP_INFO, "Chart label metadata check completed");
        check_completed = true;
        return true;
    }

    sqlite3_stmt *res = NULL;
    if (!PREPARE_STATEMENT(db_meta, SELECT_CHART_LABEL_LIST, &res))
        return true;

    uint32_t total_checked = 0;
    uint32_t total_deleted = 0;

    nd_log(NDLS_DAEMON, NDLP_DEBUG, "Checking charts labels starting after row %" PRIu64, last_row_id);

    // Statements cached by the callbacks for the duration of this slice
    sqlite3_stmt *check_res = NULL;
    sqlite3_stmt *action_res = NULL;

    worker_is_busy(UV_EVENT_CHART_LABEL_CLEANUP);

    // check flag false: look for the chart in the chart table
    // action flag true: delete from the chart_label table only
    (void )run_cleanup_loop(
        res,
        wc,
        chart_can_be_deleted,
        delete_chart_uuid,
        &total_checked,
        &total_deleted,
        &last_row_id,
        &check_res,
        &action_res,
        false,
        true);

    SQLITE_FINALIZE(check_res);
    SQLITE_FINALIZE(action_res);

    now = now_realtime_sec();
    next_execution_t = now + METADATA_LABEL_CHECK_INTERVAL;

    nd_log_daemon(
        NDLP_DEBUG,
        "Chart labels checked %u, deleted %u. Checks will resume in %d seconds",
        total_checked,
        total_deleted,
        METADATA_LABEL_CHECK_INTERVAL);

    SQLITE_FINALIZE(res);
    worker_is_idle();
    return false;
}
/*
 * Periodic cleanup of the health log tables.
 *
 * The first run is scheduled METADATA_MAINTENANCE_FIRST_CHECK seconds after the
 * first call and later runs every METADATA_HEALTH_LOG_INTERVAL seconds. A run
 * cleans the alarm log of every host and then deletes the rows of health_log,
 * health_log_detail and alert_version that lost their parent. The orphan
 * deletion is skipped when a shutdown has been requested.
 */
static void cleanup_health_log(struct metadata_wc *wc)
{
    static time_t next_execution_t = 0;

    time_t now = now_realtime_sec();

    if (!next_execution_t)
        next_execution_t = now + METADATA_MAINTENANCE_FIRST_CHECK;

    if (next_execution_t && next_execution_t > now)
        return;

    next_execution_t = now + METADATA_HEALTH_LOG_INTERVAL;

    worker_is_busy(UV_EVENT_HEALTH_LOG_CLEANUP);

    RRDHOST *host;
    dfe_start_reentrant(rrdhost_root_index, host)
    {
        sql_health_alarm_log_cleanup(host);
        if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)))
            break;
    }
    dfe_done(host);

    if (likely(!metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) {
        (void) db_execute(db_meta,"DELETE FROM health_log WHERE host_id NOT IN (SELECT host_id FROM host)");
        (void) db_execute(db_meta,"DELETE FROM health_log_detail WHERE health_log_id NOT IN (SELECT health_log_id FROM health_log)");
        (void) db_execute(db_meta,"DELETE FROM alert_version WHERE health_log_id NOT IN (SELECT health_log_id FROM health_log)");
    }

    worker_is_idle();
}
//
// EVENT LOOP STARTS HERE
//
static void metadata_free_cmd_queue(struct metadata_wc *wc)
{
spinlock_lock(&wc->cmd_queue_lock);
while(wc->cmd_base) {
struct metadata_cmd *t = wc->cmd_base;
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wc->cmd_base, t, prev, next);
aral_freez(wc->ar, t);
}
spinlock_unlock(&wc->cmd_queue_lock);
}
/*
 * Queue a copy of cmd for the metadata event loop and wake the loop up.
 *
 * Nothing happens when the worker is not initialized. A METADATA_SYNC_SHUTDOWN
 * command is never queued: it only raises the shutdown flag. Once the shutdown
 * flag is set every other command is dropped. In both cases the loop is still
 * woken up.
 */
static void metadata_enq_cmd(struct metadata_wc *wc, struct metadata_cmd *cmd)
{
    if(unlikely(!wc->initialized))
        return;

    if (cmd->opcode == METADATA_SYNC_SHUTDOWN) {
        metadata_flag_set(wc, METADATA_FLAG_SHUTDOWN);
    }
    else if (likely(!metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) {
        // The queue owns a private copy; the links of the caller's struct are not reused
        struct metadata_cmd *t = aral_mallocz(wc->ar);
        *t = *cmd;
        t->prev = t->next = NULL;

        spinlock_lock(&wc->cmd_queue_lock);
        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(wc->cmd_base, t, prev, next);
        spinlock_unlock(&wc->cmd_queue_lock);
    }

    (void) uv_async_send(&wc->async);
}
static struct metadata_cmd metadata_deq_cmd(struct metadata_wc *wc)
{
struct metadata_cmd ret, *to_free = NULL;
spinlock_lock(&wc->cmd_queue_lock);
if(wc->cmd_base) {
struct metadata_cmd *t = wc->cmd_base;
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wc->cmd_base, t, prev, next);
ret = *t;
to_free = t;
}
else {
ret.opcode = METADATA_DATABASE_NOOP;
ret.completion = NULL;
}
spinlock_unlock(&wc->cmd_queue_lock);
aral_freez(wc->ar, to_free);
return ret;
}
// Async wake-up handler: stops the loop that owns the handle and refreshes its cached time
static void async_cb(uv_async_t *handle)
{
    uv_loop_t *loop = handle->loop;

    uv_stop(loop);
    uv_update_time(loop);
}
#define TIMER_INITIAL_PERIOD_MS (1000)
#define TIMER_REPEAT_PERIOD_MS (1000)

// Timer handler: stops the loop, refreshes its cached time and queues a host scan
// once the current time has passed wc->metadata_check_after
static void timer_cb(uv_timer_t* handle)
{
    uv_stop(handle->loop);
    uv_update_time(handle->loop);

    struct metadata_wc *wc = handle->data;
    if (wc->metadata_check_after >= now_realtime_sec())
        return;

    struct metadata_cmd cmd;
    memset(&cmd, 0, sizeof(cmd));
    cmd.opcode = METADATA_SCAN_HOSTS;
    metadata_enq_cmd(wc, &cmd);
}
/*
 * Release free pages of a database with an incremental vacuum.
 *
 * Runs at most once every DATABASE_VACUUM_FREQUENCY_SECONDS. The timestamp is a
 * single function static, so the limit is shared by all the databases passed in.
 *
 * threshold  percentage of the total pages that must be exceeded by the free
 *            pages before anything is done; 0 selects DATABASE_FREE_PAGES_THRESHOLD_PC
 * vacuum_pc  percentage of the free pages to release; 0 selects
 *            DATABASE_FREE_PAGES_VACUUM_PC
 * db_alias   name used in the log message
 */
void vacuum_database(sqlite3 *database, const char *db_alias, int threshold, int vacuum_pc)
{
    static time_t next_run = 0;

    time_t now = now_realtime_sec();
    if (next_run > now)
        return;

    next_run = now + DATABASE_VACUUM_FREQUENCY_SECONDS;

    int free_pages = get_free_page_count(database);
    int total_pages = get_database_page_count(database);

    threshold = threshold ? threshold : DATABASE_FREE_PAGES_THRESHOLD_PC;
    vacuum_pc = vacuum_pc ? vacuum_pc : DATABASE_FREE_PAGES_VACUUM_PC;

    if (free_pages <= (total_pages * threshold / 100))
        return;

    int do_free_pages = (int)(free_pages * vacuum_pc / 100);
    nd_log(NDLS_DAEMON, NDLP_DEBUG, "%s: Freeing %d database pages", db_alias, do_free_pages);

    char sql[128];
    snprintfz(sql, sizeof(sql) - 1, "PRAGMA incremental_vacuum(%d)", do_free_pages);
    (void)db_execute(database, sql);
}
#define SQL_SELECT_HOST_CTX_CHART_DIM_LIST                                                                             \
    "SELECT d.dim_id, d.rowid FROM chart c, dimension d WHERE c.chart_id = d.chart_id AND c.rowid = @rowid"

/*
 * Check every dimension of one chart (selected by its rowid) and delete the
 * ones that dimension_can_be_deleted() approves.
 *
 * *res caches the list statement: it is prepared on first use and only reset
 * here, so finalizing it is left to the caller. *checked and *deleted are
 * incremented per dimension; rows whose dim_id is not uuid sized are skipped
 * without being counted.
 *
 * Returns true when the caller may continue with more work, false when the
 * statement could not be prepared or bound, a shutdown was requested or the
 * WAL size is no longer acceptable.
 */
static bool clean_host_chart_dimensions(sqlite3_stmt **res, int64_t chart_row_id, size_t *checked, size_t *deleted)
{
    if (!*res && !PREPARE_STATEMENT(db_meta, SQL_SELECT_HOST_CTX_CHART_DIM_LIST, res))
        return false;

    struct metadata_wc *wc = &metasync_worker;

    // Remains false when the bind below fails
    bool can_continue = false;

    int param = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_int64(*res, ++param, chart_row_id));
    param = 0;

    // Delete statement shared by all the dimensions of this chart
    sqlite3_stmt *dim_del_stmt = NULL;

    can_continue = true;
    while (can_continue && sqlite3_step_monitored(*res) == SQLITE_ROW) {
        if (sqlite3_column_bytes(*res, 0) != sizeof(nd_uuid_t))
            continue;

        nd_uuid_t *dim_uuid = (nd_uuid_t *)sqlite3_column_blob(*res, 0);
        int64_t dimension_id = sqlite3_column_int64(*res, 1);

        if (dimension_can_be_deleted(dim_uuid, NULL, false)) {
            delete_dimension_by_rowid(&dim_del_stmt, dimension_id, dim_uuid);
            (*deleted)++;
        }
        (*checked)++;

        can_continue = !metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN) && sql_metadata_wal_size_acceptable();
    }

    SQLITE_FINALIZE(dim_del_stmt);

done:
    REPORT_BIND_FAIL(*res, param);
    SQLITE_RESET(*res);
    return can_continue;
}
#define SQL_SELECT_HOST_CTX_CHART_LIST "SELECT rowid, context FROM chart WHERE host_id = @host"

/*
 * Verify the retention of the charts of a host that belong to the contexts
 * listed in CTX_JudyL.
 *
 * CTX_JudyL is looked up with the STRING pointer of the chart context as index;
 * data is the RRDHOST. For each chart whose context is found, its dimensions are
 * cleaned and the context is removed through ctx_delete_metadata_cleanup_context().
 * The walk stops early on shutdown or when the WAL size is no longer acceptable.
 *
 * NOTE(review): the context is removed from the cleanup list even when the
 * dimension cleanup of the chart reports that it had to stop - confirm intent.
 */
static void cleanup_host_context_metadata(Pvoid_t CTX_JudyL, void *data)
{
    if (!CTX_JudyL || !data)
        return;

    RRDHOST *host = data;
    struct metadata_wc *wc = &metasync_worker;

    sqlite3_stmt *res = NULL;
    sqlite3_stmt *dimension_res = NULL;
    sqlite3_stmt *context_res = NULL;

    if (!PREPARE_STATEMENT(db_meta, SQL_SELECT_HOST_CTX_CHART_LIST, &res))
        return;

    Word_t num_of_contexts = JudyLCount(CTX_JudyL, 0, -1, PJE0);
    nd_log_daemon(NDLP_DEBUG, "Verifying the retention of %zu contexts for host %s", num_of_contexts, rrdhost_hostname(host));

    int param = 0;
    SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_id.uuid, sizeof(host->host_id.uuid), SQLITE_STATIC));
    param = 0;

    size_t checked = 0;
    size_t deleted = 0;
    bool can_continue = true;

    while (can_continue && sqlite3_step_monitored(res) == SQLITE_ROW) {
        int64_t chart_row_id = sqlite3_column_int64(res, 0);
        const char *context = (char *)sqlite3_column_text(res, 1);

        // The context name is interned only to obtain the pointer used as the Judy index
        STRING *ctx = string_strdupz(context);

        if (JudyLGet(CTX_JudyL, (Word_t)ctx, PJE0)) {
            can_continue = clean_host_chart_dimensions(&dimension_res, chart_row_id, &checked, &deleted);
            ctx_delete_metadata_cleanup_context(&context_res, &host->host_id.uuid, context);
        }

        string_freez(ctx);

        can_continue =
            can_continue && (!metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)) && sql_metadata_wal_size_acceptable();
    }

    SQLITE_FINALIZE(dimension_res);
    SQLITE_FINALIZE(context_res);

    nd_log_daemon(
        NDLP_DEBUG,
        "Verified the contexts of host %s (Checked %zu metrics and removed %zu)",
        rrdhost_hostname(host),
        checked,
        deleted);

done:
    REPORT_BIND_FAIL(res, param);
    SQLITE_FINALIZE(res);
}
/*
 * Run the periodic metadata maintenance tasks.
 *
 * In order: context list cleanup for every host (first run 5 seconds after the
 * first call, then every METADATA_MAINTENANCE_CTX_CLEAN_REPEAT seconds and only
 * while the WAL size is acceptable), the dimension / chart / label checks (each
 * one runs only when the previous one reports it did no work), the health log
 * cleanup, an incremental vacuum and a WAL checkpoint.
 * A requested shutdown is honoured between the steps.
 */
void run_metadata_cleanup(struct metadata_wc *wc)
{
    static time_t next_context_list_cleanup = 0;

    time_t now = now_realtime_sec();
    if (!next_context_list_cleanup)
        next_context_list_cleanup = now + 5;

    if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)))
        return;

    if (next_context_list_cleanup < now && sql_metadata_wal_size_acceptable()) {
        RRDHOST *host;

        worker_is_busy(UV_EVENT_CTX_CLEANUP);
        dfe_start_reentrant(rrdhost_root_index, host) {
            ctx_get_context_list_to_cleanup(&host->host_id.uuid, cleanup_host_context_metadata, host);
            if (metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN) || !sql_metadata_wal_size_acceptable())
                break;
        }
        dfe_done(host);
        worker_is_idle();

        next_context_list_cleanup = now_realtime_sec() + METADATA_MAINTENANCE_CTX_CLEAN_REPEAT;
    }

    if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)))
        return;

    // Each check returns true when it did not scan anything, letting the next one run
    if (check_dimension_metadata(wc) && check_chart_metadata(wc))
        (void) check_label_metadata(wc);

    cleanup_health_log(wc);

    if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)))
        return;

    vacuum_database(db_meta, "METADATA", DATABASE_FREE_PAGES_THRESHOLD_PC, DATABASE_FREE_PAGES_VACUUM_PC);
    (void) sqlite3_wal_checkpoint(db_meta, NULL);
}
// Payload handed to the libuv work callbacks of the metadata worker.
// The work callbacks in this file obtain it from req->data and free it in the
// "after" callback.
struct scan_metadata_payload {
// libuv work request this payload travels with
uv_work_t request;
// metadata worker the work belongs to (read by the callbacks as data->wc)
struct metadata_wc *wc;
// NOTE(review): the four pending_* lists are not used in the part of the file
// visible here; presumably they hand queued work to the worker - confirm
void *pending_alert_list;
void *pending_ctx_cleanup_list;
void *pending_uuid_deletion;
void *pending_sql_statement;
// NOTE(review): presumably the scratch buffer used to build SQL during a scan - confirm
BUFFER *work_buffer;
};
// One slot of the thread pool used to load host contexts in parallel
struct host_context_load_thread {
// thread handle, valid while a thread has been created for the slot and not joined yet
uv_thread_t thread;
// host whose contexts the thread loads
RRDHOST *host;
// database handles opened by the first thread that ran in this slot; they are
// reused by the next threads of the slot and closed when the pool is torn down
sqlite3 *db_meta_thread;
sqlite3 *db_context_thread;
// set by the scheduler before the thread is created, cleared after it is joined (atomic access)
bool busy;
// set by the thread when it is done, cleared after it is joined (atomic access)
bool finished;
};
// Per-thread database handles set up by restore_host_context() before the
// contexts of a host are loaded
__thread sqlite3 *db_meta_thread = NULL;
__thread sqlite3 *db_context_thread = NULL;
// True on the thread that schedules the context loading; that thread keeps its
// per-thread handles across hosts, the others clear them after every host
__thread bool main_context_thread = false;
static void restore_host_context(void *arg)
{
struct host_context_load_thread *hclt = arg;
RRDHOST *host = hclt->host;
if (!host)
return;
if (!db_meta_thread) {
if (hclt->db_meta_thread) {
db_meta_thread = hclt->db_meta_thread;
db_context_thread = hclt->db_context_thread;
} else {
char sqlite_database[FILENAME_MAX + 1];
snprintfz(sqlite_database, sizeof(sqlite_database) - 1, "%s/netdata-meta.db", netdata_configured_cache_dir);
int rc = sqlite3_open_v2(sqlite_database, &db_meta_thread, SQLITE_OPEN_READONLY | SQLITE_OPEN_NOMUTEX, NULL);
if (rc != SQLITE_OK) {
sqlite3_close(db_meta_thread);
db_meta_thread = NULL;
}
snprintfz(sqlite_database, sizeof(sqlite_database) - 1, "%s/context-meta.db", netdata_configured_cache_dir);
rc = sqlite3_open_v2(sqlite_database, &db_context_thread, SQLITE_OPEN_READONLY | SQLITE_OPEN_NOMUTEX, NULL);
if (rc != SQLITE_OK) {
sqlite3_close(db_context_thread);
db_context_thread = NULL;
}
hclt->db_meta_thread = db_meta_thread;
hclt->db_context_thread = db_context_thread;
}
}
usec_t started_ut = now_monotonic_usec(); (void)started_ut;
rrdhost_load_rrdcontext_data(host);
usec_t ended_ut = now_monotonic_usec(); (void)ended_ut;
char load_duration[64];
duration_snprintf(load_duration, sizeof(load_duration), (int64_t)(ended_ut - started_ut), "us", true);
nd_log_daemon(NDLP_DEBUG, "Contexts for host %s loaded in %s", rrdhost_hostname(host), load_duration);
rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD);
pulse_host_status(host, 0, 0); // this will detect the receiver status
aclk_queue_node_info(host, false);
// Check and clear the thread local variables
if (!main_context_thread) {
db_meta_thread = NULL;
db_context_thread = NULL;
}
__atomic_store_n(&hclt->finished, true, __ATOMIC_RELEASE);
}
// Completion callback of the host context load work: releases the payload attached to the request
static void after_start_host_load_context(uv_work_t *req, int status __maybe_unused)
{
    freez(req->data);
}
/*
 * Join the threads of the pool and, optionally, find a slot for a new thread.
 *
 * hclt / max_thread_slots  the pool; a NULL pool yields false immediately
 * wait       when true, threads that are still busy are joined as well and a
 *            single pass over the slots is made
 * free_slot  when not NULL, the scan stops at the first usable slot (one that
 *            is not busy, or one whose thread was just joined) and its index is
 *            stored here
 *
 * Without wait, up to 20 passes are made, sleeping 10ms after each pass that
 * found nothing.
 *
 * Returns true when a slot was found or joined, or when wait was requested.
 */
static bool cleanup_finished_threads(struct host_context_load_thread *hclt, size_t max_thread_slots, bool wait, size_t *free_slot)
{
    if (!hclt)
        return false;

    bool found_slot = false;

    for (size_t attempt = 0; attempt < 20; attempt++) {
        for (size_t index = 0; index < max_thread_slots; index++) {
            struct host_context_load_thread *slot = &hclt[index];

            // A slot that is not busy can be handed out right away
            if (free_slot && !__atomic_load_n(&slot->busy, __ATOMIC_ACQUIRE)) {
                found_slot = true;
                *free_slot = index;
                break;
            }

            bool joinable = __atomic_load_n(&slot->finished, __ATOMIC_RELAXED) ||
                            (wait && __atomic_load_n(&slot->busy, __ATOMIC_ACQUIRE));
            if (!joinable)
                continue;

            int rc = uv_thread_join(&slot->thread);
            if (rc)
                nd_log_daemon(NDLP_WARNING, "Failed to join thread, rc = %d", rc);

            __atomic_store_n(&slot->busy, false, __ATOMIC_RELEASE);
            __atomic_store_n(&slot->finished, false, __ATOMIC_RELEASE);
            found_slot = true;

            if (free_slot) {
                *free_slot = index;
                break;
            }
        }

        if (found_slot || wait)
            break;

        sleep_usec(10 * USEC_PER_MS);
    }

    return found_slot || wait;
}
/*
 * libuv work callback: load the contexts of every host that has the pending
 * context load flag set.
 *
 * With more than one CPU a pool of thread slots (one per CPU) is used; a host
 * is loaded directly on this thread when there is no pool, no slot becomes
 * available, or the thread cannot be created. All the threads are joined and
 * the database handles they opened are closed before the function returns.
 * A requested shutdown stops the scheduling of more hosts.
 */
static void start_all_host_load_context(uv_work_t *req __maybe_unused)
{
    register_libuv_worker_jobs();

    struct scan_metadata_payload *data = req->data;
    struct metadata_wc *wc = data->wc;

    worker_is_busy(UV_EVENT_HOST_CONTEXT_LOAD);
    usec_t started_ut = now_monotonic_usec();

    RRDHOST *host;

    size_t max_threads = netdata_conf_cpus();
    if (max_threads < 1)
        max_threads = 1;

    nd_log(NDLS_DAEMON, NDLP_DEBUG, "Using %zu threads for context loading", max_threads);

    // No pool with a single thread: everything is loaded directly
    struct host_context_load_thread *hclt = max_threads > 1 ? callocz(max_threads, sizeof(*hclt)) : NULL;

    size_t thread_index = 0;
    main_context_thread = true;
    size_t host_count = 0;
    size_t sync_exec = 0;
    size_t async_exec = 0;

    dfe_start_reentrant(rrdhost_root_index, host) {
        if (!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD))
            continue;

        if (metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))
            break;

        nd_log_daemon(NDLP_DEBUG, "Loading context for host %s", rrdhost_hostname(host));

        int rc = 0;
        bool thread_found = cleanup_finished_threads(hclt, max_threads, false, &thread_index);
        if (thread_found) {
            __atomic_store_n(&hclt[thread_index].busy, true, __ATOMIC_RELAXED);
            hclt[thread_index].host = host;
            rc = uv_thread_create(&hclt[thread_index].thread, restore_host_context, &hclt[thread_index]);
            if (rc) {
                // No thread is attached to the slot: release it, otherwise it would
                // be lost for the next hosts and later joined without a thread
                __atomic_store_n(&hclt[thread_index].busy, false, __ATOMIC_RELEASE);
            }
            else
                async_exec++;
        }

        // if single thread, thread creation failure or failure to find slot
        if (rc || !thread_found) {
            sync_exec++;
            struct host_context_load_thread hclt_sync = {.host = host};
            restore_host_context(&hclt_sync);
        }
        host_count++;
    }
    dfe_done(host);

    // Join everything still running, then release what the pool threads opened
    bool should_clean_threads = cleanup_finished_threads(hclt, max_threads, true, NULL);

    if (should_clean_threads) {
        for (size_t index = 0; index < max_threads; index++) {
            if (hclt[index].db_meta_thread)
                sqlite3_close_v2(hclt[index].db_meta_thread);

            if (hclt[index].db_context_thread)
                sqlite3_close_v2(hclt[index].db_context_thread);
        }
        freez(hclt);
    }

    usec_t ended_ut = now_monotonic_usec();

    char load_duration[64];
    duration_snprintf(load_duration, sizeof(load_duration), (int64_t)(ended_ut - started_ut), "us", true);

    nd_log_daemon(
        NDLP_INFO,
        "Contexts for %zu hosts loaded: %zu delegated to %zu threads, %zu handled directly, in %s.",
        host_count,
        async_exec,
        max_threads,
        sync_exec,
        load_duration);

    // Handles opened by the hosts loaded directly on this thread. They are closed
    // independently: one of them may exist without the other when an open failed.
    if (db_meta_thread) {
        sqlite3_close_v2(db_meta_thread);
        db_meta_thread = NULL;
    }

    if (db_context_thread) {
        sqlite3_close_v2(db_context_thread);
        db_context_thread = NULL;
    }

    worker_is_idle();
}
// Callback after scan of hosts is done
//
// Frees the alarm entries queued for deletion that have no pending saves, clears
// the processing flag of the worker, signals the scan completion (if one is set)
// and releases the payload.
static void after_metadata_hosts(uv_work_t *req, int status __maybe_unused)
{
    struct scan_metadata_payload *data = req->data;
    struct metadata_wc *wc = data->wc;

    bool first = true;
    Word_t Index = 0;

    // The index of every entry is the ALARM_ENTRY pointer itself
    while (JudyLFirstThenNext(wc->ae_DelJudyL, &Index, &first)) {
        ALARM_ENTRY *ae = (ALARM_ENTRY *) Index;

        if (__atomic_load_n(&ae->pending_save_count, __ATOMIC_RELAXED))
            continue; // still referenced by a pending save, retry on a later scan

        health_alarm_log_free_one_nochecks_nounlink(ae);
        (void) JudyLDel(&wc->ae_DelJudyL, Index, PJE0);

        // The array changed: restart the walk from the beginning
        first = true;
        Index = 0;
    }

    metadata_flag_clear(wc, METADATA_FLAG_PROCESSING);

    if (unlikely(wc->scan_complete))
        completion_mark_complete(wc->scan_complete);

    freez(data);
}
// Store pending chart and dimension metadata of a host inside one transaction.
//
// host          - host whose charts and dimensions are scanned
// work_buffer   - scratch buffer, flushed before each chart label update
// shutting_down - when true, ML models are not loaded for flagged dimensions
//
// Charts/dimensions that fail to be stored get their METADATA_UPDATE flag set
// again and the host is flagged with RRDHOST_FLAG_METADATA_UPDATE for a retry.
static void metadata_scan_host(RRDHOST *host, BUFFER *work_buffer, bool shutting_down)
{
    // Takes the return value of ml_dimension_load_models(); once it becomes
    // true no further model loads are attempted (the variable is static).
    static bool skip_models = false;

    sqlite3_stmt *chart_stmt = NULL;
    sqlite3_stmt *dimension_stmt = NULL;
    sqlite3_stmt *ml_stmt = NULL;
    bool retry_host = false;

    (void)db_execute(db_meta, "BEGIN TRANSACTION");

    RRDSET *st;
    rrdset_foreach_reentrant(st, host) {
        if (rrdset_flag_check(st, RRDSET_FLAG_METADATA_UPDATE)) {
            // The flag is cleared up front and re-armed only if the store fails
            rrdset_flag_clear(st, RRDSET_FLAG_METADATA_UPDATE);
            buffer_flush(work_buffer);

            worker_is_busy(UV_EVENT_STORE_CHART);

            if (unlikely(check_and_update_chart_labels(st, work_buffer)))
                error_report("METADATA: 'host:%s': Failed to update labels for chart %s", rrdhost_hostname(host), rrdset_name(st));

            if (unlikely(store_chart_metadata(st, &chart_stmt))) {
                retry_host = true;
                rrdset_flag_set(st, RRDSET_FLAG_METADATA_UPDATE);
                error_report(
                    "METADATA: 'host:%s': Failed to store metadata for chart %s",
                    rrdhost_hostname(host),
                    rrdset_name(st));
            }

            worker_is_idle();
        }

        RRDDIM *rd;
        rrddim_foreach_read(rd, st) {
            // Pending ML model load request for this dimension
            if (rrddim_flag_check(rd, RRDDIM_FLAG_ML_MODEL_LOAD)) {
                rrddim_flag_clear(rd, RRDDIM_FLAG_ML_MODEL_LOAD);
                if (likely(!(skip_models || shutting_down))) {
                    worker_is_busy(UV_EVENT_METADATA_ML_LOAD);
                    skip_models = ml_dimension_load_models(rd, &ml_stmt);
                    worker_is_idle();
                }
            }

            // Pending dimension metadata
            if (unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_METADATA_UPDATE))) {
                rrddim_flag_clear(rd, RRDDIM_FLAG_METADATA_UPDATE);

                // Mirror the "hidden" option into the META_HIDDEN flag
                if (rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN))
                    rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN);
                else
                    rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN);

                worker_is_busy(UV_EVENT_STORE_DIMENSION);

                if (unlikely(store_dimension_metadata(rd, &dimension_stmt))) {
                    retry_host = true;
                    rrddim_flag_set(rd, RRDDIM_FLAG_METADATA_UPDATE);
                    error_report(
                        "METADATA: 'host:%s': Failed to store dimension metadata for chart %s. dimension %s",
                        rrdhost_hostname(host),
                        rrdset_name(st),
                        rrddim_name(rd));
                }

                worker_is_idle();
            }
        }
        rrddim_foreach_done(rd);
    }
    rrdset_foreach_done(st);

    (void)db_execute(db_meta, "COMMIT TRANSACTION");

    if (retry_host)
        rrdhost_flag_set(host,RRDHOST_FLAG_METADATA_UPDATE);

    SQLITE_FINALIZE(ml_stmt);
    SQLITE_FINALIZE(dimension_stmt);
    SQLITE_FINALIZE(chart_stmt);
}
// Store the host system info and the host record. The METADATA_INFO flag is
// cleared first; if either store fails it is set again together with
// METADATA_UPDATE, so that the host is picked up by a later scan.
static void store_host_and_system_info(RRDHOST *host)
{
    rrdhost_flag_clear(host, RRDHOST_FLAG_METADATA_INFO);

    const bool sysinfo_failed = store_host_systeminfo(host) != 0;
    if (unlikely(sysinfo_failed)) {
        error_report("METADATA: 'host:%s': Failed to store host updated system information in the database", rrdhost_hostname(host));
        rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_INFO | RRDHOST_FLAG_METADATA_UPDATE);
    }

    const bool host_failed = store_host_metadata(host) != 0;
    if (unlikely(host_failed)) {
        error_report("METADATA: 'host:%s': Failed to store host info in the database", rrdhost_hostname(host));
        rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_INFO | RRDHOST_FLAG_METADATA_UPDATE);
    }
}
// List of pending work items kept in a JudyL array.
// Producers in this file insert at index ++count, so items are numbered 1..count.
struct judy_list_t {
// JudyL array holding one pointer-sized value per queued item
Pvoid_t JudyL;
// Last index handed out (incremented before every insertion)
Word_t count;
};
// Process (and always release) the list of dimension UUIDs queued for deletion.
//
// wc                    - worker config; its METADATA_FLAG_SHUTDOWN flag stops database work
// pending_uuid_deletion - list of heap allocated nd_uuid_t pointers, may be NULL
//
// Ownership: the list, its Judy array and every UUID stored in it are freed here.
static void do_pending_uuid_deletion(struct metadata_wc *wc, struct judy_list_t *pending_uuid_deletion)
{
    if (!pending_uuid_deletion)
        return;

    worker_is_busy(UV_EVENT_UUID_DELETION);

    usec_t started_ut = now_monotonic_usec(); (void)started_ut;

    size_t entries = pending_uuid_deletion->count;
    Word_t Index = 0;
    bool first = true;
    Pvoid_t *Pvalue;
    while ((Pvalue = JudyLFirstThenNext(pending_uuid_deletion->JudyL, &Index, &first))) {
        if (!*Pvalue)
            continue;

        nd_uuid_t *uuid = *Pvalue;

        // On shutdown no further database work is done, but the walk must go on:
        // JudyLFreeArray() below releases only the array, not the UUIDs it points to.
        if (!metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN) && dimension_can_be_deleted(uuid, NULL, false))
            delete_dimension_uuid(uuid, NULL, false);

        freez(uuid);
    }
    (void) JudyLFreeArray(&pending_uuid_deletion->JudyL, PJE0);
    freez(pending_uuid_deletion);

    usec_t ended_ut = now_monotonic_usec(); (void)ended_ut;
    nd_log_daemon(
        NDLP_DEBUG,
        "Processed %zu dimension delete items in %0.2f ms",
        entries,
        (double)(ended_ut - started_ut) / USEC_PER_MS);

    worker_is_idle();
}
// Schedule the queued host context cleanups and release the list.
//
// wc                       - worker config; its METADATA_FLAG_SHUTDOWN flag stops database work
// pending_ctx_cleanup_list - list of struct host_ctx_cleanup_s pointers, may be NULL
//
// Ownership: the list, its Judy array, every item and every item's context
// string are freed here.
static void store_ctx_cleanup_list(struct metadata_wc *wc, struct judy_list_t *pending_ctx_cleanup_list)
{
    if (!pending_ctx_cleanup_list)
        return;

    worker_is_busy(UV_EVENT_CTX_CLEANUP_SCHEDULE);

    usec_t started_ut = now_monotonic_usec(); (void)started_ut;

    size_t entries = pending_ctx_cleanup_list->count;
    Word_t Index = 0;
    bool first = true;
    Pvoid_t *Pvalue;
    sqlite3_stmt *res = NULL;
    while ((Pvalue = JudyLFirstThenNext(pending_ctx_cleanup_list->JudyL, &Index, &first))) {
        if (!*Pvalue)
            continue;

        struct host_ctx_cleanup_s *ctx_cleanup = *Pvalue;

        // On shutdown nothing more is scheduled, but the remaining items still
        // have to be released: JudyLFreeArray() below frees only the array.
        if (likely(!metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)))
            sql_schedule_host_ctx_cleanup(&res, &ctx_cleanup->host_uuid, string2str(ctx_cleanup->context));

        string_freez(ctx_cleanup->context);
        freez(ctx_cleanup);
    }
    (void) JudyLFreeArray(&pending_ctx_cleanup_list->JudyL, PJE0);
    freez(pending_ctx_cleanup_list);

    SQLITE_FINALIZE(res);

    usec_t ended_ut = now_monotonic_usec(); (void)ended_ut;
    nd_log_daemon(
        NDLP_DEBUG,
        "Stored %zu host context cleanup items in %0.2f ms",
        entries,
        (double)(ended_ut - started_ut) / USEC_PER_MS);

    worker_is_idle();
}
static void store_alert_transitions(struct judy_list_t *pending_alert_list)
{
if (!pending_alert_list)
return;
worker_is_busy(UV_EVENT_STORE_ALERT_TRANSITIONS);
usec_t started_ut = now_monotonic_usec(); (void)started_ut;
size_t entries = pending_alert_list->count;
Word_t Index = 0;
bool first = true;
Pvoid_t *Pvalue;
while ((Pvalue = JudyLFirstThenNext(pending_alert_list->JudyL, &Index, &first))) {
RRDHOST *host = *Pvalue;
Pvalue = JudyLGet(pending_alert_list->JudyL, ++Index, PJE0);
ALARM_ENTRY *ae = *Pvalue;
sql_health_alarm_log_save(host, ae);
__atomic_add_fetch(&ae->pending_save_count, -1, __ATOMIC_RELAXED);
__atomic_add_fetch(&host->health.pending_transitions, -1, __ATOMIC_RELAXED);
}
(void) JudyLFreeArray(&pending_alert_list->JudyL, PJE0);
freez(pending_alert_list);
usec_t ended_ut = now_monotonic_usec(); (void)ended_ut;
nd_log(
NDLS_DAEMON,
NDLP_DEBUG,
"Stored and processed %zu alert transitions in %0.2f ms",
entries,
(double)(ended_ut - started_ut) / USEC_PER_MS);
worker_is_idle();
}
// Execute and finalize every prepared statement queued through
// METADATA_EXECUTE_STORE_STATEMENT, then release the list.
//
// pending_sql_statement - list of sqlite3_stmt pointers, may be NULL
static void store_sql_statements(struct judy_list_t *pending_sql_statement)
{
    if (!pending_sql_statement)
        return;

    worker_is_busy(METADATA_EXECUTE_STORE_STATEMENT);

    const usec_t started_ut = now_monotonic_usec();
    const size_t entries = pending_sql_statement->count;

    Word_t slot_index = 0;
    bool start = true;
    Pvoid_t *slot;
    while ((slot = JudyLFirstThenNext(pending_sql_statement->JudyL, &slot_index, &start))) {
        sqlite3_stmt *stmt = *slot;

        if (likely(stmt)) {
            int rc = sqlite3_step_monitored(stmt);
            if (unlikely(rc != SQLITE_DONE))
                nd_log_daemon(NDLP_ERR, "Failed to execute sql statement, rc = %d", rc);

            // Statements are finalized whether or not the step succeeded
            SQLITE_FINALIZE(stmt);
        }
    }

    (void) JudyLFreeArray(&pending_sql_statement->JudyL, PJE0);
    freez(pending_sql_statement);

    COMPUTE_DURATION(report_duration, "us", started_ut, now_monotonic_usec());
    nd_log_daemon(NDLP_DEBUG, "Stored and processed %zu sql statements in %s", entries, report_duration);

    worker_is_idle();
}
// Replace the stored labels of a host: the old labels are deleted and the
// current ones are written with a single statement built in work_buffer.
// On any failure the METADATA_LABELS and METADATA_UPDATE flags are set again
// so the operation is retried by a later scan.
//
// NOTE(review): the ON CONFLICT clause is appended and executed even when the
// label walk adds nothing to the buffer; whether that can happen depends on
// host_label_store_to_sql_callback, which is not visible here - confirm.
static void meta_store_host_labels(RRDHOST *host, BUFFER *work_buffer)
{
    rrdhost_flag_clear(host, RRDHOST_FLAG_METADATA_LABELS);

    if (unlikely(exec_statement_with_uuid(SQL_DELETE_HOST_LABELS, &host->host_id.uuid))) {
        error_report("METADATA: 'host:%s': failed to delete old host labels", rrdhost_hostname(host));
        rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE);
        return;
    }

    // Build the statement: the callback appends to work_buffer for each label
    buffer_flush(work_buffer);

    struct query_build label_query = {.sql = work_buffer, .count = 0};
    uuid_unparse_lower(host->host_id.uuid, label_query.uuid_str);
    rrdlabels_walkthrough_read(host->rrdlabels, host_label_store_to_sql_callback, &label_query);

    buffer_strcat(
        work_buffer,
        " ON CONFLICT (host_id, label_key) DO UPDATE SET source_type = excluded.source_type, label_value=excluded.label_value, date_created=UNIXEPOCH()");

    if (likely(!db_execute(db_meta, buffer_tostring(work_buffer))))
        return;

    error_report("METADATA: 'host:%s': failed to update metadata host labels", rrdhost_hostname(host));
    rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE);
}
// Store the current claim id for the host; a zero claim id is passed on as
// NULL. On failure the METADATA_CLAIMID and METADATA_UPDATE flags are set
// again so a later scan retries.
static void store_host_claim_id(RRDHOST *host)
{
    rrdhost_flag_clear(host, RRDHOST_FLAG_METADATA_CLAIMID);

    ND_UUID claim = claim_id_get_uuid();

    int rc = UUIDiszero(claim) ? store_claim_id(&host->host_id.uuid, NULL)
                               : store_claim_id(&host->host_id.uuid, &claim.uuid);

    if (likely(!rc))
        return;

    rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_CLAIMID | RRDHOST_FLAG_METADATA_UPDATE);
}
// Store whatever host level metadata is flagged as pending, in this order:
// labels, claim id, host and system info.
//
// host        - host to check
// work_buffer - scratch buffer used to build the labels statement
void store_host_info_and_metadata(RRDHOST *host, BUFFER *work_buffer)
{
    // Labels
    if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_LABELS))) {
        meta_store_host_labels(host, work_buffer);
    }

    // Claim id
    if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_CLAIMID))) {
        store_host_claim_id(host);
    }

    // Host and system info
    if (rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_INFO)) {
        store_host_and_system_info(host);
    }
}
// Worker (libuv thread pool) that stores everything that is pending:
// queued SQL statements, alert transitions, context cleanups, the metadata of
// every non-archived host flagged with RRDHOST_FLAG_METADATA_UPDATE, and
// finally the queued dimension UUID deletions and the metadata cleanup.
//
// req->data is the struct scan_metadata_payload prepared by the event loop;
// the pending lists it carries are consumed (and freed) by the helpers called here.
static void start_metadata_hosts(uv_work_t *req)
{
    register_libuv_worker_jobs();

    struct scan_metadata_payload *data = req->data;
    struct metadata_wc *wc = data->wc;

    // scan_complete is only installed by metadata_sync_shutdown_prepare(), so a
    // non-NULL pointer means this is the final scan before shutdown. During
    // that scan ML models are not loaded (see metadata_scan_host()).
    bool shutting_down = (__atomic_load_n(&wc->scan_complete, __ATOMIC_RELAXED) != NULL);

    BUFFER *work_buffer = data->work_buffer;
    usec_t all_started_ut = now_monotonic_usec();

    store_sql_statements((struct judy_list_t *)data->pending_sql_statement);

    store_alert_transitions((struct judy_list_t *)data->pending_alert_list);

    store_ctx_cleanup_list(wc, (struct judy_list_t *)data->pending_ctx_cleanup_list);

    worker_is_busy(UV_EVENT_METADATA_STORE);

    RRDHOST *host;
    dfe_start_reentrant(rrdhost_root_index, host) {
        if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED) || !rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_UPDATE))
            continue;

        usec_t started_ut = now_monotonic_usec();

        // Cleared before the work starts; the helpers set it again on failure
        rrdhost_flag_clear(host,RRDHOST_FLAG_METADATA_UPDATE);

        worker_is_busy(UV_EVENT_STORE_HOST);

        // store labels, claim_id, host and system info (if needed)
        store_host_info_and_metadata(host, work_buffer);

        worker_is_idle();

        metadata_scan_host(host, work_buffer, shutting_down);

        COMPUTE_DURATION(report_duration, "us", started_ut, now_monotonic_usec());
        nd_log_daemon(NDLP_DEBUG, "Host %s saved metadata in %s", rrdhost_hostname(host), report_duration);
    }
    dfe_done(host);

    COMPUTE_DURATION(report_duration, "us", all_started_ut, now_monotonic_usec());
    nd_log_daemon(NDLP_DEBUG, "Checking all hosts completed in %s", report_duration);

    do_pending_uuid_deletion(wc, (struct judy_list_t *)data->pending_uuid_deletion);

    run_metadata_cleanup(wc);

    // Earliest time for the next periodic host check
    wc->metadata_check_after = now_realtime_sec() + METADATA_HOST_CHECK_INTERVAL;

    worker_is_idle();
}
// uv_walk() callback used at shutdown: stops the handle first if it is a
// timer, then requests the handle to be closed (no close callback is given).
static void close_callback(uv_handle_t *handle, void *data __maybe_unused)
{
    const bool is_timer = (handle->type == UV_TIMER);

    if (is_timer)
        uv_timer_stop((uv_timer_t *)handle);

    uv_close(handle, NULL);
}
#define EVENT_LOOP_NAME "METASYNC"

// Entry point of the metadata sync thread.
//
// arg is the struct metadata_wc of the worker. The function sets up the libuv
// loop (async + timer handles), signals start_stop_complete once it is ready,
// and then alternates between running the loop and draining the command queue
// in batches of at most METADATA_MAX_BATCH_SIZE commands.
//
// Commands that only collect work (dimension deletions, context cleanups,
// alert transitions, SQL statements) append to per-type pending lists; a
// METADATA_SCAN_HOSTS command hands all pending lists to a thread pool worker
// (start_metadata_hosts) which consumes and frees them.
//
// The thread exits once a NOOP is dequeued while METADATA_FLAG_SHUTDOWN is set
// and no scan is in progress. Pending lists that were never handed to a worker
// are released before returning; start_stop_complete is signalled again on exit.
static void metadata_event_loop(void *arg)
{
    struct metadata_wc *config = arg;
    uv_thread_set_name_np(EVENT_LOOP_NAME);
    worker_register(EVENT_LOOP_NAME);

    config->ar = aral_by_size_acquire(sizeof(struct metadata_cmd));

    worker_register_job_name(METADATA_DATABASE_NOOP, "noop");
    worker_register_job_name(METADATA_DEL_DIMENSION, "delete dimension");
    worker_register_job_name(METADATA_STORE_CLAIM_ID, "add claim id");
    worker_register_job_name(METADATA_ADD_CTX_CLEANUP, "host ctx cleanup");
    worker_register_job_name(METADATA_SCAN_HOSTS, "host metadata store");
    worker_register_job_name(METADATA_LOAD_HOST_CONTEXT, "host load context");
    worker_register_job_name(METADATA_ADD_HOST_AE, "add host alert entry");
    worker_register_job_name(METADATA_DEL_HOST_AE, "delete host alert entry");
    worker_register_job_name(METADATA_EXECUTE_STORE_STATEMENT, "add sql statement");

    unsigned cmd_batch_size;
    uv_loop_t *loop = &config->loop;
    fatal_assert(0 == uv_loop_init(loop));
    fatal_assert(0 == uv_async_init(loop, &config->async, async_cb));
    fatal_assert(0 == uv_timer_init(loop, &config->timer_req));
    fatal_assert(0 == uv_timer_start(&config->timer_req, timer_cb, TIMER_INITIAL_PERIOD_MS, TIMER_REPEAT_PERIOD_MS));

    loop->data = config;
    config->async.data = config;
    config->timer_req.data = config;

    nd_log(NDLS_DAEMON, NDLP_DEBUG, "Starting metadata sync thread");

    struct metadata_cmd cmd;
    memset(&cmd, 0, sizeof(cmd));
    metadata_flag_clear(config, METADATA_FLAG_PROCESSING);

    config->metadata_check_after = now_realtime_sec() + METADATA_HOST_CHECK_FIRST_CHECK;

    // Unblocks metadata_sync_init()
    completion_mark_complete(&config->start_stop_complete);

    BUFFER *work_buffer = buffer_create(1024, &netdata_buffers_statistics.buffers_sqlite);
    struct scan_metadata_payload *data;
    Pvoid_t *Pvalue;
    struct judy_list_t *pending_ae_list = NULL;
    struct judy_list_t *pending_ctx_cleanup_list = NULL;
    struct judy_list_t *pending_uuid_deletion = NULL;
    struct judy_list_t *pending_sql_statement = NULL;

    int shutdown = 0;
    config->initialized = true;

    // Keep going until shutdown was requested AND no scan worker is in flight
    while (shutdown == 0 || (config->flags & METADATA_FLAG_PROCESSING)) {
        nd_uuid_t *uuid;
        RRDHOST *host = NULL;
        ALARM_ENTRY *ae = NULL;
        sqlite3_stmt *stmt;
        enum metadata_opcode opcode;

        worker_is_idle();
        uv_run(loop, UV_RUN_DEFAULT);

        /* wait for commands */
        cmd_batch_size = 0;
        do {
            if (unlikely(cmd_batch_size >= METADATA_MAX_BATCH_SIZE))
                break;

            cmd = metadata_deq_cmd(config);
            opcode = cmd.opcode;

            // Queue drained while shutting down: leave the batch loop
            // ("continue" re-evaluates the do/while condition, which is now false)
            if (unlikely(opcode == METADATA_DATABASE_NOOP && metadata_flag_check(config, METADATA_FLAG_SHUTDOWN))) {
                shutdown = 1;
                continue;
            }

            ++cmd_batch_size;

            if (likely(opcode != METADATA_DATABASE_NOOP))
                worker_is_busy(opcode);

            switch (opcode) {
                case METADATA_DATABASE_NOOP:
                    break;

                case METADATA_DEL_DIMENSION:
                    uuid = (nd_uuid_t *) cmd.param[0];
                    if (!pending_uuid_deletion)
                        pending_uuid_deletion = callocz(1, sizeof(*pending_uuid_deletion));
                    Pvalue = JudyLIns(&pending_uuid_deletion->JudyL, ++pending_uuid_deletion->count, PJE0);
                    if (Pvalue != PJERR)
                        *Pvalue = uuid;
                    else {
                        // Failure in Judy, attempt to continue running anyway
                        // ignore uuid, global cleanup will take care of it
                        freez(uuid);
                    }
                    break;

                case METADATA_STORE_CLAIM_ID:
                    store_claim_id((nd_uuid_t *) cmd.param[0], (nd_uuid_t *) cmd.param[1]);
                    freez((void *) cmd.param[0]);
                    freez((void *) cmd.param[1]);
                    break;

                case METADATA_ADD_CTX_CLEANUP: {
                    if (!pending_ctx_cleanup_list)
                        pending_ctx_cleanup_list = callocz(1, sizeof(*pending_ctx_cleanup_list));

                    struct host_ctx_cleanup_s *ctx_cleanup = (struct host_ctx_cleanup_s *) cmd.param[0];
                    Pvalue = JudyLIns(&pending_ctx_cleanup_list->JudyL, ++pending_ctx_cleanup_list->count, PJE0);
                    if (Pvalue != PJERR)
                        *Pvalue = ctx_cleanup;
                    else {
                        // Failure in Judy, attempt to continue running anyway
                        // Cleanup structure
                        string_freez(ctx_cleanup->context);
                        freez(ctx_cleanup);
                    }
                    break;
                }

                case METADATA_SCAN_HOSTS:
                    // Only one scan worker at a time
                    if (unlikely(metadata_flag_check(config, METADATA_FLAG_PROCESSING)))
                        break;

                    if (unittest_running)
                        break;

                    // Hand all pending lists over to the worker
                    data = mallocz(sizeof(*data));
                    data->request.data = data;
                    data->wc = config;
                    data->pending_alert_list = pending_ae_list;
                    data->pending_ctx_cleanup_list = pending_ctx_cleanup_list;
                    data->pending_uuid_deletion = pending_uuid_deletion;
                    data->pending_sql_statement = pending_sql_statement;
                    data->work_buffer = work_buffer;
                    pending_ae_list = NULL;
                    pending_ctx_cleanup_list = NULL;
                    pending_uuid_deletion = NULL;
                    pending_sql_statement = NULL;

                    if (unlikely(cmd.completion))
                        cmd.completion = NULL; // Do not complete after launching worker (worker will do)

                    metadata_flag_set(config, METADATA_FLAG_PROCESSING);
                    if (uv_queue_work(loop, &data->request, start_metadata_hosts, after_metadata_hosts)) {
                        // Failed to launch worker -- let the event loop handle completion
                        cmd.completion = config->scan_complete;
                        // Take the lists back so they are retried by the next scan
                        pending_ae_list = data->pending_alert_list;
                        pending_ctx_cleanup_list = data->pending_ctx_cleanup_list;
                        pending_uuid_deletion = data->pending_uuid_deletion;
                        pending_sql_statement = data->pending_sql_statement;
                        freez(data);
                        metadata_flag_clear(config, METADATA_FLAG_PROCESSING);
                    }
                    break;

                case METADATA_LOAD_HOST_CONTEXT:
                    if (unittest_running)
                        break;

                    data = callocz(1,sizeof(*data));
                    data->request.data = data;
                    data->wc = config;
                    if (uv_queue_work(loop, &data->request, start_all_host_load_context, after_start_host_load_context)) {
                        freez(data);
                    }
                    break;

                case METADATA_ADD_HOST_AE: {
                    host = (RRDHOST *) cmd.param[0];
                    ae = (ALARM_ENTRY *) cmd.param[1];
                    if (!pending_ae_list)
                        pending_ae_list = callocz(1, sizeof(*pending_ae_list));

                    // A transition occupies two consecutive slots: the host first,
                    // the alert entry right after it. Either both are stored or
                    // neither, so the consumer never finds half a pair.
                    Word_t host_slot = pending_ae_list->count + 1;
                    bool queued = false;

                    Pvalue = JudyLIns(&pending_ae_list->JudyL, host_slot, PJE0);
                    if (Pvalue && Pvalue != PJERR) {
                        // Must be written before the next insert, which may move values
                        *Pvalue = (void *)host;

                        Pvalue = JudyLIns(&pending_ae_list->JudyL, host_slot + 1, PJE0);
                        if (Pvalue && Pvalue != PJERR) {
                            *Pvalue = (void *)ae;
                            pending_ae_list->count += 2;
                            queued = true;
                        }
                        else
                            (void) JudyLDel(&pending_ae_list->JudyL, host_slot, PJE0);
                    }

                    if (unlikely(!queued)) {
                        // Failure in Judy: the transition will never be saved, so undo
                        // the accounting done by metadata_queue_ae_save()
                        __atomic_add_fetch(&ae->pending_save_count, -1, __ATOMIC_RELAXED);
                        __atomic_add_fetch(&host->health.pending_transitions, -1, __ATOMIC_RELAXED);
                    }
                    break;
                }

                case METADATA_DEL_HOST_AE:
                    // The alert entry pointer is the index; released in after_metadata_hosts()
                    (void) JudyLIns(&config->ae_DelJudyL, (Word_t) (void *) cmd.param[0], PJE0);
                    break;

                case METADATA_EXECUTE_STORE_STATEMENT:
                    stmt = (sqlite3_stmt *) cmd.param[0];
                    if (!pending_sql_statement)
                        pending_sql_statement = callocz(1, sizeof(*pending_sql_statement));
                    Pvalue = JudyLIns(&pending_sql_statement->JudyL, ++pending_sql_statement->count, PJE0);
                    if (Pvalue && Pvalue != PJERR)
                        *Pvalue = (void *)stmt;
                    else {
                        // Failure in Judy: the statement cannot be queued, release it
                        SQLITE_FINALIZE(stmt);
                    }
                    break;

                case METADATA_UNITTEST: {
                    struct thread_unittest *tu = (struct thread_unittest *) cmd.param[0];
                    sleep_usec(1000); // processing takes 1ms
                    __atomic_fetch_add(&tu->processed, 1, __ATOMIC_SEQ_CST);
                    break;
                }

                default:
                    break;
            }

            if (cmd.completion)
                completion_mark_complete(cmd.completion);
        } while (opcode != METADATA_DATABASE_NOOP);
    }

    config->initialized = false;

    uv_walk(loop, (uv_walk_cb) close_callback, NULL);
    uv_run(loop, UV_RUN_NOWAIT);

    // uv_loop_close() returns UV_EBUSY while handles or requests are pending.
    // Retry for a bounded time while it is busy, and never call it again
    // once it has succeeded.
    int rc = uv_loop_close(loop);
    for (unsigned attempt = 0; rc == UV_EBUSY && attempt < 1000; attempt++) {
        sleep_usec(1000);
        uv_run(loop, UV_RUN_NOWAIT);
        rc = uv_loop_close(loop);
    }
    if (unlikely(rc))
        nd_log(NDLS_DAEMON, NDLP_ERR, "METADATA: Failed to close the event loop, rc = %d", rc);

    buffer_free(work_buffer);
    worker_unregister();

    nd_log(NDLS_DAEMON, NDLP_DEBUG, "Shutting down metadata thread");
    completion_mark_complete(&config->start_stop_complete);
    if (config->scan_complete) {
        completion_destroy(config->scan_complete);
        freez(config->scan_complete);
    }

    // Release whatever was queued but never handed over to a scan worker
    Word_t Index;
    bool first;
    if (pending_ae_list) {
        (void)JudyLFreeArray(&pending_ae_list->JudyL, PJE0);
        freez(pending_ae_list);
    }

    if (pending_ctx_cleanup_list) {
        Index = 0;
        first = true;
        while ((Pvalue = JudyLFirstThenNext(pending_ctx_cleanup_list->JudyL, &Index, &first))) {
            if (!*Pvalue)
                continue;
            struct host_ctx_cleanup_s *ctx_cleanup = *Pvalue;
            string_freez(ctx_cleanup->context);
            freez(ctx_cleanup);
        }
        (void)JudyLFreeArray(&pending_ctx_cleanup_list->JudyL, PJE0);
        freez(pending_ctx_cleanup_list);
    }

    if (pending_uuid_deletion) {
        Index = 0;
        first = true;
        while ((Pvalue = JudyLFirstThenNext(pending_uuid_deletion->JudyL, &Index, &first)))
            freez(*Pvalue);
        (void)JudyLFreeArray(&pending_uuid_deletion->JudyL, PJE0);
        freez(pending_uuid_deletion);
    }

    if (pending_sql_statement) {
        Index = 0;
        first = true;
        while ((Pvalue = JudyLFirstThenNext(pending_sql_statement->JudyL, &Index, &first))) {
            sqlite3_stmt *leftover = *Pvalue;
            if (!leftover)
                continue;
            // Not executed (no scan will run any more), only released
            SQLITE_FINALIZE(leftover);
        }
        (void)JudyLFreeArray(&pending_sql_statement->JudyL, PJE0);
        freez(pending_sql_statement);
    }

    metadata_free_cmd_queue(config);
    aral_by_size_release(config->ar);
}
void metadata_sync_shutdown(void)
{
completion_init(&metasync_worker.start_stop_complete);
struct metadata_cmd cmd;
memset(&cmd, 0, sizeof(cmd));
nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Sending a shutdown command");
cmd.opcode = METADATA_SYNC_SHUTDOWN;
metadata_enq_cmd(&metasync_worker, &cmd);
/* wait for metadata thread to shut down */
nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Waiting for shutdown ACK");
completion_wait_for(&metasync_worker.start_stop_complete);
completion_destroy(&metasync_worker.start_stop_complete);
nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Shutdown complete");
}
void metadata_sync_shutdown_prepare(void)
{
static bool running = false;
if (unlikely(!metasync_worker.initialized || running))
return;
running = true;
struct metadata_cmd cmd;
memset(&cmd, 0, sizeof(cmd));
struct metadata_wc *wc = &metasync_worker;
struct completion *compl = mallocz(sizeof(*compl));
completion_init(compl);
__atomic_store_n(&wc->scan_complete, compl, __ATOMIC_RELAXED);
nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Sending a scan host command");
uint32_t max_wait_iterations = 2000;
while (unlikely(metadata_flag_check(&metasync_worker, METADATA_FLAG_PROCESSING)) && max_wait_iterations--) {
if (max_wait_iterations == 1999)
nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Current worker is running; waiting to finish");
sleep_usec(1000);
}
cmd.opcode = METADATA_SCAN_HOSTS;
metadata_enq_cmd(&metasync_worker, &cmd);
nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Waiting for host scan completion");
completion_wait_for(wc->scan_complete);
nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Host scan complete; can continue with shutdown");
}
// Thread entry point that runs the shutdown preparation; the argument is
// ignored and the thread result is always NULL.
void *metadata_sync_shutdown_thread(void *ptr __maybe_unused)
{
    metadata_sync_shutdown_prepare();

    void *thread_result = NULL;
    return thread_result;
}
// Thread running the shutdown preparation; joined by metadata_sync_shutdown_background_wait()
static ND_THREAD *metdata_sync_shutdown_background_wait_thread = NULL;

// Start the shutdown preparation (final host scan) on a separate, joinable thread.
void metadata_sync_shutdown_background(void)
{
    ND_THREAD *prepare_thread = nd_thread_create(
        "METASYNC-SHUTDOWN", NETDATA_THREAD_OPTION_JOINABLE, metadata_sync_shutdown_thread, NULL);

    metdata_sync_shutdown_background_wait_thread = prepare_thread;
}
void metadata_sync_shutdown_background_wait(void) {
nd_thread_join(metdata_sync_shutdown_background_wait_thread);
metadata_sync_shutdown();
}
// -------------------------------------------------------------
// Init function called on agent startup
void metadata_sync_init(void)
{
memset(&metasync_worker, 0, sizeof(metasync_worker));
completion_init(&metasync_worker.start_stop_complete);
fatal_assert(0 == uv_thread_create(&metasync_worker.thread, metadata_event_loop, &metasync_worker));
completion_wait_for(&metasync_worker.start_stop_complete);
completion_destroy(&metasync_worker.start_stop_complete);
nd_log(NDLS_DAEMON, NDLP_DEBUG, "SQLite metadata sync initialization complete");
}
// Helpers

// Build a command without a completion and enqueue it to the metadata worker.
//
// opcode - command to queue
// param0 - first command parameter (meaning depends on the opcode), may be NULL
// param1 - second command parameter (meaning depends on the opcode), may be NULL
static inline void queue_metadata_cmd(enum metadata_opcode opcode, const void *param0, const void *param1)
{
    // Zero the whole command first, as the other producers in this file do,
    // so that no field is handed to the queue with an indeterminate value.
    struct metadata_cmd cmd;
    memset(&cmd, 0, sizeof(cmd));

    cmd.opcode = opcode;
    cmd.param[0] = param0;
    cmd.param[1] = param1;
    cmd.completion = NULL;
    metadata_enq_cmd(&metasync_worker, &cmd);
}
// Public

// Queue a dimension UUID for deletion. The UUID is copied to the heap; the
// copy is owned by the metadata worker from here on. NULL is ignored.
void metaqueue_delete_dimension_uuid(nd_uuid_t *uuid)
{
    if (likely(uuid)) {
        nd_uuid_t *queued_uuid = mallocz(sizeof(*uuid));
        uuid_copy(*queued_uuid, *uuid);
        queue_metadata_cmd(METADATA_DEL_DIMENSION, queued_uuid, NULL);
    }
}
// Queue a claim id to be stored for a host. Both UUIDs are copied to the heap
// and the copies are freed by the worker after the store.
//
// host_uuid  - host the claim id belongs to; NULL makes the call a no-op
// claim_uuid - claim id, or NULL (in which case NULL is queued as the claim id)
void metaqueue_store_claim_id(nd_uuid_t *host_uuid, nd_uuid_t *claim_uuid)
{
    if (unlikely(!host_uuid))
        return;

    nd_uuid_t *host_copy = mallocz(sizeof(*host_uuid));
    uuid_copy(*host_copy, *host_uuid);

    nd_uuid_t *claim_copy = NULL;
    if (likely(claim_uuid)) {
        claim_copy = mallocz(sizeof(*claim_uuid));
        uuid_copy(*claim_copy, *claim_uuid);
    }

    queue_metadata_cmd(METADATA_STORE_CLAIM_ID, host_copy, claim_copy);
}
// Request the ML models of a dimension to be loaded. Nothing is queued: the
// dimension is only flagged, and metadata_scan_host() acts on the flag.
void metaqueue_ml_load_models(RRDDIM *rd)
{
    RRDDIM *dimension = rd;
    rrddim_flag_set(dimension, RRDDIM_FLAG_ML_MODEL_LOAD);
}
// Queue the command that loads the contexts of all hosts.
// Declared with an explicit (void) parameter list so that the compiler rejects
// calls that pass arguments.
void metadata_queue_load_host_context(void)
{
    queue_metadata_cmd(METADATA_LOAD_HOST_CONTEXT, NULL, NULL);
    nd_log(NDLS_DAEMON, NDLP_DEBUG, "Queued command to load host contexts");
}
// Queue a context cleanup for a host. The host UUID and the context name are
// copied into a heap allocated item that the metadata worker owns afterwards.
// The call is a no-op when either argument is NULL.
void metadata_queue_ctx_host_cleanup(nd_uuid_t *host_uuid, const char *context)
{
    if (likely(host_uuid && context)) {
        struct host_ctx_cleanup_s *item = mallocz(sizeof(*item));

        uuid_copy(item->host_uuid, *host_uuid);
        item->context = string_strdupz(context);

        queue_metadata_cmd(METADATA_ADD_CTX_CLEANUP, item, NULL);
    }
}
// Queue an alert transition to be saved. The pending counters of the host and
// of the alert entry are incremented before the command is queued;
// store_alert_transitions() decrements them after the save.
// The call is a no-op when either argument is NULL.
void metadata_queue_ae_save(RRDHOST *host, ALARM_ENTRY *ae)
{
    if (likely(host && ae)) {
        __atomic_add_fetch(&host->health.pending_transitions, 1, __ATOMIC_RELAXED);
        __atomic_add_fetch(&ae->pending_save_count, 1, __ATOMIC_RELAXED);

        queue_metadata_cmd(METADATA_ADD_HOST_AE, host, ae);
    }
}
// Queue an alert entry for deletion by the metadata worker. NULL is ignored.
void metadata_queue_ae_deletion(ALARM_ENTRY *ae)
{
    if (likely(ae)) {
        queue_metadata_cmd(METADATA_DEL_HOST_AE, ae, NULL);
    }
}
// Queue a prepared statement to be executed by the metadata worker, which
// also finalizes it (see store_sql_statements()). NULL is ignored.
void metadata_execute_store_statement(sqlite3_stmt *stmt)
{
    if (likely(stmt)) {
        queue_metadata_cmd(METADATA_EXECUTE_STORE_STATEMENT, stmt, NULL);
    }
}
// Request a host scan, which also stores the queued alert transitions.
// The host argument is not used.
void commit_alert_transitions(RRDHOST *host __maybe_unused)
{
    const void *no_param = NULL;
    queue_metadata_cmd(METADATA_SCAN_HOSTS, no_param, no_param);
}
// Return the value sqlite_get_db_space() reports for the metadata database.
uint64_t sqlite_get_meta_space(void)
{
    uint64_t meta_space = sqlite_get_db_space(db_meta);
    return meta_space;
}
#define SQL_ADD_AGENT_EVENT_LOG \
"INSERT INTO agent_event_log (event_type, version, value, date_created) VALUES " \
" (@event_type, @version, @value, UNIXEPOCH())"

// Insert one row in agent_event_log for the given event, tagged with the
// running NETDATA_VERSION and the current time.
//
// event_id - type of the event
// value    - value recorded with the event
void add_agent_event(event_log_type_t event_id, int64_t value)
{
    sqlite3_stmt *stmt = NULL;
    int param = 0;
    int rc;

    if (!PREPARE_STATEMENT(db_meta, SQL_ADD_AGENT_EVENT_LOG, &stmt))
        return;

    // param tracks the index of the parameter being bound
    SQLITE_BIND_FAIL(done, sqlite3_bind_int(stmt, ++param, event_id));
    SQLITE_BIND_FAIL(done, sqlite3_bind_text(stmt, ++param, NETDATA_VERSION, -1, SQLITE_STATIC));
    SQLITE_BIND_FAIL(done, sqlite3_bind_int64(stmt, ++param, value));

    // All binds succeeded; reset before it is handed to REPORT_BIND_FAIL below
    param = 0;

    rc = execute_insert(stmt);
    if (rc != SQLITE_DONE)
        error_report("Failed to store agent event information, rc = %d", rc);

done:
    REPORT_BIND_FAIL(stmt, param);
    SQLITE_FINALIZE(stmt);
}
// Delete agent_event_log rows created more than 30 days ago.
// The result of the statement is ignored.
void cleanup_agent_event_log(void)
{
    const char *purge_sql = "DELETE FROM agent_event_log WHERE date_created < UNIXEPOCH() - 30 * 86400";
    (void) db_execute(db_meta, purge_sql);
}
#define SQL_GET_AGENT_EVENT_TYPE_MEDIAN \
"SELECT AVG(value) AS median FROM " \
"(SELECT value FROM agent_event_log WHERE event_type = @event ORDER BY value " \
" LIMIT 2 - (SELECT COUNT(*) FROM agent_event_log WHERE event_type = @event) % 2 " \
"OFFSET(SELECT(COUNT(*) - 1) / 2 FROM agent_event_log WHERE event_type = @event)) "

// Return the median value logged for an event type.
//
// The result of the first completed lookup per event type is cached in static
// storage and returned by later calls (0 is cached when the query gives no row
// or a bind fails). Returns 0 without caching for an out-of-range event_id or
// when the statement cannot be prepared.
usec_t get_agent_event_time_median(event_log_type_t event_id)
{
    static bool cached[EVENT_AGENT_MAX] = { 0 };
    static usec_t cached_median[EVENT_AGENT_MAX] = { 0 };

    if (event_id >= EVENT_AGENT_MAX)
        return 0;

    if (cached[event_id])
        return cached_median[event_id];

    sqlite3_stmt *stmt = NULL;
    if (!PREPARE_STATEMENT(db_meta, SQL_GET_AGENT_EVENT_TYPE_MEDIAN, &stmt))
        return 0;

    usec_t result = 0;
    int param = 0;

    SQLITE_BIND_FAIL(done, sqlite3_bind_int(stmt, ++param, event_id));

    // Bind succeeded; reset before it is handed to REPORT_BIND_FAIL below
    param = 0;

    if (sqlite3_step_monitored(stmt) == SQLITE_ROW)
        result = sqlite3_column_int64(stmt, 0);

done:
    REPORT_BIND_FAIL(stmt, param);
    SQLITE_FINALIZE(stmt);

    cached_median[event_id] = result;
    cached[event_id] = true;
    return result;
}
// Warm up the median cache for every event type from 1 up to (not including)
// EVENT_AGENT_MAX.
void get_agent_event_time_median_init(void)
{
    event_log_type_t event_id = 1;
    while (event_id < EVENT_AGENT_MAX) {
        get_agent_event_time_median(event_id);
        event_id++;
    }
}
//
// unit tests
//
// Unit test producer thread: keeps enqueueing METADATA_UNITTEST commands
// (one every 10ms) until tu->join is set. Every enqueued command is counted
// in tu->added, so the total can be compared against tu->processed.
//
// arg - the shared struct thread_unittest; it is also the return value
static void *unittest_queue_metadata(void *arg) {
    struct thread_unittest *tu = arg;

    // Zero the whole command first, as the other producers in this file do
    struct metadata_cmd cmd;
    memset(&cmd, 0, sizeof(cmd));
    cmd.opcode = METADATA_UNITTEST;
    cmd.param[0] = tu;
    cmd.param[1] = NULL;
    cmd.completion = NULL;

    // The initial command is counted too; otherwise "processed" ends up
    // higher than "added" by one per producer thread.
    __atomic_fetch_add(&tu->added, 1, __ATOMIC_SEQ_CST);
    metadata_enq_cmd(&metasync_worker, &cmd);

    do {
        __atomic_fetch_add(&tu->added, 1, __ATOMIC_SEQ_CST);
        metadata_enq_cmd(&metasync_worker, &cmd);
        sleep_usec(10000);
    } while (!__atomic_load_n(&tu->join, __ATOMIC_RELAXED));
    return arg;
}
// Stress the metadata queue: start 4 producer threads, let them run for
// 5 seconds, stop and join them, wait another 5 seconds and print how many
// commands were added and processed. Always returns 0.
static void *metadata_unittest_threads(void)
{
    enum { THREADS_TO_CREATE = 4 };
    const time_t seconds_to_run = 5;

    unsigned done;

    struct thread_unittest tu = {
        .join = 0,
        .added = 0,
        .processed = 0,
        .done = &done,
    };

    // Queue messages / Time it
    fprintf(
        stderr,
        "\nChecking metadata queue using %d threads for %lld seconds...\n",
        THREADS_TO_CREATE,
        (long long)seconds_to_run);

    ND_THREAD *threads[THREADS_TO_CREATE];

    for (int i = 0; i < THREADS_TO_CREATE; i++) {
        char thread_name[100 + 1];
        snprintf(thread_name, sizeof(thread_name) - 1, "META[%d]", i);
        threads[i] = nd_thread_create(
            thread_name,
            NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE,
            unittest_queue_metadata,
            &tu);
    }

    (void) uv_async_send(&metasync_worker.async);
    sleep_usec(seconds_to_run * USEC_PER_SEC);

    // Tell the producers to stop, then wait for all of them
    __atomic_store_n(&tu.join, 1, __ATOMIC_RELAXED);
    for (int i = 0; i < THREADS_TO_CREATE; i++)
        nd_thread_join(threads[i]);

    sleep_usec(5 * USEC_PER_SEC);

    fprintf(stderr, "Added %u elements, processed %u\n", tu.added, tu.processed);

    return 0;
}
// Unit test entry point: start the metadata sync thread, exercise its queue
// for a fixed period and shut the thread down again. Always returns 0.
int metadata_unittest(void)
{
    const int result = 0;

    metadata_sync_init();

    // Queue items for a specific period of time
    (void) metadata_unittest_threads();

    metadata_sync_shutdown();

    return result;
}