0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-11 00:20:15 +00:00

Add host flag to check for pending alert transitions to save

Remove precompiled statements
Offload processing of alerts in the event loop
Queue alert transitions to the metadata event loop to be saved
Run metadata checks every 5 seconds
This commit is contained in:
Stelios Fragkakis 2024-11-25 10:41:58 +02:00
parent 6fca66c7c9
commit 86ade0e87e
No known key found for this signature in database
GPG key ID: C2532B995ABF88AE
8 changed files with 201 additions and 98 deletions

View file

@ -936,25 +936,26 @@ typedef enum __attribute__ ((__packed__)) rrdhost_flags {
// Health
RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION = (1 << 17), // contains charts and dims with uninitialized variables
RRDHOST_FLAG_INITIALIZED_HEALTH = (1 << 18), // the host has initialized health structures
RRDHOST_FLAG_PENDING_ALERT_TRANSITIONS_SAVE = (1 << 19), // the host has pending alert transitions to store
// Exporting
RRDHOST_FLAG_EXPORTING_SEND = (1 << 19), // send it to external databases
RRDHOST_FLAG_EXPORTING_DONT_SEND = (1 << 20), // don't send it to external databases
RRDHOST_FLAG_EXPORTING_SEND = (1 << 20), // send it to external databases
RRDHOST_FLAG_EXPORTING_DONT_SEND = (1 << 21), // don't send it to external databases
// ACLK
RRDHOST_FLAG_ACLK_STREAM_CONTEXTS = (1 << 21), // when set, we should send ACLK stream context updates
RRDHOST_FLAG_ACLK_STREAM_ALERTS = (1 << 22), // Host should stream alerts
RRDHOST_FLAG_ACLK_STREAM_CONTEXTS = (1 << 22), // when set, we should send ACLK stream context updates
RRDHOST_FLAG_ACLK_STREAM_ALERTS = (1 << 23), // Host should stream alerts
// Metadata
RRDHOST_FLAG_METADATA_UPDATE = (1 << 23), // metadata needs to be stored in the database
RRDHOST_FLAG_METADATA_LABELS = (1 << 24), // metadata needs to be stored in the database
RRDHOST_FLAG_METADATA_INFO = (1 << 25), // metadata needs to be stored in the database
RRDHOST_FLAG_PENDING_CONTEXT_LOAD = (1 << 26), // Context needs to be loaded
RRDHOST_FLAG_METADATA_UPDATE = (1 << 24), // metadata needs to be stored in the database
RRDHOST_FLAG_METADATA_LABELS = (1 << 25), // metadata needs to be stored in the database
RRDHOST_FLAG_METADATA_INFO = (1 << 26), // metadata needs to be stored in the database
RRDHOST_FLAG_PENDING_CONTEXT_LOAD = (1 << 27), // Context needs to be loaded
RRDHOST_FLAG_METADATA_CLAIMID = (1 << 27), // metadata needs to be stored in the database
RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED = (1 << 28), // set when the receiver part is disconnected
RRDHOST_FLAG_METADATA_CLAIMID = (1 << 28), // metadata needs to be stored in the database
RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED = (1 << 29), // set when the receiver part is disconnected
RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED = (1 << 29), // set when the host has updated global functions
RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED = (1 << 30), // set when the host has updated global functions
} RRDHOST_FLAGS;
#define rrdhost_flag_check(host, flag) (__atomic_load_n(&((host)->flags), __ATOMIC_SEQ_CST) & (flag))
@ -1038,6 +1039,7 @@ struct alarm_entry {
RRDCALC_STATUS new_status;
uint32_t flags;
bool pending_save;
int delay;
time_t delay_up_to_timestamp;

View file

@ -50,6 +50,11 @@ typedef struct aclk_sync_cfg_t {
time_t node_collectors_send;
char node_id[UUID_STR_LEN];
char *alerts_snapshot_uuid; // will contain the snapshot_uuid value if snapshot was requested
struct {
Pvoid_t JudyL; // alert transitions to save
uint32_t count;
SPINLOCK spinlock;
} alert_transition;
} aclk_sync_cfg_t;
void create_aclk_config(RRDHOST *host, nd_uuid_t *host_uuid, nd_uuid_t *node_id);

View file

@ -18,9 +18,9 @@
static inline bool is_event_from_alert_variable_config(int64_t unique_id, nd_uuid_t *host_id)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_SELECT_VARIABLE_ALERT_BY_UNIQUE_ID, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_SELECT_VARIABLE_ALERT_BY_UNIQUE_ID, &res))
return false;
bool ret = false;
@ -34,20 +34,18 @@ static inline bool is_event_from_alert_variable_config(int64_t unique_id, nd_uui
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
return ret;
}
#define MAX_REMOVED_PERIOD 604800 //a week
#define SQL_UPDATE_ALERT_VERSION_TRANSITION \
"UPDATE alert_version SET unique_id = @unique_id WHERE health_log_id = @health_log_id"
static void update_alert_version_transition(int64_t health_log_id, int64_t unique_id)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_UPDATE_ALERT_VERSION_TRANSITION, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_UPDATE_ALERT_VERSION_TRANSITION, &res))
return;
int param = 0;
@ -61,7 +59,7 @@ static void update_alert_version_transition(int64_t health_log_id, int64_t uniqu
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
}
//decide if some events should be sent or not
@ -70,9 +68,9 @@ done:
static bool cloud_status_matches(int64_t health_log_id, RRDCALC_STATUS status)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_SELECT_LAST_ALERT_STATUS, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_SELECT_LAST_ALERT_STATUS, &res))
return true;
bool send = false;
@ -89,7 +87,7 @@ static bool cloud_status_matches(int64_t health_log_id, RRDCALC_STATUS status)
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
return send;
}
@ -108,7 +106,7 @@ done:
//
static int insert_alert_to_submit_queue(RRDHOST *host, int64_t health_log_id, uint32_t unique_id, RRDCALC_STATUS status)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
if (cloud_status_matches(health_log_id, status)) {
update_alert_version_transition(health_log_id, unique_id);
@ -118,7 +116,7 @@ static int insert_alert_to_submit_queue(RRDHOST *host, int64_t health_log_id, ui
if (is_event_from_alert_variable_config(unique_id, &host->host_id.uuid))
return 2;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_QUEUE_ALERT_TO_CLOUD, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_QUEUE_ALERT_TO_CLOUD, &res))
return -1;
int param = 0;
@ -133,7 +131,7 @@ static int insert_alert_to_submit_queue(RRDHOST *host, int64_t health_log_id, ui
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
return 0;
}
@ -145,9 +143,9 @@ done:
//
static int delete_alert_from_submit_queue(RRDHOST *host, int64_t first_seq_id, int64_t last_seq_id)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_DELETE_QUEUE_ALERT_TO_CLOUD, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_DELETE_QUEUE_ALERT_TO_CLOUD, &res))
return -1;
int param = 0;
@ -162,7 +160,7 @@ static int delete_alert_from_submit_queue(RRDHOST *host, int64_t first_seq_id, i
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
return 0;
}
@ -224,9 +222,9 @@ static inline char *sqlite3_text_strdupz_empty(sqlite3_stmt *res, int iCol) {
//
static void sql_update_alert_version(int64_t health_log_id, int64_t unique_id, RRDCALC_STATUS status, uint64_t version)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_UPDATE_ALERT_VERSION, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_UPDATE_ALERT_VERSION, &res))
return;
int param = 0;
@ -242,7 +240,7 @@ static void sql_update_alert_version(int64_t health_log_id, int64_t unique_id, R
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
}
#define SQL_SELECT_ALERT_TO_DUMMY \
@ -422,6 +420,7 @@ void health_alarm_log_populate(
" ORDER BY aq.sequence_id ASC LIMIT "ACLK_MAX_ALERT_UPDATES
static void aclk_push_alert_event(RRDHOST *host __maybe_unused)
{
CLAIM_ID claim_id = claim_id_get();
@ -487,9 +486,9 @@ done:
static void delete_alert_from_pending_queue(RRDHOST *host, int64_t row)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_DELETE_PROCESSED_ROWS, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_DELETE_PROCESSED_ROWS, &res))
return;
int param = 0;
@ -503,7 +502,7 @@ static void delete_alert_from_pending_queue(RRDHOST *host, int64_t row)
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
}
#define SQL_REBUILD_HOST_ALERT_VERSION_TABLE \
@ -555,9 +554,9 @@ done:
bool process_alert_pending_queue(RRDHOST *host)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_PROCESS_ALERT_PENDING_QUEUE, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_PROCESS_ALERT_PENDING_QUEUE, &res))
return false;
int param = 0;
@ -587,7 +586,7 @@ bool process_alert_pending_queue(RRDHOST *host)
nd_log(NDLS_ACCESS, NDLP_NOTICE, "ACLK STA [%s (N/A)]: Processed %d entries, queued %d", rrdhost_hostname(host), count, added);
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
return added > 0;
}
@ -766,9 +765,9 @@ done:
static uint64_t calculate_node_alert_version(RRDHOST *host)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_ALERT_VERSION_CALC, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_ALERT_VERSION_CALC, &res))
return 0;
uint64_t version = 0;
@ -782,7 +781,7 @@ static uint64_t calculate_node_alert_version(RRDHOST *host)
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
return version;
}

View file

@ -26,12 +26,11 @@
static void sql_health_alarm_log_update(RRDHOST *host, ALARM_ENTRY *ae)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
int rc;
REQUIRE_DB(db_meta);
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_UPDATE_HEALTH_LOG, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_UPDATE_HEALTH_LOG, &res))
return;
int param = 0;
@ -51,7 +50,7 @@ static void sql_health_alarm_log_update(RRDHOST *host, ALARM_ENTRY *ae)
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
}
/* Health related SQL queries
@ -149,13 +148,13 @@ static void insert_alert_queue(
RRDCALC_STATUS old_status,
RRDCALC_STATUS new_status)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
int rc;
if (!host->aclk_config)
return;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_INSERT_ALERT_PENDING_QUEUE, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_INSERT_ALERT_PENDING_QUEUE, &res))
return;
int submit_delay = calculate_delay(old_status, new_status);
@ -176,7 +175,7 @@ static void insert_alert_queue(
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
}
#define SQL_INSERT_HEALTH_LOG_DETAIL \
@ -189,10 +188,10 @@ done:
static void sql_health_alarm_log_insert_detail(RRDHOST *host, uint64_t health_log_id, ALARM_ENTRY *ae)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
int rc;
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_INSERT_HEALTH_LOG_DETAIL, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_INSERT_HEALTH_LOG_DETAIL, &res))
return;
int param = 0;
@ -230,7 +229,7 @@ static void sql_health_alarm_log_insert_detail(RRDHOST *host, uint64_t health_lo
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
}
#define SQL_INSERT_HEALTH_LOG \
@ -243,13 +242,11 @@ done:
static void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae)
{
static __thread sqlite3_stmt *res = NULL;
sqlite3_stmt *res = NULL;
int rc;
uint64_t health_log_id;
REQUIRE_DB(db_meta);
if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_INSERT_HEALTH_LOG, &res))
if (!PREPARE_STATEMENT(db_meta, SQL_INSERT_HEALTH_LOG, &res))
return;
int param = 0;
@ -277,7 +274,7 @@ static void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae)
done:
REPORT_BIND_FAIL(res, param);
SQLITE_RESET(res);
SQLITE_FINALIZE(res);
}
void sql_health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae)

View file

@ -173,8 +173,7 @@ sqlite3 *db_meta = NULL;
#define METADATA_RUNTIME_THRESHOLD (5) // Run time threshold for cleanup task
#define METADATA_HOST_CHECK_FIRST_CHECK (5) // First check for pending metadata
#define METADATA_HOST_CHECK_INTERVAL (30) // Repeat check for pending metadata
#define METADATA_HOST_CHECK_IMMEDIATE (5) // Repeat immediate run because we have more metadata to write
#define METADATA_HOST_CHECK_INTERVAL (5) // Repeat check for pending metadata
#define MAX_METADATA_CLEANUP (500) // Maximum metadata write operations (e.g deletes before retrying)
#define METADATA_MAX_BATCH_SIZE (512) // Maximum commands to execute before running the event loop
@ -190,6 +189,8 @@ enum metadata_opcode {
METADATA_SCAN_HOSTS,
METADATA_LOAD_HOST_CONTEXT,
METADATA_DELETE_HOST_CHART_LABELS,
METADATA_ADD_HOST_AE,
METADATA_DEL_HOST_AE,
METADATA_MAINTENANCE,
METADATA_SYNC_SHUTDOWN,
METADATA_UNITTEST,
@ -217,6 +218,7 @@ struct metadata_wc {
uv_async_t async;
uv_timer_t timer_req;
time_t metadata_check_after;
Pvoid_t ae_DelJudyL;
METADATA_FLAG flags;
struct completion start_stop_complete;
struct completion *scan_complete;
@ -1749,6 +1751,17 @@ static void after_metadata_hosts(uv_work_t *req, int status __maybe_unused)
struct scan_metadata_payload *data = req->data;
struct metadata_wc *wc = data->wc;
bool first = false;
Word_t Index = 0;
Pvoid_t *Pvalue;
while ((Pvalue = JudyLFirstThenNext(wc->ae_DelJudyL, &Index, &first))) {
ALARM_ENTRY *ae = (ALARM_ENTRY *) Index;
if(!__atomic_load_n(&ae->pending_save, __ATOMIC_RELAXED)) {
health_alarm_log_free_one_nochecks_nounlink(ae);
(void) JudyLDel(&wc->ae_DelJudyL, Index, PJE0);
}
}
metadata_flag_clear(wc, METADATA_FLAG_PROCESSING);
if (unlikely(wc->scan_complete))
@ -1860,6 +1873,49 @@ struct host_chart_label_cleanup {
Word_t count;
};
/*
 * Store the alert transitions queued for `host` and run its alert
 * pending-queue processing.
 *
 * In order:
 *  1. Each ALARM_ENTRY held in host->aclk_config->alert_transition.JudyL is
 *     passed to sql_health_alarm_log_save() and its pending_save flag is
 *     cleared; the array is then freed and its counter reset to zero.
 *  2. If send_snapshot is 1 it is moved to 2 and
 *     RRDHOST_FLAG_ACLK_STREAM_ALERTS is set; otherwise the flag is set only
 *     when process_alert_pending_queue() returns true.
 *  3. RRDHOST_FLAG_PENDING_ALERT_TRANSITIONS_SAVE is cleared.
 *
 * A host without an aclk_config has no transition queue and no spinlock to
 * take; for such a host step 1 is skipped, while steps 2 and 3 still run so
 * the pending flag cannot stay set.
 */
void do_host_ae_save(RRDHOST *host)
{
    usec_t started_ut = now_monotonic_usec();
    struct aclk_sync_cfg_t *host_aclk_sync = host->aclk_config;
    size_t entries = 0;

    if (host_aclk_sync) {
        // NOTE(review): the spinlock is held across the database work below;
        // producers adding transitions spin until this function finishes.
        spinlock_lock(&host_aclk_sync->alert_transition.spinlock);

        entries = host_aclk_sync->alert_transition.count;
        if (entries) {
            Word_t Index = 0;
            bool first = true;
            Pvoid_t *PValue;
            while ((PValue = JudyLFirstThenNext(host_aclk_sync->alert_transition.JudyL, &Index, &first))) {
                ALARM_ENTRY *ae = *PValue;
                sql_health_alarm_log_save(host, ae);
                // the entry is stored; release it for whoever wants to free it
                __atomic_clear(&ae->pending_save, __ATOMIC_RELEASE);
            }
            (void) JudyLFreeArray(&host_aclk_sync->alert_transition.JudyL, PJE0);
            host_aclk_sync->alert_transition.count = 0;
        }
    }

    if (host_aclk_sync && host_aclk_sync->send_snapshot == 1) {
        host_aclk_sync->send_snapshot = 2;
        rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);
    }
    else if (process_alert_pending_queue(host))
        rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);

    rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_ALERT_TRANSITIONS_SAVE);

    if (host_aclk_sync)
        spinlock_unlock(&host_aclk_sync->alert_transition.spinlock);

    if (entries) {
        usec_t ended_ut = now_monotonic_usec();
        nd_log(
            NDLS_DAEMON,
            NDLP_DEBUG,
            "Stored and processed %zu alert transitions for \"%s\" in %0.2f ms",
            entries,
            rrdhost_hostname(host),
            (double)(ended_ut - started_ut) / USEC_PER_MS);
    }
}
static void do_chart_label_cleanup(struct host_chart_label_cleanup *cl_cleanup_data)
{
if (!cl_cleanup_data)
@ -1909,6 +1965,8 @@ static void start_metadata_hosts(uv_work_t *req __maybe_unused)
transaction_started = !db_execute(db_meta, "BEGIN TRANSACTION");
dfe_start_reentrant(rrdhost_root_index, host) {
do_host_ae_save(host);
if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED) || !rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_UPDATE))
continue;
@ -1988,12 +2046,10 @@ static void start_metadata_hosts(uv_work_t *req __maybe_unused)
"Checking all hosts completed in %0.2f ms",
(double)(all_ended_ut - all_started_ut) / USEC_PER_MS);
if (unlikely(run_again))
wc->metadata_check_after = now_realtime_sec() + METADATA_HOST_CHECK_IMMEDIATE;
else {
wc->metadata_check_after = now_realtime_sec() + METADATA_HOST_CHECK_INTERVAL;
if (likely(!run_again))
run_metadata_cleanup(wc);
}
wc->metadata_check_after = now_realtime_sec() + METADATA_HOST_CHECK_INTERVAL;
worker_is_idle();
}
@ -2050,10 +2106,12 @@ static void metadata_event_loop(void *arg)
BUFFER *work_buffer = buffer_create(1024, &netdata_buffers_statistics.buffers_sqlite);
struct scan_metadata_payload *data;
struct host_chart_label_cleanup *cl_cleanup_data = NULL;
Pvoid_t *PValue;
while (shutdown == 0 || (wc->flags & METADATA_FLAG_PROCESSING)) {
nd_uuid_t *uuid;
RRDHOST *host = NULL;
struct aclk_sync_cfg_t *host_aclk_sync;
worker_is_idle();
uv_run(loop, UV_RUN_DEFAULT);
@ -2118,10 +2176,7 @@ static void metadata_event_loop(void *arg)
data->max_count = 5000;
metadata_flag_set(wc, METADATA_FLAG_PROCESSING);
if (unlikely(
uv_queue_work(loop,&data->request,
start_metadata_hosts,
after_metadata_hosts))) {
if (uv_queue_work(loop, &data->request, start_metadata_hosts, after_metadata_hosts)) {
// Failed to launch worker -- let the event loop handle completion
cmd.completion = wc->scan_complete;
cl_cleanup_data = data->data;
@ -2136,9 +2191,7 @@ static void metadata_event_loop(void *arg)
data = callocz(1,sizeof(*data));
data->request.data = data;
data->wc = wc;
if (unlikely(
uv_queue_work(loop,&data->request, start_all_host_load_context,
after_start_host_load_context))) {
if (uv_queue_work(loop, &data->request, start_all_host_load_context, after_start_host_load_context)) {
freez(data);
}
break;
@ -2146,10 +2199,23 @@ static void metadata_event_loop(void *arg)
if (!cl_cleanup_data)
cl_cleanup_data = callocz(1,sizeof(*cl_cleanup_data));
Pvoid_t *PValue = JudyLIns(&cl_cleanup_data->JudyL, (Word_t) ++cl_cleanup_data->count, PJE0);
PValue = JudyLIns(&cl_cleanup_data->JudyL, (Word_t) ++cl_cleanup_data->count, PJE0);
if (PValue)
*PValue = (void *) cmd.param[0];
break;
case METADATA_ADD_HOST_AE:
host = (RRDHOST *) cmd.param[0];
host_aclk_sync = host->aclk_config;
spinlock_lock(&host_aclk_sync->alert_transition.spinlock);
PValue = JudyLIns(&host_aclk_sync->alert_transition.JudyL, ++host_aclk_sync->alert_transition.count, PJE0);
if(unlikely(!PValue || PValue == PJERR))
fatal("RRDLABELS: corrupted health transitions judyl array");
*PValue = (void *)cmd.param[1];
spinlock_unlock(&host_aclk_sync->alert_transition.spinlock);
break;
case METADATA_DEL_HOST_AE:;
(void) JudyLIns(&wc->ae_DelJudyL, (Word_t) (void *) cmd.param[0], PJE0);
break;
case METADATA_UNITTEST:;
struct thread_unittest *tu = (struct thread_unittest *) cmd.param[0];
@ -2335,6 +2401,30 @@ void metadata_delete_host_chart_labels(char *machine_guid)
nd_log(NDLS_DAEMON, NDLP_DEBUG, "Queued command delete chart labels for host %s", machine_guid);
}
/*
 * Queue `ae` to the metadata event loop (METADATA_ADD_HOST_AE) so it is
 * stored for `host`.
 *
 * health_alarm_log_save() sets ae->pending_save before calling this function.
 * When the metadata worker loop is not available the entry cannot be queued
 * and nothing would ever clear that flag; since a set flag makes the free
 * path defer the release to the same (unavailable) worker, the flag is
 * cleared here so the entry can still be freed normally.
 */
void metadata_queue_ae_save(RRDHOST *host, ALARM_ENTRY *ae)
{
    if (unlikely(!metasync_worker.loop)) {
        __atomic_clear(&ae->pending_save, __ATOMIC_RELEASE);
        return;
    }

    queue_metadata_cmd(METADATA_ADD_HOST_AE, host, ae);
}
/*
 * Ask the metadata event loop to take over the release of `ae` by sending it
 * a METADATA_DEL_HOST_AE command. Does nothing when the metadata worker loop
 * is not available.
 */
void metadata_queue_ae_deletion(ALARM_ENTRY *ae)
{
    if (likely(metasync_worker.loop))
        queue_metadata_cmd(METADATA_DEL_HOST_AE, ae, NULL);
}
/*
 * Mark `host` as having alert transitions waiting to be stored and request a
 * host scan from the metadata event loop (METADATA_SCAN_HOSTS).
 * When the metadata worker loop is not available the host flag is left
 * untouched and no command is queued.
 */
void commit_alert_transitions(RRDHOST *host)
{
    if (likely(metasync_worker.loop)) {
        rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_ALERT_TRANSITIONS_SAVE);
        queue_metadata_cmd(METADATA_SCAN_HOSTS, host, NULL);
    }
}
uint64_t sqlite_get_meta_space(void)
{
return sqlite_get_db_space(db_meta);

View file

@ -66,6 +66,10 @@ int sql_init_meta_database(db_check_action_type_t rebuild, int memory);
void cleanup_agent_event_log(void);
void add_agent_event(event_log_type_t event_id, int64_t value);
usec_t get_agent_event_time_median(event_log_type_t event_id);
// Queue an alert transition (ae) of host to the metadata event loop so it gets stored
void metadata_queue_ae_save(RRDHOST *host, ALARM_ENTRY *ae);
// Queue an alarm entry to the metadata event loop, which frees it once it is no longer pending save
void metadata_queue_ae_deletion(ALARM_ENTRY *ae);
// Flag the host as having pending alert transitions and request a metadata host scan
void commit_alert_transitions(RRDHOST *host);
// Store the queued alert transitions of host, process its alert pending queue and clear the pending flag
void do_host_ae_save(RRDHOST *host);
// UNIT TEST
int metadata_unittest(void);

View file

@ -250,6 +250,15 @@ static void health_event_loop(void) {
if (unlikely(!host->health.enabled))
continue;
if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_ALERT_TRANSITIONS_SAVE))) {
nd_log(
NDLS_DAEMON,
NDLP_DEBUG,
"Host \"%s\" has pending alert transitions to save, postponing health checks",
rrdhost_hostname(host));
continue;
}
if (unlikely(!rrdhost_flag_check(host, RRDHOST_FLAG_INITIALIZED_HEALTH)))
health_initialize_rrdhost(host);
@ -649,14 +658,8 @@ static void health_event_loop(void) {
break;
}
}
struct aclk_sync_cfg_t *wc = host->aclk_config;
if (wc && wc->send_snapshot == 1) {
wc->send_snapshot = 2;
rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);
}
else
if (process_alert_pending_queue(host))
rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);
commit_alert_transitions(host);
dfe_done(host);

View file

@ -6,7 +6,8 @@
inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae)
{
sql_health_alarm_log_save(host, ae);
__atomic_test_and_set(&ae->pending_save, __ATOMIC_ACQUIRE);
metadata_queue_ae_save(host, ae);
}
@ -167,10 +168,8 @@ inline ALARM_ENTRY* health_create_alarm_entry(
return ae;
}
inline void health_alarm_log_add_entry(
RRDHOST *host,
ALARM_ENTRY *ae
) {
inline void health_alarm_log_add_entry(RRDHOST *host, ALARM_ENTRY *ae)
{
netdata_log_debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id);
__atomic_add_fetch(&host->health_transitions, 1, __ATOMIC_RELAXED);
@ -209,20 +208,24 @@ inline void health_alarm_log_add_entry(
}
inline void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae) {
string_freez(ae->name);
string_freez(ae->chart);
string_freez(ae->chart_context);
string_freez(ae->classification);
string_freez(ae->component);
string_freez(ae->type);
string_freez(ae->exec);
string_freez(ae->recipient);
string_freez(ae->source);
string_freez(ae->units);
string_freez(ae->info);
string_freez(ae->old_value_string);
string_freez(ae->new_value_string);
freez(ae);
if(__atomic_load_n(&ae->pending_save, __ATOMIC_RELAXED))
metadata_queue_ae_deletion(ae);
else {
string_freez(ae->name);
string_freez(ae->chart);
string_freez(ae->chart_context);
string_freez(ae->classification);
string_freez(ae->component);
string_freez(ae->type);
string_freez(ae->exec);
string_freez(ae->recipient);
string_freez(ae->source);
string_freez(ae->units);
string_freez(ae->info);
string_freez(ae->old_value_string);
string_freez(ae->new_value_string);
freez(ae);
}
}
inline void health_alarm_log_free(RRDHOST *host) {