mirror of
https://github.com/netdata/netdata.git
synced 2025-04-14 17:48:37 +00:00

* cleanup alerts * fix references * fix references * fix references * load alerts once and apply them to each node * simplify health_create_alarm_entry() * Compile without warnings with compiler flags: -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self * code re-organization and cleanup * generate patterns when applying prototypes; give unique dyncfg names to all alerts * eval expressions keep the source and the parsed_as as STRING pointers * renamed host to node in dyncfg ids * renamed host to node in dyncfg ids * add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level * working functionality * code re-organization: moved health event-loop to a new file, moved health globals to health.c * rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instanciated only when they are linked to RRDSETs * dyncfg alert prototypes initialization for alerts * health dyncfg split to separate file * cleanup not-needed code * normalize matches between parsing and json * also detect !* for disabled alerts * dyncfg capability disabled * Store alert config part1 * Add rrdlabels_common_count * wip health variables lookup without indexes * Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter * working variables with runtime lookup * working variables with runtime lookup * delete rrddimvar and rrdfamily index * remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts * added /api/v1/variable that resolves a variable the same way alerts do * remove rrdcalc from eval * remove debug code * remove duplicate assignment * Fix memory leak * all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts * hide all internal structures of EVAL * Enable -Wformat flag Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> * Adjust binding for calculation, warning, critical * Remove 
unused macro * Update config hash id * use the right info and summary in alerts log * use synchronous queries for alerts * Handle cases when config_hash_id is missing from health_log * remove deadlock from health worker * parsing to json payload for health alert prototypes * cleaner parsing and avoiding memory leaks in case of duplicate members in json * fix left-over rename of function * Keep original lookup field to send to the cloud Cleanup / rename function to store config Remove unused DEFINEs, functions * Use ac->lookup * link jobs to the host when the template is registered; do not accept running a function without a host * full dyncfg support for health alerts, except action TEST * working dyncfg additions, updates, removals * fixed missing source, wrong status updates * add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint * fix dyncfg unittest * rename functions * generalize the json-c parser macros and move them to libnetdata * report progress when enabling and disabling dyncfg templates * moved rrdcalc and rrdvar to health * update alarms * added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts * enable parsed json alerts; allow sending back accepted but disabled * added format_version for alerts payload; enables/disables status now is also inheritted by the status of the rules; fixed variable names in json output * remove the RRDHOST pointer from DYNCFG * Fix command field submitted to the cloud * do not send updates to creation requests, for DYNCFG jobs --------- Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: ilyam8 <ilya@netdata.cloud>
427 lines
14 KiB
C
427 lines
14 KiB
C
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include "dyncfg-internals.h"
|
|
#include "dyncfg.h"
|
|
|
|
// Process-wide DYNCFG state, zero-initialized at load time.
// Its .nodes and .dir members are populated by dyncfg_init_low_level().
struct dyncfg_globals dyncfg_globals = { 0 };
|
|
|
|
// Look up the RRDHOST whose GUID is the lowercase text form of `uuid`.
// Returns the host, or NULL (after logging an error) when no host matches.
RRDHOST *dyncfg_rrdhost_by_uuid(UUID *uuid) {
    char guid[UUID_STR_LEN];
    uuid_unparse_lower(uuid->uuid, guid);

    RRDHOST *found = rrdhost_find_by_guid(guid);
    if(found)
        return found;

    nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host with UUID '%s'", guid);
    return NULL;
}
|
|
|
|
// Resolve the host a configuration entry belongs to, using the UUID stored in the entry.
RRDHOST *dyncfg_rrdhost(DYNCFG *df) {
    UUID *owner = &df->host_uuid;
    return dyncfg_rrdhost_by_uuid(owner);
}
|
|
|
|
// Release everything a DYNCFG entry owns (payload buffer and interned strings)
// and reset the pointers, so the entry can be cleaned up again safely.
void dyncfg_cleanup(DYNCFG *v) {
    // release the owned resources
    buffer_free(v->payload);
    string_freez(v->path);
    string_freez(v->source);
    string_freez(v->function);
    string_freez(v->template);

    // and forget them
    v->payload = NULL;
    v->path = NULL;
    v->source = NULL;
    v->function = NULL;
    v->template = NULL;
}
|
|
|
|
// Fill in missing timestamps: a zero created_ut / modified_ut becomes "now".
static void dyncfg_normalize(DYNCFG *df) {
    const usec_t now = now_realtime_usec();

    if(df->created_ut == 0)
        df->created_ut = now;

    if(df->modified_ut == 0)
        df->modified_ut = now;
}
|
|
|
|
// Dictionary delete callback: frees the resources owned by the DYNCFG entry being removed.
static void dyncfg_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) {
    dyncfg_cleanup(value);
}
|
|
|
|
// Dictionary insert callback: completes a freshly inserted DYNCFG entry.
//  - fills in missing timestamps,
//  - sets df->function to "<PLUGINSD_FUNCTION_CONFIG> <id>",
//  - for jobs without a template, derives the template from the part of the id
//    before its last ':' (a warning is logged when the id has no ':').
static void dyncfg_insert_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) {
    DYNCFG *df = value;
    dyncfg_normalize(df);

    const char *id = dictionary_acquired_item_name(item);

    char function_name[strlen(id) + 20];
    snprintfz(function_name, sizeof(function_name), PLUGINSD_FUNCTION_CONFIG " %s", id);
    df->function = string_strdupz(function_name);

    // only jobs that do not have a template yet need one derived from the id
    if(df->type != DYNCFG_TYPE_JOB || df->template)
        return;

    const char *separator = strrchr(id, ':');
    if(!separator) {
        nd_log(NDLS_DAEMON, NDLP_WARNING,
               "DYNCFG: id '%s' is a job, but does not contain a colon to find the template", id);
        return;
    }

    df->template = string_strndupz(id, separator - id);
}
|
|
|
|
// Dictionary react callback: intentionally a no-op.
static void dyncfg_react_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) {
    (void)value;
}
|
|
|
|
// When `cond` holds, exchange `field` between the existing entry (v) and the
// incoming one (nv) and count it as a change. Local to dyncfg_conflict_cb().
#define DYNCFG_SWAP_FIELD_IF(cond, field) do {      \
        if(cond) {                                  \
            SWAP(v->field, nv->field);              \
            changes++;                              \
        }                                           \
    } while(0)

// Dictionary conflict callback: merges an incoming entry (new_value) into the
// one already stored under the same id (old_value).
//
// Fields that differ are swapped into the stored entry, so whatever the stored
// entry held before ends up in the incoming one and is released by the final
// dyncfg_cleanup(nv). Timestamps only move in one direction: created_ut keeps
// the earliest value, modified_ut the latest. The payload is taken only when
// the incoming entry carries one. The execute callback is taken when the stored
// entry has none, or when the incoming entry asks to overwrite it with a
// different, non-NULL callback or callback data.
//
// Returns true when at least one field of the stored entry changed.
static bool dyncfg_conflict_cb(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) {
    DYNCFG *v = old_value;
    DYNCFG *nv = new_value;
    size_t changes = 0;

    dyncfg_normalize(nv);

    DYNCFG_SWAP_FIELD_IF(!UUIDeq(v->host_uuid, nv->host_uuid), host_uuid);
    DYNCFG_SWAP_FIELD_IF(v->path != nv->path, path);
    DYNCFG_SWAP_FIELD_IF(v->status != nv->status, status);
    DYNCFG_SWAP_FIELD_IF(v->type != nv->type, type);
    DYNCFG_SWAP_FIELD_IF(v->source_type != nv->source_type, source_type);
    DYNCFG_SWAP_FIELD_IF(v->cmds != nv->cmds, cmds);
    DYNCFG_SWAP_FIELD_IF(v->source != nv->source, source);

    // keep the earliest creation and the latest modification time
    DYNCFG_SWAP_FIELD_IF(nv->created_ut < v->created_ut, created_ut);
    DYNCFG_SWAP_FIELD_IF(nv->modified_ut > v->modified_ut, modified_ut);

    DYNCFG_SWAP_FIELD_IF(v->sync != nv->sync, sync);

    // a payload is replaced only when the incoming entry provides one
    DYNCFG_SWAP_FIELD_IF(nv->payload, payload);

    const bool callback_differs =
        v->execute_cb != nv->execute_cb || v->execute_cb_data != nv->execute_cb_data;

    if(!v->execute_cb || (nv->overwrite_cb && nv->execute_cb && callback_differs)) {
        v->execute_cb = nv->execute_cb;
        v->execute_cb_data = nv->execute_cb_data;
        changes++;
    }

    // nv now holds only what was discarded from the stored entry
    dyncfg_cleanup(nv);

    return changes != 0;
}

#undef DYNCFG_SWAP_FIELD_IF
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
void dyncfg_init_low_level(bool load_saved) {
|
|
if(!dyncfg_globals.nodes) {
|
|
dyncfg_globals.nodes = dictionary_create_advanced(DICT_OPTION_FIXED_SIZE | DICT_OPTION_DONT_OVERWRITE_VALUE, NULL, sizeof(DYNCFG));
|
|
dictionary_register_insert_callback(dyncfg_globals.nodes, dyncfg_insert_cb, NULL);
|
|
dictionary_register_react_callback(dyncfg_globals.nodes, dyncfg_react_cb, NULL);
|
|
dictionary_register_conflict_callback(dyncfg_globals.nodes, dyncfg_conflict_cb, NULL);
|
|
dictionary_register_delete_callback(dyncfg_globals.nodes, dyncfg_delete_cb, NULL);
|
|
|
|
char path[PATH_MAX];
|
|
snprintfz(path, sizeof(path), "%s/%s", netdata_configured_varlib_dir, "config");
|
|
|
|
if(mkdir(path, 0755) == -1) {
|
|
if(errno != EEXIST)
|
|
nd_log(NDLS_DAEMON, NDLP_CRIT, "DYNCFG: failed to create dynamic configuration directory '%s'", path);
|
|
}
|
|
|
|
dyncfg_globals.dir = strdupz(path);
|
|
|
|
if(load_saved)
|
|
dyncfg_load_all();
|
|
}
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Build a DYNCFG entry from the given attributes and set it in the nodes
// dictionary under `id`.
//
// Returns the acquired dictionary item, as returned by
// dictionary_set_and_acquire_item_advanced().
const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, usec_t created_ut, usec_t modified_ut, bool sync, rrd_function_execute_cb_t execute_cb, void *execute_cb_data, bool overwrite_cb) {
    // everything not assigned below (user_disabled, restart_required, payload, ...) stays zero
    DYNCFG entry = { 0 };

    entry.host_uuid = uuid2UUID(host->host_uuid);
    entry.path = string_strdupz(path);
    entry.source = string_strdupz(source);

    entry.status = status;
    entry.type = type;
    entry.cmds = cmds;
    entry.source_type = source_type;

    entry.created_ut = created_ut;
    entry.modified_ut = modified_ut;
    entry.sync = sync;

    entry.execute_cb = execute_cb;
    entry.execute_cb_data = execute_cb_data;
    entry.overwrite_cb = overwrite_cb;

    return dictionary_set_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1, &entry, sizeof(entry), NULL);
}
|
|
|
|
static void dyncfg_send_updates(const char *id) {
|
|
const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1);
|
|
if(!item) {
|
|
nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: asked to update plugin for configuration '%s', but it is not found.", id);
|
|
return;
|
|
}
|
|
|
|
DYNCFG *df = dictionary_acquired_item_value(item);
|
|
|
|
if(df->type == DYNCFG_TYPE_SINGLE || df->type == DYNCFG_TYPE_JOB) {
|
|
if (df->cmds & DYNCFG_CMD_UPDATE && df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->payload && buffer_strlen(df->payload))
|
|
dyncfg_echo_update(item, df, id);
|
|
}
|
|
else if(df->type == DYNCFG_TYPE_TEMPLATE && (df->cmds & DYNCFG_CMD_ADD)) {
|
|
STRING *template = string_strdupz(id);
|
|
|
|
size_t len = strlen(id);
|
|
DYNCFG *df_job;
|
|
dfe_start_reentrant(dyncfg_globals.nodes, df_job) {
|
|
const char *id_template = df_job_dfe.name;
|
|
if(df_job->type == DYNCFG_TYPE_JOB && // it is a job
|
|
df_job->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && // it is dynamically configured
|
|
df_job->template == template && // it has the same template name
|
|
strncmp(id_template, id, len) == 0 && // the template name matches (redundant)
|
|
id_template[len] == ':' && // immediately after the template there is ':'
|
|
id_template[len + 1]) { // and there is something else after the ':'
|
|
dyncfg_echo_add(item, df_job_dfe.item, df, df_job, id, &id_template[len + 1]);
|
|
}
|
|
}
|
|
dfe_done(df_job);
|
|
|
|
string_freez(template);
|
|
}
|
|
|
|
dictionary_acquired_item_release(dyncfg_globals.nodes, item);
|
|
}
|
|
|
|
bool dyncfg_is_user_disabled(const char *id) {
|
|
const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id);
|
|
if(!item)
|
|
return false;
|
|
|
|
DYNCFG *df = dictionary_acquired_item_value(item);
|
|
bool ret = df->user_disabled;
|
|
dictionary_acquired_item_release(dyncfg_globals.nodes, item);
|
|
return ret;
|
|
}
|
|
|
|
bool dyncfg_job_has_registered_template(const char *id) {
|
|
char buf[strlen(id) + 1];
|
|
memcpy(buf, id, sizeof(buf));
|
|
char *colon = strrchr(buf, ':');
|
|
if(!colon)
|
|
return false;
|
|
|
|
*colon = '\0';
|
|
const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, buf);
|
|
if(!item)
|
|
return false;
|
|
|
|
DYNCFG *df = dictionary_acquired_item_value(item);
|
|
bool ret = df->type == DYNCFG_TYPE_TEMPLATE;
|
|
|
|
dictionary_acquired_item_release(dyncfg_globals.nodes, item);
|
|
return ret;
|
|
}
|
|
|
|
// Returns `cmds` adjusted to the rules every configuration must follow:
//  - "schema" is always supported;
//  - "enable" and "disable" always come together;
//  - "add" is supported by templates, and only by templates;
//  - templates have no data, so they cannot "get", "update" or "test";
//  - "remove" is only available for dynamically configured jobs.
static DYNCFG_CMDS dyncfg_sanitize_declared_cmds(DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, DYNCFG_CMDS cmds) {
    cmds |= DYNCFG_CMD_SCHEMA;

    if(cmds & (DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE))
        cmds |= DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE;

    if(type == DYNCFG_TYPE_TEMPLATE) {
        cmds |= DYNCFG_CMD_ADD;
        cmds &= ~(DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE | DYNCFG_CMD_TEST);
    }
    else
        cmds &= ~DYNCFG_CMD_ADD;

    if(source_type != DYNCFG_SOURCE_TYPE_DYNCFG || type != DYNCFG_TYPE_JOB)
        cmds &= ~DYNCFG_CMD_REMOVE;

    return cmds;
}

// Register (or update) the dynamic configuration `id` on `host`.
//
// Steps:
//  1. reject invalid ids, and jobs whose template is not registered;
//  2. sanitize the declared commands (a notice is logged when they change);
//  3. add the entry to the nodes dictionary;
//  4. register the "config <id>" function on the host, routed through
//     dyncfg_function_intercept_cb;
//  5. for non-templates, echo enable/disable to the plugin - disable is sent
//     when the entry or its template is user-disabled, or its status is disabled;
//  6. except for dynamically configured jobs, replay stored updates for `id`.
//
// Returns false when the id is rejected in step 1, true otherwise.
bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, usec_t created_ut, usec_t modified_ut, bool sync, rrd_function_execute_cb_t execute_cb, void *execute_cb_data) {
    if(!dyncfg_is_valid_id(id)) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id);
        return false;
    }

    if(type == DYNCFG_TYPE_JOB && !dyncfg_job_has_registered_template(id)) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: job id '%s' does not have a registered template. Ignoring dynamic configuration for it.", id);
        return false;
    }

    DYNCFG_CMDS old_cmds = cmds;
    cmds = dyncfg_sanitize_declared_cmds(type, source_type, old_cmds);

    if(cmds != old_cmds) {
        CLEAN_BUFFER *t = buffer_create(1024, NULL);
        buffer_sprintf(t, "DYNCFG: id '%s' was declared with cmds: ", id);
        dyncfg_cmds2buffer(old_cmds, t);
        buffer_strcat(t, ", but they have sanitized to: ");
        dyncfg_cmds2buffer(cmds, t);
        nd_log(NDLS_DAEMON, NDLP_NOTICE, "%s", buffer_tostring(t));
    }

    const DICTIONARY_ITEM *item = dyncfg_add_internal(host, id, path, status, type, source_type, source, cmds, created_ut, modified_ut, sync, execute_cb, execute_cb_data, true);
    DYNCFG *df = dictionary_acquired_item_value(item);

//    if(df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && !df->saves)
//        nd_log(NDLS_DAEMON, NDLP_WARNING, "DYNCFG: configuration '%s' is created with source type dyncfg, but we don't have a saved configuration for it", id);

    rrd_collector_started();
    rrd_function_add(
        host,
        NULL,
        string2str(df->function),
        120,
        1000,
        "Dynamic configuration",
        "config",
        HTTP_ACCESS_ADMIN,
        sync,
        dyncfg_function_intercept_cb,
        NULL);

    if(df->type != DYNCFG_TYPE_TEMPLATE) {
        DYNCFG_CMDS status_to_send_to_plugin =
            (df->user_disabled || df->status == DYNCFG_STATUS_DISABLED) ? DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE;

        // a user-disabled template disables its jobs too
        if (status_to_send_to_plugin == DYNCFG_CMD_ENABLE && dyncfg_is_user_disabled(string2str(df->template)))
            status_to_send_to_plugin = DYNCFG_CMD_DISABLE;

        dyncfg_echo(item, df, id, status_to_send_to_plugin);
    }

    if(!(df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->type == DYNCFG_TYPE_JOB))
        dyncfg_send_updates(id);

    dictionary_acquired_item_release(dyncfg_globals.nodes, item);

    return true;
}
|
|
|
|
// Unregister the dynamic configuration `id` from `host`.
// The "config <id>" function is always removed from the host; the dictionary
// entry itself is deleted (and the dictionary garbage collected) only when the
// entry's `saves` counter is zero. Invalid ids are logged and ignored; unknown
// ids are silently ignored.
void dyncfg_del_low_level(RRDHOST *host, const char *id) {
    if(!dyncfg_is_valid_id(id)) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id);
        return;
    }

    const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id);
    if(!item)
        return;

    DYNCFG *df = dictionary_acquired_item_value(item);
    rrd_function_del(host, NULL, string2str(df->function));

    const bool never_saved = (df->saves == 0);
    if(never_saved)
        dictionary_del(dyncfg_globals.nodes, id);

    dictionary_acquired_item_release(dyncfg_globals.nodes, item);

    // collect only after our own reference has been released
    if(never_saved)
        dictionary_garbage_collect(dyncfg_globals.nodes);
}
|
|
|
|
// Set the status of the dynamic configuration `id`.
// Invalid ids and DYNCFG_STATUS_NONE are logged and ignored; ids not present in
// the nodes dictionary are silently ignored.
void dyncfg_status_low_level(RRDHOST *host __maybe_unused, const char *id, DYNCFG_STATUS status) {
    if(!dyncfg_is_valid_id(id)) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id);
        return;
    }

    if(status == DYNCFG_STATUS_NONE) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: status provided to id '%s' is invalid. Ignoring it.", id);
        return;
    }

    const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id);
    if(!item)
        return;

    DYNCFG *entry = dictionary_acquired_item_value(item);
    entry->status = status;

    dictionary_acquired_item_release(dyncfg_globals.nodes, item);
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Append to `wb` the declaration of the single global 'config' function that is
// streamed to parents.
void dyncfg_add_streaming(BUFFER *wb) {
    // when sending config functions to parents, we send only 1 function called 'config';
    // the parent will send the command to the child, and the child will validate it;
    // this way the parent does not need to receive removals of config functions;

    const int timeout = 120;
    const int priority = 1000;
    const char *help = "Dynamic configuration";
    const char *tags = "config";

    buffer_sprintf(wb,
                   PLUGINSD_KEYWORD_FUNCTION " GLOBAL " PLUGINSD_FUNCTION_CONFIG " %d \"%s\" \"%s\" \"%s\" %d\n",
                   timeout,
                   help,
                   tags,
                   http_id2access(HTTP_ACCESS_ADMIN),
                   priority);
}
|
|
|
|
// Dynamic configuration is always available for localhost and for virtual hosts;
// for any other host it is available when the host exposes the 'config' function.
bool dyncfg_available_for_rrdhost(RRDHOST *host) {
    const bool always_available =
        (host == localhost) || rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST);

    return always_available || rrd_function_available(host, PLUGINSD_FUNCTION_CONFIG);
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|