0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-14 17:48:37 +00:00
netdata_netdata/daemon/config/dyncfg.c
Costa Tsaousis f466b8aef5
DYNCFG: dynamically configured alerts ()
* cleanup alerts

* fix references

* fix references

* fix references

* load alerts once and apply them to each node

* simplify health_create_alarm_entry()

* Compile without warnings with compiler flags:

   -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self

* code re-organization and cleanup

* generate patterns when applying prototypes; give unique dyncfg names to all alerts

* eval expressions keep the source and the parsed_as as STRING pointers

* renamed host to node in dyncfg ids

* renamed host to node in dyncfg ids

* add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level

* working functionality

* code re-organization: moved health event-loop to a new file, moved health globals to health.c

* rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instantiated only when they are linked to RRDSETs

* dyncfg alert prototypes initialization for alerts

* health dyncfg split to separate file

* cleanup not-needed code

* normalize matches between parsing and json

* also detect !* for disabled alerts

* dyncfg capability disabled

* Store alert config part1

* Add rrdlabels_common_count

* wip health variables lookup without indexes

* Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter

* working variables with runtime lookup

* working variables with runtime lookup

* delete rrddimvar and rrdfamily index

* remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts

* added /api/v1/variable that resolves a variable the same way alerts do

* remove rrdcalc from eval

* remove debug code

* remove duplicate assignment

* Fix memory leak

* all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts

* hide all internal structures of EVAL

* Enable -Wformat flag

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Adjust binding for calculation, warning, critical

* Remove unused macro

* Update config hash id

* use the right info and summary in alerts log

* use synchronous queries for alerts

* Handle cases when config_hash_id is missing from health_log

* remove deadlock from health worker

* parsing to json payload for health alert prototypes

* cleaner parsing and avoiding memory leaks in case of duplicate members in json

* fix left-over rename of function

* Keep original lookup field to send to the cloud
Cleanup / rename function to store config
Remove unused DEFINEs, functions

* Use ac->lookup

* link jobs to the host when the template is registered; do not accept running a function without a host

* full dyncfg support for health alerts, except action TEST

* working dyncfg additions, updates, removals

* fixed missing source, wrong status updates

* add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint

* fix dyncfg unittest

* rename functions

* generalize the json-c parser macros and move them to libnetdata

* report progress when enabling and disabling dyncfg templates

* moved rrdcalc and rrdvar to health

* update alarms

* added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts

* enable parsed json alerts; allow sending back accepted but disabled

* added format_version for alerts payload; the enabled/disabled status is now also inherited from the status of the rules; fixed variable names in json output

* remove the RRDHOST pointer from DYNCFG

* Fix command field submitted to the cloud

* do not send updates to creation requests, for DYNCFG jobs

---------

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: ilyam8 <ilya@netdata.cloud>
2024-01-23 20:20:41 +02:00

427 lines
14 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "dyncfg-internals.h"
#include "dyncfg.h"
// Process-wide dynamic configuration state, zero-initialized at start.
// Its 'nodes' dictionary and 'dir' path are set up by dyncfg_init_low_level().
struct dyncfg_globals dyncfg_globals = { 0 };
// Resolve a host UUID to its RRDHOST.
//
// The UUID is rendered as a lowercase string and looked up with rrdhost_find_by_guid().
// Returns the host found, or NULL (after logging an error) when no host matches.
RRDHOST *dyncfg_rrdhost_by_uuid(UUID *uuid) {
    char guid[UUID_STR_LEN];
    uuid_unparse_lower(uuid->uuid, guid);

    RRDHOST *found = rrdhost_find_by_guid(guid);
    if(found)
        return found;

    nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host with UUID '%s'", guid);
    return NULL;
}
// Returns the RRDHOST that owns the given configuration node, resolved from
// df->host_uuid via dyncfg_rrdhost_by_uuid(); NULL when the host cannot be found.
RRDHOST *dyncfg_rrdhost(DYNCFG *df) {
return dyncfg_rrdhost_by_uuid(&df->host_uuid);
}
// Release everything a DYNCFG owns (payload buffer, path, source, function and template strings)
// and reset the corresponding members to NULL, so no dangling pointers are left behind.
// The DYNCFG structure itself is not freed.
void dyncfg_cleanup(DYNCFG *v) {
    // release the owned resources ...
    buffer_free(v->payload);
    string_freez(v->path);
    string_freez(v->source);
    string_freez(v->function);
    string_freez(v->template);

    // ... and forget about them
    v->payload = NULL;
    v->path = NULL;
    v->source = NULL;
    v->function = NULL;
    v->template = NULL;
}
// Fill in missing timestamps: a zero created_ut and/or modified_ut
// is replaced with the current realtime clock (sampled once).
static void dyncfg_normalize(DYNCFG *df) {
    const usec_t stamp = now_realtime_usec();

    if(df->created_ut == 0) {
        df->created_ut = stamp;
    }

    if(df->modified_ut == 0) {
        df->modified_ut = stamp;
    }
}
// Dictionary delete callback: releases the resources owned by the DYNCFG stored as the item's value.
static void dyncfg_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) {
    dyncfg_cleanup((DYNCFG *)value);
}
// Dictionary insert callback: completes the initialization of a newly inserted DYNCFG node.
//
//  - missing timestamps are filled in by dyncfg_normalize()
//  - df->function is set to PLUGINSD_FUNCTION_CONFIG, a space, and the id of the item
//  - for jobs that do not have a template yet, the template is derived from the id:
//    it is everything before the last ':' of the id; a warning is logged when
//    the id of the job has no ':' at all.
static void dyncfg_insert_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) {
    DYNCFG *df = value;
    dyncfg_normalize(df);

    const char *id = dictionary_acquired_item_name(item);

    // size the buffer from the literal prefix itself (sizeof() includes its NUL terminator),
    // so the function name cannot be truncated, no matter how long the prefix is
    char buf[sizeof(PLUGINSD_FUNCTION_CONFIG " ") + strlen(id)];
    snprintfz(buf, sizeof(buf), PLUGINSD_FUNCTION_CONFIG " %s", id);
    df->function = string_strdupz(buf);

    if(df->type == DYNCFG_TYPE_JOB && !df->template) {
        const char *last_colon = strrchr(id, ':');
        if(last_colon)
            df->template = string_strndupz(id, last_colon - id);
        else
            nd_log(NDLS_DAEMON, NDLP_WARNING,
                   "DYNCFG: id '%s' is a job, but does not contain a colon to find the template", id);
    }
}
// Dictionary react callback: currently a placeholder, it performs no work.
static void dyncfg_react_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value __maybe_unused, void *data __maybe_unused) {
    // nothing to do
}
// Dictionary conflict callback: merges a new DYNCFG (new_value) into the one already stored (old_value).
//
// Fields that differ are moved into the stored entry with SWAP() - assuming SWAP() exchanges its
// two arguments, the replaced values end up in the new entry, whose owned resources are released
// by the dyncfg_cleanup() call at the end.
//
// Special cases:
//  - created_ut is only replaced by an earlier timestamp, modified_ut only by a later one
//  - the payload is only replaced when the new entry carries one
//  - the execute callback (and its data) is copied when the stored entry has none, or when the
//    new entry asks to overwrite it with a different, non-NULL callback or different callback data
//
// Returns true when at least one field of the stored entry has been updated.
static bool dyncfg_conflict_cb(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) {
    DYNCFG *existing = old_value;
    DYNCFG *incoming = new_value;
    bool changed = false;

    dyncfg_normalize(incoming);

    if(!UUIDeq(existing->host_uuid, incoming->host_uuid)) {
        SWAP(existing->host_uuid, incoming->host_uuid);
        changed = true;
    }

    if(existing->path != incoming->path) {
        SWAP(existing->path, incoming->path);
        changed = true;
    }

    if(existing->status != incoming->status) {
        SWAP(existing->status, incoming->status);
        changed = true;
    }

    if(existing->type != incoming->type) {
        SWAP(existing->type, incoming->type);
        changed = true;
    }

    if(existing->source_type != incoming->source_type) {
        SWAP(existing->source_type, incoming->source_type);
        changed = true;
    }

    if(existing->cmds != incoming->cmds) {
        SWAP(existing->cmds, incoming->cmds);
        changed = true;
    }

    if(existing->source != incoming->source) {
        SWAP(existing->source, incoming->source);
        changed = true;
    }

    // keep the earliest creation time
    if(incoming->created_ut < existing->created_ut) {
        SWAP(existing->created_ut, incoming->created_ut);
        changed = true;
    }

    // keep the latest modification time
    if(incoming->modified_ut > existing->modified_ut) {
        SWAP(existing->modified_ut, incoming->modified_ut);
        changed = true;
    }

    if(existing->sync != incoming->sync) {
        SWAP(existing->sync, incoming->sync);
        changed = true;
    }

    // a missing payload in the new entry never erases the stored one
    if(incoming->payload) {
        SWAP(existing->payload, incoming->payload);
        changed = true;
    }

    const bool callback_missing = !existing->execute_cb;
    const bool callback_replaced =
        incoming->overwrite_cb && incoming->execute_cb &&
        (existing->execute_cb != incoming->execute_cb || existing->execute_cb_data != incoming->execute_cb_data);

    if(callback_missing || callback_replaced) {
        existing->execute_cb = incoming->execute_cb;
        existing->execute_cb_data = incoming->execute_cb_data;
        changed = true;
    }

    dyncfg_cleanup(incoming);

    return changed;
}
// ----------------------------------------------------------------------------
// One-time initialization of the dynamic configuration subsystem.
//
// Creates the dyncfg_globals.nodes dictionary and registers its insert, react, conflict and
// delete callbacks, makes sure the "config" directory exists under netdata_configured_varlib_dir
// (remembered in dyncfg_globals.dir) and, when load_saved is true, calls dyncfg_load_all().
//
// Calling it again after the dictionary has been created is a no-op.
void dyncfg_init_low_level(bool load_saved) {
    if(dyncfg_globals.nodes)
        return;

    dyncfg_globals.nodes = dictionary_create_advanced(
        DICT_OPTION_FIXED_SIZE | DICT_OPTION_DONT_OVERWRITE_VALUE, NULL, sizeof(DYNCFG));

    dictionary_register_insert_callback(dyncfg_globals.nodes, dyncfg_insert_cb, NULL);
    dictionary_register_react_callback(dyncfg_globals.nodes, dyncfg_react_cb, NULL);
    dictionary_register_conflict_callback(dyncfg_globals.nodes, dyncfg_conflict_cb, NULL);
    dictionary_register_delete_callback(dyncfg_globals.nodes, dyncfg_delete_cb, NULL);

    char path[PATH_MAX];
    snprintfz(path, sizeof(path), "%s/%s", netdata_configured_varlib_dir, "config");

    // an already existing directory is fine; any other failure is logged, but it is not fatal
    if(mkdir(path, 0755) == -1 && errno != EEXIST)
        nd_log(NDLS_DAEMON, NDLP_CRIT, "DYNCFG: failed to create dynamic configuration directory '%s'", path);

    dyncfg_globals.dir = strdupz(path);

    if(load_saved)
        dyncfg_load_all();
}
// ----------------------------------------------------------------------------
// Build a DYNCFG from the given parameters and set it in the dyncfg_globals.nodes dictionary
// under the key 'id'.
//
// 'path' and 'source' are duplicated as STRINGs; the payload starts as NULL and both the
// user_disabled and restart_required flags start as false.
//
// Returns the dictionary item, acquired - the caller is expected to release it.
const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, usec_t created_ut, usec_t modified_ut, bool sync, rrd_function_execute_cb_t execute_cb, void *execute_cb_data, bool overwrite_cb) {
    DYNCFG tmp = {
        // identity
        .host_uuid = uuid2UUID(host->host_uuid),
        .path = string_strdupz(path),
        .type = type,
        .source_type = source_type,
        .source = string_strdupz(source),

        // state
        .status = status,
        .cmds = cmds,
        .sync = sync,
        .user_disabled = false,
        .restart_required = false,
        .payload = NULL,

        // timestamps
        .created_ut = created_ut,
        .modified_ut = modified_ut,

        // execution
        .execute_cb = execute_cb,
        .execute_cb_data = execute_cb_data,
        .overwrite_cb = overwrite_cb,
    };

    const DICTIONARY_ITEM *item =
        dictionary_set_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1, &tmp, sizeof(tmp), NULL);

    return item;
}
// Pushes the stored configuration of the node 'id' back to its plugin.
//
//  - SINGLE and JOB nodes: when the node supports the update command, its source type is dyncfg
//    and it has a non-empty payload, dyncfg_echo_update() is called for it.
//  - TEMPLATE nodes that support the add command: dyncfg_echo_add() is called for every
//    dyncfg-sourced job of the dictionary that belongs to this template.
//
// When 'id' is not found in dyncfg_globals.nodes, an error is logged and nothing else happens.
static void dyncfg_send_updates(const char *id) {
const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1);
if(!item) {
nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: asked to update plugin for configuration '%s', but it is not found.", id);
return;
}
DYNCFG *df = dictionary_acquired_item_value(item);
if(df->type == DYNCFG_TYPE_SINGLE || df->type == DYNCFG_TYPE_JOB) {
// only dyncfg-sourced nodes with a non-empty payload have something to send
if (df->cmds & DYNCFG_CMD_UPDATE && df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->payload && buffer_strlen(df->payload))
dyncfg_echo_update(item, df, id);
}
else if(df->type == DYNCFG_TYPE_TEMPLATE && (df->cmds & DYNCFG_CMD_ADD)) {
// the template name as a STRING, so that it can be compared by pointer with df_job->template below
STRING *template = string_strdupz(id);
size_t len = strlen(id);
DYNCFG *df_job;
// walk all the nodes of the dictionary, looking for the jobs of this template
dfe_start_reentrant(dyncfg_globals.nodes, df_job) {
const char *id_template = df_job_dfe.name;
if(df_job->type == DYNCFG_TYPE_JOB && // it is a job
df_job->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && // it is dynamically configured
df_job->template == template && // it has the same template name
strncmp(id_template, id, len) == 0 && // the template name matches (redundant)
id_template[len] == ':' && // immediately after the template there is ':'
id_template[len + 1]) { // and there is something else after the ':'
// the last argument is the job name: the part of the job id after "<template>:"
dyncfg_echo_add(item, df_job_dfe.item, df, df_job, id, &id_template[len + 1]);
}
}
dfe_done(df_job);
string_freez(template);
}
dictionary_acquired_item_release(dyncfg_globals.nodes, item);
}
// Returns the user_disabled flag of the configuration node 'id',
// or false when no such node exists in dyncfg_globals.nodes.
bool dyncfg_is_user_disabled(const char *id) {
    bool disabled = false;

    const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id);
    if(item) {
        DYNCFG *df = dictionary_acquired_item_value(item);
        disabled = df->user_disabled;
        dictionary_acquired_item_release(dyncfg_globals.nodes, item);
    }

    return disabled;
}
// Checks that the template a job id refers to is registered.
//
// The template id is the part of the job id before its last ':'.
// Returns true only when a node with that id exists in dyncfg_globals.nodes and it is
// of type DYNCFG_TYPE_TEMPLATE; returns false when the job id has no ':' at all.
bool dyncfg_job_has_registered_template(const char *id) {
    // work on a private copy of the id, so that it can be cut at the last ':'
    const size_t size = strlen(id) + 1;
    char template_id[size];
    memcpy(template_id, id, size);

    char *separator = strrchr(template_id, ':');
    if(!separator)
        return false;

    *separator = '\0';

    bool is_template = false;

    const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, template_id);
    if(item) {
        DYNCFG *df = dictionary_acquired_item_value(item);
        is_template = (df->type == DYNCFG_TYPE_TEMPLATE);
        dictionary_acquired_item_release(dyncfg_globals.nodes, item);
    }

    return is_template;
}
// Registers (or updates) a dynamic configuration node and exposes it as a function of the host.
//
// Parameters:
//  - host: the host the configuration belongs to; the function is registered on it
//  - id: the id of the configuration; it must pass dyncfg_is_valid_id()
//  - path, status, type, source_type, source: stored in the node
//  - cmds: the commands declared as supported; they are sanitized before being stored (see below)
//  - created_ut, modified_ut: timestamps of the node
//  - sync: stored in the node and also passed to rrd_function_add()
//  - execute_cb, execute_cb_data: the callback (and its data) stored in the node
//
// Sanitization of cmds (a notice is logged when it changes what was declared):
//  - schema is always supported
//  - enable and disable always come together
//  - add is supported by templates, and only by templates
//  - remove is available only to jobs with source type dyncfg
//  - templates have no data, so they cannot support get, update and test
//
// After the registration, non-template nodes get an enable or disable command (disable when the
// node or its template is disabled), and stored updates are sent, except to jobs with source type dyncfg.
//
// Returns false when the id is invalid, or when a job refers to a template that is not registered;
// true otherwise.
bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, usec_t created_ut, usec_t modified_ut, bool sync, rrd_function_execute_cb_t execute_cb, void *execute_cb_data) {
    if(!dyncfg_is_valid_id(id)) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id);
        return false;
    }

    if(type == DYNCFG_TYPE_JOB && !dyncfg_job_has_registered_template(id)) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: job id '%s' does not have a registered template. Ignoring dynamic configuration for it.", id);
        return false;
    }

    DYNCFG_CMDS old_cmds = cmds;

    // all configurations support schema
    cmds |= DYNCFG_CMD_SCHEMA;

    // if there is either enable or disable, both are supported
    if(cmds & (DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE))
        cmds |= DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE;

    // add
    if(type == DYNCFG_TYPE_TEMPLATE) {
        // templates must always support "add"
        cmds |= DYNCFG_CMD_ADD;
    }
    else {
        // only templates can have "add"
        cmds &= ~DYNCFG_CMD_ADD;
    }

    // remove
    if(source_type != DYNCFG_SOURCE_TYPE_DYNCFG || type != DYNCFG_TYPE_JOB) {
        // remove is only available for dyncfg jobs
        cmds &= ~DYNCFG_CMD_REMOVE;
    }

    // data
    if(type == DYNCFG_TYPE_TEMPLATE) {
        // templates do not have data
        cmds &= ~(DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE | DYNCFG_CMD_TEST);
    }

    if(cmds != old_cmds) {
        CLEAN_BUFFER *t = buffer_create(1024, NULL);
        buffer_sprintf(t, "DYNCFG: id '%s' was declared with cmds: ", id);
        dyncfg_cmds2buffer(old_cmds, t);
        buffer_strcat(t, ", but they have sanitized to: ");
        dyncfg_cmds2buffer(cmds, t);
        nd_log(NDLS_DAEMON, NDLP_NOTICE, "%s", buffer_tostring(t));
    }

    const DICTIONARY_ITEM *item = dyncfg_add_internal(host, id, path, status, type, source_type, source, cmds, created_ut, modified_ut, sync, execute_cb, execute_cb_data, true);
    DYNCFG *df = dictionary_acquired_item_value(item);

    rrd_collector_started();
    rrd_function_add(
        host,
        NULL,
        string2str(df->function),
        120,
        1000,
        "Dynamic configuration",
        "config",
        HTTP_ACCESS_ADMIN,
        sync,
        dyncfg_function_intercept_cb,
        NULL);

    if(df->type != DYNCFG_TYPE_TEMPLATE) {
        DYNCFG_CMDS status_to_send_to_plugin =
            (df->user_disabled || df->status == DYNCFG_STATUS_DISABLED) ? DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE;

        // only nodes that have a template (jobs) can be disabled through it;
        // the rest have a NULL template, so there is nothing to look up for them
        if (status_to_send_to_plugin == DYNCFG_CMD_ENABLE && df->template && dyncfg_is_user_disabled(string2str(df->template)))
            status_to_send_to_plugin = DYNCFG_CMD_DISABLE;

        dyncfg_echo(item, df, id, status_to_send_to_plugin);
    }

    // do not send updates to creation requests, for dyncfg jobs
    if(!(df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->type == DYNCFG_TYPE_JOB))
        dyncfg_send_updates(id);

    dictionary_acquired_item_release(dyncfg_globals.nodes, item);

    return true;
}
// Unregisters the configuration node 'id' from the host.
//
// The function of the node is removed from the host with rrd_function_del(). The node itself is
// deleted from dyncfg_globals.nodes (followed by a garbage collection of the dictionary) only
// when its 'saves' counter is zero.
//
// Invalid ids are logged and ignored; unknown ids are silently ignored.
void dyncfg_del_low_level(RRDHOST *host, const char *id) {
    if(!dyncfg_is_valid_id(id)) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id);
        return;
    }

    const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id);
    if(!item)
        return;

    DYNCFG *df = dictionary_acquired_item_value(item);
    rrd_function_del(host, NULL, string2str(df->function));

    // decide while the item is still acquired
    const bool never_saved = (df->saves == 0);
    if(never_saved)
        dictionary_del(dyncfg_globals.nodes, id);

    dictionary_acquired_item_release(dyncfg_globals.nodes, item);

    // collect the garbage only after our reference has been released
    if(never_saved)
        dictionary_garbage_collect(dyncfg_globals.nodes);
}
// Sets the status of the configuration node 'id'.
//
// Invalid ids and the DYNCFG_STATUS_NONE status are rejected with an error log;
// ids that do not exist in dyncfg_globals.nodes are silently ignored.
void dyncfg_status_low_level(RRDHOST *host __maybe_unused, const char *id, DYNCFG_STATUS status) {
    if(!dyncfg_is_valid_id(id)) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id);
        return;
    }

    if(status == DYNCFG_STATUS_NONE) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: status provided to id '%s' is invalid. Ignoring it.", id);
        return;
    }

    const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id);
    if(!item)
        return;

    DYNCFG *df = dictionary_acquired_item_value(item);
    df->status = status;

    dictionary_acquired_item_release(dyncfg_globals.nodes, item);
}
// ----------------------------------------------------------------------------
// Appends to 'wb' the single line that announces the global config function:
// the function keyword, "GLOBAL", the config function name, and then the values 120,
// "Dynamic configuration", "config", the admin access level as a string, and 1000.
// These are the same values dyncfg_add_low_level() passes to rrd_function_add().
void dyncfg_add_streaming(BUFFER *wb) {
// when sending config functions to parents, we send only 1 function called 'config';
// the parent will send the command to the child, and the child will validate it;
// this way the parent does not need to receive removals of config functions;
buffer_sprintf(wb
, PLUGINSD_KEYWORD_FUNCTION " GLOBAL " PLUGINSD_FUNCTION_CONFIG " %d \"%s\" \"%s\" \"%s\" %d\n"
, 120
, "Dynamic configuration"
, "config"
, http_id2access(HTTP_ACCESS_ADMIN)
, 1000
);
}
// Tells whether dynamic configuration is available for a host.
//
// It is always available for localhost and for hosts with the RRDHOST_OPTION_VIRTUAL_HOST option;
// for every other host it depends on whether the host has the config function available.
bool dyncfg_available_for_rrdhost(RRDHOST *host) {
    if(host == localhost)
        return true;

    if(rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST))
        return true;

    return rrd_function_available(host, PLUGINSD_FUNCTION_CONFIG);
}
// ----------------------------------------------------------------------------