0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-14 09:38:34 +00:00
netdata_netdata/daemon/config/dyncfg-tree.c
Costa Tsaousis f466b8aef5
DYNCFG: dynamically configured alerts ()
* cleanup alerts

* fix references

* fix references

* fix references

* load alerts once and apply them to each node

* simplify health_create_alarm_entry()

* Compile without warnings with compiler flags:

   -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self

* code re-organization and cleanup

* generate patterns when applying prototypes; give unique dyncfg names to all alerts

* eval expressions keep the source and the parsed_as as STRING pointers

* renamed host to node in dyncfg ids

* renamed host to node in dyncfg ids

* add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level

* working functionality

* code re-organization: moved health event-loop to a new file, moved health globals to health.c

* rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instantiated only when they are linked to RRDSETs

* dyncfg alert prototypes initialization for alerts

* health dyncfg split to separate file

* cleanup not-needed code

* normalize matches between parsing and json

* also detect !* for disabled alerts

* dyncfg capability disabled

* Store alert config part1

* Add rrdlabels_common_count

* wip health variables lookup without indexes

* Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter

* working variables with runtime lookup

* working variables with runtime lookup

* delete rrddimvar and rrdfamily index

* remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts

* added /api/v1/variable that resolves a variable the same way alerts do

* remove rrdcalc from eval

* remove debug code

* remove duplicate assignment

* Fix memory leak

* all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts

* hide all internal structures of EVAL

* Enable -Wformat flag

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Adjust binding for calculation, warning, critical

* Remove unused macro

* Update config hash id

* use the right info and summary in alerts log

* use synchronous queries for alerts

* Handle cases when config_hash_id is missing from health_log

* remove deadlock from health worker

* parsing to json payload for health alert prototypes

* cleaner parsing and avoiding memory leaks in case of duplicate members in json

* fix left-over rename of function

* Keep original lookup field to send to the cloud
Cleanup / rename function to store config
Remove unused DEFINEs, functions

* Use ac->lookup

* link jobs to the host when the template is registered; do not accept running a function without a host

* full dyncfg support for health alerts, except action TEST

* working dyncfg additions, updates, removals

* fixed missing source, wrong status updates

* add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint

* fix dyncfg unittest

* rename functions

* generalize the json-c parser macros and move them to libnetdata

* report progress when enabling and disabling dyncfg templates

* moved rrdcalc and rrdvar to health

* update alarms

* added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts

* enable parsed json alerts; allow sending back accepted but disabled

* added format_version for alerts payload; enabled/disabled status is now also inherited by the status of the rules; fixed variable names in json output

* remove the RRDHOST pointer from DYNCFG

* Fix command field submitted to the cloud

* do not send updates to creation requests, for DYNCFG jobs

---------

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: ilyam8 <ilya@netdata.cloud>
2024-01-23 20:20:41 +02:00

214 lines
8.1 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "dyncfg-internals.h"
#include "dyncfg.h"
// qsort() comparator for an array of acquired dictionary item pointers whose
// values are DYNCFG entries. Entries are ordered by their path first; entries
// sharing the same path are ordered by their dictionary item name.
static int dyncfg_tree_compar(const void *a, const void *b) {
    const DICTIONARY_ITEM *left = *(const DICTIONARY_ITEM **)a;
    const DICTIONARY_ITEM *right = *(const DICTIONARY_ITEM **)b;

    DYNCFG *left_df = dictionary_acquired_item_value(left);
    DYNCFG *right_df = dictionary_acquired_item_value(right);

    int by_path = string_cmp(left_df->path, right_df->path);
    if(by_path != 0)
        return by_path;

    // same path: fall back to the item names to get a stable, total order
    return strcmp(dictionary_acquired_item_name(left), dictionary_acquired_item_name(right));
}
// Append one DYNCFG entry to the JSON document in wb, as an object keyed by id.
//
// df  the dynamic configuration entry to serialize
// id  the member name to use for the generated JSON object
// wb  the JSON buffer to append to; it must already have an open object
//
// The payload itself is never embedded: only its availability, content type
// and length are reported.
static void dyncfg_to_json(DYNCFG *df, const char *id, BUFFER *wb) {
    buffer_json_member_add_object(wb, id);
    {
        buffer_json_member_add_string(wb, "type", dyncfg_id2type(df->type));

        // only jobs report the template they were created from
        if(df->type == DYNCFG_TYPE_JOB)
            buffer_json_member_add_string(wb, "template", string2str(df->template));

        buffer_json_member_add_string(wb, "status", dyncfg_id2status(df->status));
        dyncfg_cmds2json_array(df->cmds, "cmds", wb);
        buffer_json_member_add_string(wb, "source_type", dyncfg_id2source_type(df->source_type));
        buffer_json_member_add_string(wb, "source", string2str(df->source));
        buffer_json_member_add_boolean(wb, "sync", df->sync);
        buffer_json_member_add_boolean(wb, "user_disabled", df->user_disabled);
        buffer_json_member_add_boolean(wb, "restart_required", df->restart_required);

        // report the entry's own plugin_rejected flag (this used to repeat restart_required)
        buffer_json_member_add_boolean(wb, "plugin_rejected", df->plugin_rejected);

        buffer_json_member_add_object(wb, "payload");
        {
            if (df->payload && buffer_strlen(df->payload)) {
                buffer_json_member_add_boolean(wb, "available", true);
                buffer_json_member_add_string(wb, "content_type", content_type_id2string(df->payload->content_type));
                buffer_json_member_add_uint64(wb, "content_length", df->payload->len);
            }
            else
                buffer_json_member_add_boolean(wb, "available", false);
        }
        buffer_json_object_close(wb); // payload

        buffer_json_member_add_uint64(wb, "saves", df->saves);
        buffer_json_member_add_uint64(wb, "created_ut", df->created_ut);
        buffer_json_member_add_uint64(wb, "modified_ut", df->modified_ut);
    }
    buffer_json_object_close(wb);
}
// Generate the JSON "tree" of dynamic configuration entries of a host into wb.
//
// host  the host whose entries are listed (entries are matched by host UUID)
// wb    the output buffer; it is flushed and re-initialized as a JSON document
// path  only entries whose path starts with this prefix are included
// id    optional (NULL or empty for no filtering); when given, only the entry
//       named id and the entries having id as their template are included
//
// The output groups the entries by path (sorted by path, then by name) and is
// followed by an "attention" object with counters of entries needing attention.
static void dyncfg_tree_for_host(RRDHOST *host, BUFFER *wb, const char *path, const char *id) {
    size_t entries = dictionary_entries(dyncfg_globals.nodes);
    size_t used = 0;

    // a variable length array must have at least one element,
    // so keep one slot even when the dictionary is empty
    const DICTIONARY_ITEM *items[entries ? entries : 1];

    size_t restart_required = 0, plugin_rejected = 0, status_incomplete = 0, status_failed = 0;

    bool filter_by_id = id && *id;

    // templates are compared by STRING pointer, so get the STRING for the id
    STRING *template = NULL;
    if(filter_by_id)
        template = string_strdupz(id);

    UUID host_uuid = uuid2UUID(host->host_uuid);

    size_t path_len = strlen(path);
    DYNCFG *df;
    dfe_start_read(dyncfg_globals.nodes, df) {
        if(!UUIDeq(df->host_uuid, host_uuid))
            continue;

        if(strncmp(string2str(df->path), path, path_len) != 0)
            continue;

        // entries whose function is not available on the host are marked as orphans
        if(!rrd_function_available(host, string2str(df->function)))
            df->status = DYNCFG_STATUS_ORPHAN;

        if(filter_by_id) {
            bool is_requested_entry = strcmp(id, df_dfe.name) == 0;
            bool is_job_of_requested_template = df->template == template;

            // keep the entry itself, or the entries created from it;
            // requiring both at once could never match anything useful
            if(!is_requested_entry && !is_job_of_requested_template)
                continue;
        }

        // the dictionary may have grown since it was counted; never write past the array
        if(used < entries)
            items[used++] = dictionary_acquired_item_dup(dyncfg_globals.nodes, df_dfe.item);
    }
    dfe_done(df);

    if(used > 1)
        qsort(items, used, sizeof(const DICTIONARY_ITEM *), dyncfg_tree_compar);

    buffer_flush(wb);
    buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY);

    buffer_json_member_add_uint64(wb, "version", 1);

    buffer_json_member_add_object(wb, "tree");
    {
        STRING *last_path = NULL;
        for (size_t i = 0; i < used; i++) {
            df = dictionary_acquired_item_value(items[i]);

            // items are sorted by path: a change of path closes the previous
            // path object (if any) and opens a new one
            if (df->path != last_path) {
                last_path = df->path;

                if (i)
                    buffer_json_object_close(wb);

                buffer_json_member_add_object(wb, string2str(last_path));
            }

            dyncfg_to_json(df, dictionary_acquired_item_name(items[i]), wb);

            // orphans are not counted in the attention summary
            if(df->status != DYNCFG_STATUS_ORPHAN) {
                if (df->restart_required)
                    restart_required++;

                if (df->plugin_rejected)
                    plugin_rejected++;

                if (df->status == DYNCFG_STATUS_FAILED)
                    status_failed++;

                if (df->status == DYNCFG_STATUS_INCOMPLETE)
                    status_incomplete++;
            }
        }

        // close the last path object that was left open by the loop
        if (used)
            buffer_json_object_close(wb);
    }
    buffer_json_object_close(wb); // tree

    buffer_json_member_add_object(wb, "attention");
    {
        buffer_json_member_add_boolean(wb, "degraded", restart_required + plugin_rejected + status_failed + status_incomplete > 0);
        buffer_json_member_add_uint64(wb, "restart_required", restart_required);
        buffer_json_member_add_uint64(wb, "plugin_rejected", plugin_rejected);
        buffer_json_member_add_uint64(wb, "status_failed", status_failed);
        buffer_json_member_add_uint64(wb, "status_incomplete", status_incomplete);
    }
    buffer_json_object_close(wb); // attention

    buffer_json_agents_v2(wb, NULL, 0, false, false);

    buffer_json_finalize(wb);

    for(size_t i = 0; i < used ;i++)
        dictionary_acquired_item_release(dyncfg_globals.nodes, items[i]);

    // release the reference taken by string_strdupz() (a NULL template is a no-op)
    string_freez(template);
}
// Execution callback of the 'config' function registered by dyncfg_host_init().
//
// rfe   the function execution request; rfe->function holds the full call and
//       rfe->result receives the response
// data  the RRDHOST the function was registered for
//
// The call is expected to be: config tree [path] [id]
// Returns the HTTP response code, after handing it to rfe->result.cb (if set).
static int dyncfg_config_execute_cb(struct rrd_function_execute *rfe, void *data) {
    RRDHOST *host = data;
    int code;

    // split a private copy of the call into words, leaving rfe->function intact for logging
    size_t function_size = strlen(rfe->function) + 1;
    char buf[function_size];
    memcpy(buf, rfe->function, function_size);

    char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line
    size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS);

    const char *config = get_word(words, num_words, 0);
    const char *action = get_word(words, num_words, 1);
    const char *path = get_word(words, num_words, 2);
    const char *id = get_word(words, num_words, 3);

    // when set, the request is answered with HTTP_RESP_BAD_REQUEST and this message
    const char *bad_request = NULL;

    if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0)
        bad_request = "invalid function call, expected: config";

    else if(!action || !*action)
        bad_request = "invalid function call, expected: config tree";

    else if(strcmp(action, "tree") != 0) {
        // catch-all: the second word is not an action served here
        code = HTTP_RESP_NOT_FOUND;
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: unknown config id '%s' in call: '%s'. This can happen if the plugin that registered the dynamic configuration is not running now.", action, rfe->function);
        rrd_call_function_error(rfe->result.wb, "unknown config id given", code);
    }
    else {
        // defaults: the whole tree, without an id filter
        if(!path || !*path)
            path = "/";

        if(!id || !*id)
            id = NULL;
        else if(!dyncfg_is_valid_id(id))
            bad_request = "invalid id given";

        if(!bad_request) {
            code = HTTP_RESP_OK;
            dyncfg_tree_for_host(host, rfe->result.wb, path, id);
        }
    }

    if(bad_request) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, bad_request);
        code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, bad_request);
    }

    // every path, successful or not, notifies the caller of the result
    if(rfe->result.cb)
        rfe->result.cb(rfe->result.wb, code, rfe->result.data);

    return code;
}
// ----------------------------------------------------------------------------
// Registers the 'config' function on the given host. This is done for all
// leaf nodes (localhost and virtual nodes). The function serves the dyncfg
// tree and is the catch-all for config calls whose id has no function of its
// own overloading it.

void dyncfg_host_init(RRDHOST *host) {
    rrd_function_add(
        host,
        NULL,
        PLUGINSD_FUNCTION_CONFIG,
        120,
        1000,
        "Dynamic configuration",
        "config",
        HTTP_ACCESS_ADMIN,
        true,
        dyncfg_config_execute_cb, // the callback ...
        host                      // ... and the data it receives (the host itself)
    );
}