0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-14 17:48:37 +00:00
netdata_netdata/health/rrdvar.c
Costa Tsaousis f466b8aef5
DYNCFG: dynamically configured alerts ()
* cleanup alerts

* fix references

* fix references

* fix references

* load alerts once and apply them to each node

* simplify health_create_alarm_entry()

* Compile without warnings with compiler flags:

   -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self

* code re-organization and cleanup

* generate patterns when applying prototypes; give unique dyncfg names to all alerts

* eval expressions keep the source and the parsed_as as STRING pointers

* renamed host to node in dyncfg ids

* renamed host to node in dyncfg ids

* add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level

* working functionality

* code re-organization: moved health event-loop to a new file, moved health globals to health.c

* rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instantiated only when they are linked to RRDSETs

* dyncfg alert prototypes initialization for alerts

* health dyncfg split to separate file

* cleanup not-needed code

* normalize matches between parsing and json

* also detect !* for disabled alerts

* dyncfg capability disabled

* Store alert config part1

* Add rrdlabels_common_count

* wip health variables lookup without indexes

* Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter

* working variables with runtime lookup

* working variables with runtime lookup

* delete rrddimvar and rrdfamily index

* remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts

* added /api/v1/variable that resolves a variable the same way alerts do

* remove rrdcalc from eval

* remove debug code

* remove duplicate assignment

* Fix memory leak

* all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts

* hide all internal structures of EVAL

* Enable -Wformat flag

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Adjust binding for calculation, warning, critical

* Remove unused macro

* Update config hash id

* use the right info and summary in alerts log

* use synchronous queries for alerts

* Handle cases when config_hash_id is missing from health_log

* remove deadlock from health worker

* parsing to json payload for health alert prototypes

* cleaner parsing and avoiding memory leaks in case of duplicate members in json

* fix left-over rename of function

* Keep original lookup field to send to the cloud
Cleanup / rename function to store config
Remove unused DEFINEs, functions

* Use ac->lookup

* link jobs to the host when the template is registered; do not accept running a function without a host

* full dyncfg support for health alerts, except action TEST

* working dyncfg additions, updates, removals

* fixed missing source, wrong status updates

* add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint

* fix dyncfg unittest

* rename functions

* generalize the json-c parser macros and move them to libnetdata

* report progress when enabling and disabling dyncfg templates

* moved rrdcalc and rrdvar to health

* update alarms

* added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts

* enable parsed json alerts; allow sending back accepted but disabled

* added format_version for alerts payload; the enabled/disabled status is now also inherited from the status of the rules; fixed variable names in json output

* remove the RRDHOST pointer from DYNCFG

* Fix command field submitted to the cloud

* do not send updates to creation requests, for DYNCFG jobs

---------

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: ilyam8 <ilya@netdata.cloud>
2024-01-23 20:20:41 +02:00

342 lines
12 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "database/rrd.h"
// A health variable as stored in an rrdvars dictionary (see rrdvariables_create()).
// Only the numeric value lives here; the variable name is the dictionary key
// (rrdvar_name() reads it back from the acquired dictionary item).
typedef struct rrdvar {
// current value; the *_add_and_acquire() helpers in this file pass NAN as the initial value
NETDATA_DOUBLE value;
} RRDVAR;
// ----------------------------------------------------------------------------
// RRDVAR management
/**
 * Sanitize a variable name in place.
 *
 * Every character that is not alphanumeric, '.' or '_' is overwritten with '_'.
 * The length of the string never changes.
 *
 * @param variable NUL-terminated, writable string. NULL is tolerated and treated
 *                 as an empty name.
 * @return the number of characters that were replaced (0 when the name was already clean).
 */
int rrdvar_fix_name(char *variable) {
    if(!variable)
        return 0;

    int fixed = 0;

    for(char *s = variable; *s; s++) {
        // <ctype.h> classifiers require a value representable as unsigned char (or EOF).
        // Passing a plain char is undefined behavior when it is negative, which happens
        // for any byte >= 0x80 (e.g. UTF-8) on platforms where char is signed.
        if(isalnum((unsigned char)*s) || *s == '.' || *s == '_')
            continue;

        *s = '_';
        fixed++;
    }

    return fixed;
}
/**
 * Convert a raw variable name into a STRING holding its sanitized form.
 *
 * The input is not modified: a temporary copy is made, passed through
 * rrdvar_fix_name(), turned into a STRING and then the copy is freed.
 *
 * @param name the raw variable name.
 * @return a STRING obtained from string_strdupz(); callers in this file release
 *         it with string_freez().
 */
inline STRING *rrdvar_name_to_string(const char *name) {
    // rrdvar_fix_name() edits its argument in place, so work on a private copy
    char *sanitized = strdupz(name);
    rrdvar_fix_name(sanitized);

    STRING *result = string_strdupz(sanitized);

    freez(sanitized);
    return result;
}
/**
 * Conflict callback registered on every rrdvars dictionary by rrdvariables_create().
 *
 * Copies the value of the incoming RRDVAR into the RRDVAR already stored.
 * NOTE(review): presumably the dictionary calls this when a set targets a key
 * that already exists - confirm against the dictionary API.
 *
 * @param old_value the RRDVAR currently stored in the dictionary.
 * @param new_value the RRDVAR that was passed to the set operation.
 * @return always false. NOTE(review): the meaning of the return value is defined
 *         by the dictionary API and is not visible here.
 */
static bool rrdvar_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) {
    RRDVAR *stored = old_value;
    const RRDVAR *incoming = new_value;

    stored->value = incoming->value;

    return false;
}
/**
 * Create an empty dictionary for holding RRDVAR items.
 *
 * The dictionary is created with DICT_OPTION_DONT_OVERWRITE_VALUE and
 * DICT_OPTION_FIXED_SIZE, items of sizeof(RRDVAR), accounted under
 * dictionary_stats_category_rrdhealth, and with rrdvar_conflict_callback()
 * registered as its conflict callback.
 *
 * @return the new dictionary; release it with rrdvariables_destroy().
 */
DICTIONARY *rrdvariables_create(void) {
    DICTIONARY *vars = dictionary_create_advanced(
        DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
        &dictionary_stats_category_rrdhealth,
        sizeof(RRDVAR));

    dictionary_register_conflict_callback(vars, rrdvar_conflict_callback, NULL);

    return vars;
}
// Destroy a variables dictionary created by rrdvariables_create().
// This is a direct forward to dictionary_destroy(); no NULL check is done here.
void rrdvariables_destroy(DICTIONARY *dict) {
dictionary_destroy(dict);
}
/**
 * Look up a variable by name and acquire its dictionary item.
 *
 * @param dict the variables dictionary to search.
 * @param name the variable name, used as the dictionary key.
 * @return the result of dictionary_get_and_acquire_item_advanced() cast to
 *         RRDVAR_ACQUIRED; rrdvar_get_value() treats NULL as "not found" and
 *         releases a non-NULL result when done.
 */
static inline const RRDVAR_ACQUIRED *rrdvar_get_and_acquire(DICTIONARY *dict, STRING *name) {
    const char *key = string2str(name);
    ssize_t key_len = (ssize_t)string_strlen(name);

    return (const RRDVAR_ACQUIRED *)dictionary_get_and_acquire_item_advanced(dict, key, key_len);
}
/**
 * Set a variable in a dictionary and acquire its item.
 *
 * A temporary RRDVAR carrying `value` is handed to the dictionary set call.
 * NOTE(review): for a name that already exists, the conflict callback registered
 * in rrdvariables_create() copies `value` over the stored one - confirm that the
 * dictionary invokes it in that case.
 *
 * @param dict  the variables dictionary; NULL yields NULL.
 * @param name  the variable name used as key; NULL yields NULL.
 * @param value the value to store.
 * @return the acquired item (release it with rrdvar_release()), or NULL when
 *         dict or name is NULL.
 */
inline const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(DICTIONARY *dict, STRING *name, NETDATA_DOUBLE value) {
    if(unlikely(!(dict && name)))
        return NULL;

    RRDVAR initial = { .value = value };

    const char *key = string2str(name);
    ssize_t key_len = (ssize_t)string_strlen(name);

    return (const RRDVAR_ACQUIRED *)dictionary_set_and_acquire_item_advanced(
        dict, key, key_len, &initial, sizeof(initial), NULL);
}
// Flush a variables dictionary by forwarding to dictionary_flush().
// The dictionary itself is not destroyed here (see rrdvariables_destroy()).
// NOTE(review): presumably this removes every variable while keeping the dictionary usable - confirm.
void rrdvar_delete_all(DICTIONARY *dict) {
dictionary_flush(dict);
}
/**
 * Release a variable item previously acquired from `dict`.
 *
 * @param dict the dictionary the item was acquired from; NULL makes this a no-op.
 * @param rva  the acquired item; NULL makes this a no-op.
 */
void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva) {
    // when health is not enabled
    if(unlikely(!(dict && rva)))
        return;

    dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva);
}
// ----------------------------------------------------------------------------
// CUSTOM HOST VARIABLES
/**
 * Run `callback` over the items of a variables dictionary via
 * dictionary_walkthrough_read().
 *
 * @param dict     the dictionary to walk; NULL (health not enabled) returns 0
 *                 without calling the callback.
 * @param callback function handed to dictionary_walkthrough_read().
 * @param data     opaque pointer handed to dictionary_walkthrough_read() along with callback.
 * @return 0 when dict is NULL, otherwise whatever dictionary_walkthrough_read() returns.
 */
inline int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data) {
    int ret = 0;

    // when health is not enabled
    if(unlikely(!dict))
        return ret;

    ret = dictionary_walkthrough_read(dict, callback, data);
    return ret;
}
/**
 * Add a custom variable to a host and acquire it.
 *
 * The name is sanitized through rrdvar_name_to_string() and the variable is set
 * with NAN as its value.
 * NOTE(review): if the variable already exists, the dictionary's conflict callback
 * copies the new value, so this looks like it resets it to NAN - confirm intended.
 *
 * @param host the host owning the variable; host->rrdvars may be NULL.
 * @param name the raw variable name.
 * @return the acquired variable, or NULL when host->rrdvars is NULL.
 */
const RRDVAR_ACQUIRED *rrdvar_host_variable_add_and_acquire(RRDHOST *host, const char *name) {
    // when health is not enabled
    if(unlikely(!host->rrdvars))
        return NULL;

    STRING *key = rrdvar_name_to_string(name);
    const RRDVAR_ACQUIRED *acquired = rrdvar_add_and_acquire(host->rrdvars, key, NAN);

    // the STRING was only needed for the call above
    string_freez(key);

    return acquired;
}
/**
 * Update the value of an acquired custom host variable.
 *
 * When the stored value compares different from `value`, it is replaced and
 * rrdpush_sender_send_this_host_variable_now() is called for it.
 * A stored NAN always compares different (NAN != x is true for every x), so a
 * variable still holding NAN is always updated and sent.
 *
 * @param host  the host owning the variable; nothing happens if host->rrdvars is NULL.
 * @param rva   the acquired variable; nothing happens if NULL.
 * @param value the new value.
 */
void rrdvar_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value) {
    // when health is not enabled
    if(unlikely(!host->rrdvars || !rva))
        return;

    RRDVAR *var = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);

    if(var->value == value)
        return; // unchanged, nothing to store or send

    var->value = value;

    // if the host is streaming, send this variable upstream immediately
    rrdpush_sender_send_this_host_variable_now(host, rva);
}
// ----------------------------------------------------------------------------
// CUSTOM CHART VARIABLES
/**
 * Add a custom variable to a chart and acquire it.
 *
 * The name is sanitized through rrdvar_name_to_string() and the variable is set
 * with NAN as its value.
 * NOTE(review): if the variable already exists, the dictionary's conflict callback
 * copies the new value, so this looks like it resets it to NAN - confirm intended.
 *
 * @param st   the chart owning the variable; st->rrdvars may be NULL.
 * @param name the raw variable name.
 * @return the acquired variable, or NULL when st->rrdvars is NULL.
 */
const RRDVAR_ACQUIRED *rrdvar_chart_variable_add_and_acquire(RRDSET *st, const char *name) {
    if(unlikely(!st->rrdvars))
        return NULL;

    STRING *key = rrdvar_name_to_string(name);
    const RRDVAR_ACQUIRED *acquired = rrdvar_add_and_acquire(st->rrdvars, key, NAN);

    // the STRING was only needed for the call above
    string_freez(key);

    return acquired;
}
/**
 * Update the value of an acquired custom chart variable.
 *
 * When the stored value compares different from `value`, it is replaced and
 * RRDSET_FLAG_UPSTREAM_SEND_VARIABLES is raised on the chart. That flag is
 * cleared by rrdvar_print_to_streaming_custom_chart_variables().
 * A stored NAN always compares different (NAN != x is true for every x).
 *
 * @param st    the chart owning the variable; nothing happens if st->rrdvars is NULL.
 * @param rva   the acquired variable; nothing happens if NULL.
 * @param value the new value.
 */
void rrdvar_chart_variable_set(RRDSET *st, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value) {
    if(unlikely(!st->rrdvars || !rva))
        return;

    RRDVAR *var = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);

    if(var->value == value)
        return; // unchanged, do not flag the chart

    var->value = value;
    rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES);
}
// ----------------------------------------------------------------------------
// RRDVAR lookup
/**
 * Read the numeric value of an acquired variable.
 *
 * @param rva the acquired variable, or NULL.
 * @return the stored value, or NAN when rva is NULL.
 */
NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva) {
    if(unlikely(!rva))
        return NAN;

    const RRDVAR *var = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);
    return var->value;
}
/**
 * Look up a variable by name and copy out its current value.
 *
 * The item is acquired only for the duration of the read and released before
 * returning.
 *
 * @param dict     the variables dictionary. The other helpers in this file treat a
 *                 NULL dictionary as "health is not enabled"; here it means
 *                 "not found".
 * @param variable the variable name; NULL means "not found".
 * @param result   where to store the value; written only when the variable is
 *                 found. NULL means "not found".
 * @return true when the variable exists and *result was written, false otherwise.
 */
static inline bool rrdvar_get_value(DICTIONARY *dict, STRING *variable, NETDATA_DOUBLE *result) {
    // the public wrappers pass host->rrdvars / st->rrdvars straight through,
    // so guard here the same way rrdvar_add_and_acquire() and rrdvar_release() do
    if(!dict || !variable || !result)
        return false;

    const RRDVAR_ACQUIRED *rva = rrdvar_get_and_acquire(dict, variable);
    if(!rva)
        return false;

    *result = rrdvar2number(rva);
    dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva);

    return true;
}
// Look up `variable` among the custom variables of `host` (host->rrdvars).
// Returns true and writes the value to *result when the variable is found;
// returns false otherwise, in which case *result is not written.
bool rrdvar_get_custom_host_variable_value(RRDHOST *host, STRING *variable, NETDATA_DOUBLE *result) {
return rrdvar_get_value(host->rrdvars, variable, result);
}
// Look up `variable` among the custom variables of chart `st` (st->rrdvars).
// Returns true and writes the value to *result when the variable is found;
// returns false otherwise, in which case *result is not written.
bool rrdvar_get_custom_chart_variable_value(RRDSET *st, STRING *variable, NETDATA_DOUBLE *result) {
return rrdvar_get_value(st->rrdvars, variable, result);
}
// ----------------------------------------------------------------------------
// RRDVAR to JSON
/**
 * Append every variable of a dictionary to a JSON buffer as a
 * "name": value member (value added with buffer_json_member_add_double()).
 *
 * The caller is expected to have opened the enclosing JSON object on `wb`;
 * this function neither opens nor closes one.
 *
 * @param dict the variables dictionary to iterate (read traversal).
 * @param wb   the JSON buffer to append to.
 */
void rrdvar_to_json_members(DICTIONARY *dict, BUFFER *wb) {
    RRDVAR *var;

    dfe_start_read(dict, var) {
        // <var>_dfe.name is the key of the item being visited
        buffer_json_member_add_double(wb, var_dfe.name, var->value);
    }
    dfe_done(var);
}
// Append the custom variables of chart `st` (st->rrdvars) to `buf` as JSON members.
// Thin wrapper around rrdvar_to_json_members(); the enclosing JSON object must
// already be open on `buf`.
void health_api_v1_chart_custom_variables2json(RRDSET *st, BUFFER *buf) {
rrdvar_to_json_members(st->rrdvars, buf);
}
// Write into `wb` a complete JSON document listing values related to chart `st`:
// chart identity, placeholder alert values, per-dimension values, chart and host
// custom variables, and one entry per alert name found on the host.
// The buffer is initialized at the start and finalized at the end, so `wb` is
// expected to hold nothing else.
void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *wb) {
// FIXME this list is incomplete
// alerts can also access {context}.{dimension} from the entire host database
RRDHOST *host = st->rrdhost;
buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
// identity of the chart and its host
buffer_json_member_add_string(wb, "chart", rrdset_id(st));
buffer_json_member_add_string(wb, "chart_name", rrdset_name(st));
buffer_json_member_add_string(wb, "chart_context", rrdset_context(st));
buffer_json_member_add_string(wb, "family", rrdset_family(st));
buffer_json_member_add_string(wb, "host", rrdhost_hostname(host));
time_t now = now_realtime_sec();
// placeholder values: no specific alert is being evaluated here, so "this",
// "green" and "red" are NAN, "status" is RRDCALC_STATUS_REMOVED, and the
// time window is the last second; the status constants are listed by name
buffer_json_member_add_object(wb, "current_alert_values");
{
buffer_json_member_add_double(wb, "this", NAN);
buffer_json_member_add_double(wb, "after", (NETDATA_DOUBLE)now - 1);
buffer_json_member_add_double(wb, "before", (NETDATA_DOUBLE)now);
buffer_json_member_add_double(wb, "now", (NETDATA_DOUBLE)now);
buffer_json_member_add_double(wb, "status", (NETDATA_DOUBLE)RRDCALC_STATUS_REMOVED);
buffer_json_member_add_double(wb, "REMOVED", (NETDATA_DOUBLE)RRDCALC_STATUS_REMOVED);
buffer_json_member_add_double(wb, "UNDEFINED", (NETDATA_DOUBLE)RRDCALC_STATUS_UNDEFINED);
buffer_json_member_add_double(wb, "UNINITIALIZED", (NETDATA_DOUBLE)RRDCALC_STATUS_UNINITIALIZED);
buffer_json_member_add_double(wb, "CLEAR", (NETDATA_DOUBLE)RRDCALC_STATUS_CLEAR);
buffer_json_member_add_double(wb, "WARNING", (NETDATA_DOUBLE)RRDCALC_STATUS_WARNING);
buffer_json_member_add_double(wb, "CRITICAL", (NETDATA_DOUBLE)RRDCALC_STATUS_CRITICAL);
buffer_json_member_add_double(wb, "green", NAN);
buffer_json_member_add_double(wb, "red", NAN);
}
buffer_json_object_close(wb);
// last stored value of each dimension, keyed by dimension id and, when the
// name is a different STRING pointer than the id, also by dimension name
buffer_json_member_add_object(wb, "dimensions_last_stored_values");
{
RRDDIM *rd;
dfe_start_read(st->rrddim_root_index, rd) {
buffer_json_member_add_double(wb, string2str(rd->id), rd->collector.last_stored_value);
if(rd->name != rd->id)
buffer_json_member_add_double(wb, string2str(rd->name), rd->collector.last_stored_value);
}
dfe_done(rd);
}
buffer_json_object_close(wb);
// last collected value of each dimension, keyed as "<id>_raw" / "<name>_raw"
buffer_json_member_add_object(wb, "dimensions_last_collected_values");
{
char name[RRD_ID_LENGTH_MAX + 1 + 100];
RRDDIM *rd;
dfe_start_read(st->rrddim_root_index, rd) {
snprintfz(name, sizeof(name), "%s_raw", string2str(rd->id));
buffer_json_member_add_int64(wb, name, rd->collector.last_collected_value);
if(rd->name != rd->id) {
snprintfz(name, sizeof(name), "%s_raw", string2str(rd->name));
buffer_json_member_add_int64(wb, name, rd->collector.last_collected_value);
}
}
dfe_done(rd);
}
buffer_json_object_close(wb);
// last collection time (tv_sec) of each dimension, keyed as
// "<id>_last_collected_t" / "<name>_last_collected_t"
buffer_json_member_add_object(wb, "dimensions_last_collected_time");
{
char name[RRD_ID_LENGTH_MAX + 1 + 100];
RRDDIM *rd;
dfe_start_read(st->rrddim_root_index, rd) {
snprintfz(name, sizeof(name), "%s_last_collected_t", string2str(rd->id));
buffer_json_member_add_int64(wb, name, rd->collector.last_collected_time.tv_sec);
if(rd->name != rd->id) {
snprintfz(name, sizeof(name), "%s_last_collected_t", string2str(rd->name));
buffer_json_member_add_int64(wb, name, rd->collector.last_collected_time.tv_sec);
}
}
dfe_done(rd);
}
buffer_json_object_close(wb);
// built-in chart values followed by the chart's custom variables
buffer_json_member_add_object(wb, "chart_variables");
{
buffer_json_member_add_int64(wb, "update_every", st->update_every);
buffer_json_member_add_uint64(wb, "last_collected_t", st->last_collected_time.tv_sec);
rrdvar_to_json_members(st->rrdvars, wb);
}
buffer_json_object_close(wb);
// custom variables of the host this chart belongs to
buffer_json_member_add_object(wb, "host_variables");
{
rrdvar_to_json_members(st->rrdhost->rrdvars, wb);
}
buffer_json_object_close(wb);
// one entry per alert name on the host; when several alerts share a name, the
// one with the highest rrdlabels_common_count() score against this chart's labels is kept
buffer_json_member_add_object(wb, "alerts");
{
// per-alert-name candidate; `existing` tells a freshly inserted entry (false,
// copied from tmp) apart from one stored by an earlier iteration (true)
struct scored {
bool existing;
STRING *chart;
STRING *context;
NETDATA_DOUBLE value;
size_t score;
} tmp, *z;
// temporary index keyed by alert name; DONT_OVERWRITE_VALUE is requested so that
// (presumably) dictionary_set() on an existing key returns the stored entry untouched
DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE);
RRDCALC *rc;
dfe_start_read(st->rrdhost->rrdcalc_root_index, rc) {
// tmp holds its own references to the chart id and context strings
tmp = (struct scored) {
.existing = false,
.chart = string_dup(rc->rrdset->id),
.context = string_dup(rc->rrdset->context),
.value = rc->value,
.score = rrdlabels_common_count(rc->rrdset->rrdlabels, st->rrdlabels),
};
z = dictionary_set(dict, string2str(rc->config.name), &tmp, sizeof(tmp));
if(z->existing) {
// an entry for this name was already stored: keep the higher score.
// After the optional SWAP, tmp holds the losing candidate, whose
// string references are released here.
if(tmp.score > z->score)
SWAP(*z, tmp);
z->existing = true;
string_freez(tmp.chart);
string_freez(tmp.context);
}
else
// first alert with this name: the stored copy now owns tmp's strings
z->existing = true;
}
dfe_done(rc);
// emit the winners and release the string references they hold
dfe_start_read(dict, z) {
buffer_json_member_add_object(wb, z_dfe.name);
{
buffer_json_member_add_double(wb, "value", z->value);
buffer_json_member_add_string(wb, "instance", string2str(z->chart));
buffer_json_member_add_string(wb, "context", string2str(z->context));
buffer_json_member_add_uint64(wb, "score", z->score);
}
buffer_json_object_close(wb);
string_freez(z->chart);
string_freez(z->context);
}
dfe_done(z);
dictionary_destroy(dict);
}
buffer_json_object_close(wb);
buffer_json_finalize(wb);
}
// ----------------------------------------------------------------------------
// RRDVAR private members examination
// Return the name of an acquired variable, i.e. the name of its dictionary item
// as reported by dictionary_acquired_item_name(). No NULL check is done on `rva` here.
const char *rrdvar_name(const RRDVAR_ACQUIRED *rva) {
return dictionary_acquired_item_name((const DICTIONARY_ITEM *)rva);
}
/**
 * Append one "VARIABLE CHART <name> = <value>" line to `wb` for every custom
 * variable of chart `st`.
 *
 * RRDSET_FLAG_UPSTREAM_SEND_VARIABLES (raised by rrdvar_chart_variable_set())
 * is cleared on the chart before the variables are written.
 *
 * @param st the chart whose st->rrdvars are printed.
 * @param wb the buffer receiving the lines.
 */
void rrdvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb) {
    rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES);

    // send the chart local custom variables
    RRDVAR *var;
    dfe_start_read(st->rrdvars, var) {
        buffer_sprintf(wb, "VARIABLE CHART %s = " NETDATA_DOUBLE_FORMAT "\n", var_dfe.name, var->value);
    }
    dfe_done(var);
}