mirror of
https://github.com/netdata/netdata.git
synced 2025-04-14 17:48:37 +00:00

* cleanup alerts * fix references * fix references * fix references * load alerts once and apply them to each node * simplify health_create_alarm_entry() * Compile without warnings with compiler flags: -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self * code re-organization and cleanup * generate patterns when applying prototypes; give unique dyncfg names to all alerts * eval expressions keep the source and the parsed_as as STRING pointers * renamed host to node in dyncfg ids * renamed host to node in dyncfg ids * add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level * working functionality * code re-organization: moved health event-loop to a new file, moved health globals to health.c * rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instanciated only when they are linked to RRDSETs * dyncfg alert prototypes initialization for alerts * health dyncfg split to separate file * cleanup not-needed code * normalize matches between parsing and json * also detect !* for disabled alerts * dyncfg capability disabled * Store alert config part1 * Add rrdlabels_common_count * wip health variables lookup without indexes * Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter * working variables with runtime lookup * working variables with runtime lookup * delete rrddimvar and rrdfamily index * remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts * added /api/v1/variable that resolves a variable the same way alerts do * remove rrdcalc from eval * remove debug code * remove duplicate assignment * Fix memory leak * all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts * hide all internal structures of EVAL * Enable -Wformat flag Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> * Adjust binding for calculation, warning, critical * Remove 
unused macro * Update config hash id * use the right info and summary in alerts log * use synchronous queries for alerts * Handle cases when config_hash_id is missing from health_log * remove deadlock from health worker * parsing to json payload for health alert prototypes * cleaner parsing and avoiding memory leaks in case of duplicate members in json * fix left-over rename of function * Keep original lookup field to send to the cloud Cleanup / rename function to store config Remove unused DEFINEs, functions * Use ac->lookup * link jobs to the host when the template is registered; do not accept running a function without a host * full dyncfg support for health alerts, except action TEST * working dyncfg additions, updates, removals * fixed missing source, wrong status updates * add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint * fix dyncfg unittest * rename functions * generalize the json-c parser macros and move them to libnetdata * report progress when enabling and disabling dyncfg templates * moved rrdcalc and rrdvar to health * update alarms * added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts * enable parsed json alerts; allow sending back accepted but disabled * added format_version for alerts payload; enables/disables status now is also inheritted by the status of the rules; fixed variable names in json output * remove the RRDHOST pointer from DYNCFG * Fix command field submitted to the cloud * do not send updates to creation requests, for DYNCFG jobs --------- Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: ilyam8 <ilya@netdata.cloud>
342 lines
12 KiB
C
342 lines
12 KiB
C
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include "database/rrd.h"
|
|
|
|
typedef struct rrdvar {
|
|
NETDATA_DOUBLE value;
|
|
} RRDVAR;
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// RRDVAR management
|
|
|
|
// Sanitize a variable name in place.
//
// Every byte that is not alphanumeric, '.' or '_' is overwritten with '_'.
// The string length never changes.
//
// variable: NUL-terminated, writable string (must not be NULL).
// returns:  the number of bytes that were replaced (0 when the name was already clean).
int rrdvar_fix_name(char *variable) {
    int fixed = 0;

    for(char *s = variable; *s; s++) {
        // <ctype.h> classifiers are only defined for values representable as
        // unsigned char (or EOF); a plain char may be negative for bytes >= 0x80.
        if(isalnum((unsigned char)*s) || *s == '.' || *s == '_')
            continue;

        *s = '_';
        fixed++;
    }

    return fixed;
}
|
|
|
|
// Convert a raw variable name into a STRING with a sanitized name.
//
// The input is left untouched: a private copy is made, cleaned with
// rrdvar_fix_name(), turned into a STRING and then the copy is freed.
// The callers in this file release the returned STRING with string_freez().
inline STRING *rrdvar_name_to_string(const char *name) {
    char *sanitized = strdupz(name);
    rrdvar_fix_name(sanitized);

    STRING *result = string_strdupz(sanitized);
    freez(sanitized);

    return result;
}
|
|
|
|
// Conflict callback registered on rrdvars dictionaries by rrdvariables_create().
// It copies the value of the incoming RRDVAR into the one already stored.
// Always returns false.
// NOTE(review): the meaning of the return value is defined by the dictionary API - confirm there.
static bool rrdvar_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) {
    RRDVAR *existing = old_value;
    const RRDVAR *incoming = new_value;

    existing->value = incoming->value;

    return false;
}
|
|
|
|
DICTIONARY *rrdvariables_create(void) {
|
|
DICTIONARY *dict = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
|
&dictionary_stats_category_rrdhealth, sizeof(RRDVAR));
|
|
dictionary_register_conflict_callback(dict, rrdvar_conflict_callback, NULL);
|
|
return dict;
|
|
}
|
|
|
|
// Destroy a dictionary of RRDVARs (counterpart of rrdvariables_create()).
void rrdvariables_destroy(DICTIONARY *dict)
{
    dictionary_destroy(dict);
}
|
|
|
|
// Look up a variable by name and return it acquired, or NULL when it is not found.
//
// dict: the rrdvars dictionary to search; may be NULL (the other functions of
//       this file treat a NULL dictionary as "health is not enabled").
// name: the variable name to look for; may be NULL.
//
// A non-NULL result must be released by the caller
// (rrdvar_release() / dictionary_acquired_item_release()).
static inline const RRDVAR_ACQUIRED *rrdvar_get_and_acquire(DICTIONARY *dict, STRING *name) {
    // same guard as rrdvar_add_and_acquire(): the public getters pass
    // host->rrdvars / st->rrdvars here without checking them first
    if(unlikely(!dict || !name))
        return NULL;

    return (const RRDVAR_ACQUIRED *)dictionary_get_and_acquire_item_advanced(
        dict, string2str(name), (ssize_t)string_strlen(name));
}
|
|
|
|
// Store `value` under `name` in `dict` and return the dictionary item acquired.
// Returns NULL, without touching anything, when either `dict` or `name` is NULL.
inline const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(DICTIONARY *dict, STRING *name, NETDATA_DOUBLE value) {
    if(unlikely(!dict || !name))
        return NULL;

    // the dictionary copies sizeof(RRDVAR) bytes out of this temporary
    RRDVAR rv = { .value = value };

    return (const RRDVAR_ACQUIRED *)dictionary_set_and_acquire_item_advanced(
        dict,
        string2str(name), (ssize_t)string_strlen(name),
        &rv, sizeof(rv),
        NULL);
}
|
|
|
|
// Flush the given rrdvars dictionary (calls dictionary_flush() on it).
void rrdvar_delete_all(DICTIONARY *dict)
{
    dictionary_flush(dict);
}
|
|
|
|
// Release a variable previously acquired from `dict`.
// Does nothing when either argument is NULL (when health is not enabled).
void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva) {
    if(unlikely(!dict || !rva))
        return;

    const DICTIONARY_ITEM *item = (const DICTIONARY_ITEM *)rva;
    dictionary_acquired_item_release(dict, item);
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// CUSTOM HOST VARIABLES
|
|
|
|
// Run `callback` over the items of `dict` using dictionary_walkthrough_read()
// and return its result; returns 0 when `dict` is NULL (when health is not enabled).
inline int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data) {
    return unlikely(!dict) ? 0 : dictionary_walkthrough_read(dict, callback, data);
}
|
|
|
|
// Add a custom host variable (initial value NAN) and return it acquired.
// The name is sanitized with rrdvar_name_to_string() before it is used as key.
// Returns NULL when the host has no rrdvars dictionary.
const RRDVAR_ACQUIRED *rrdvar_host_variable_add_and_acquire(RRDHOST *host, const char *name) {
    if(unlikely(!host->rrdvars))
        return NULL; // when health is not enabled

    STRING *key = rrdvar_name_to_string(name);
    const RRDVAR_ACQUIRED *acquired = rrdvar_add_and_acquire(host->rrdvars, key, NAN);
    string_freez(key);

    return acquired;
}
|
|
|
|
// Assign `value` to an acquired custom host variable.
// When the stored value differs from `value`, it is updated and the variable
// is pushed to the streaming sender right away.
void rrdvar_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value) {
    if(unlikely(!host->rrdvars || !rva))
        return; // when health is not enabled

    RRDVAR *var = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);

    if(var->value == value)
        return; // nothing changed

    var->value = value;

    // if the host is streaming, send this variable upstream immediately
    rrdpush_sender_send_this_host_variable_now(host, rva);
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// CUSTOM CHART VARIABLES
|
|
|
|
// Add a custom chart variable (initial value NAN) and return it acquired.
// The name is sanitized with rrdvar_name_to_string() before it is used as key.
// Returns NULL when the chart has no rrdvars dictionary.
const RRDVAR_ACQUIRED *rrdvar_chart_variable_add_and_acquire(RRDSET *st, const char *name) {
    if(unlikely(!st->rrdvars))
        return NULL;

    STRING *key = rrdvar_name_to_string(name);
    const RRDVAR_ACQUIRED *acquired = rrdvar_add_and_acquire(st->rrdvars, key, NAN);
    string_freez(key);

    return acquired;
}
|
|
|
|
// Assign `value` to an acquired custom chart variable.
// When the stored value differs from `value`, it is updated and the chart is
// flagged with RRDSET_FLAG_UPSTREAM_SEND_VARIABLES.
void rrdvar_chart_variable_set(RRDSET *st, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value) {
    if(unlikely(!st->rrdvars || !rva))
        return;

    RRDVAR *var = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);

    if(var->value == value)
        return; // nothing changed

    var->value = value;
    rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES);
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// RRDVAR lookup
|
|
|
|
NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva) {
|
|
if(unlikely(!rva)) return NAN;
|
|
RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);
|
|
return rv->value;
|
|
}
|
|
|
|
// Look up `variable` in `dict` and copy its value to `*result`.
// Returns true when the variable exists; otherwise returns false and leaves
// `*result` untouched. The item is acquired only for the duration of the read.
static inline bool rrdvar_get_value(DICTIONARY *dict, STRING *variable, NETDATA_DOUBLE *result) {
    const RRDVAR_ACQUIRED *rva = rrdvar_get_and_acquire(dict, variable);
    if(!rva)
        return false;

    *result = rrdvar2number(rva);
    dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva);

    return true;
}
|
|
|
|
// Read a custom host variable by name into `*result`; returns true when it was found.
bool rrdvar_get_custom_host_variable_value(RRDHOST *host, STRING *variable, NETDATA_DOUBLE *result)
{
    return rrdvar_get_value(host->rrdvars, variable, result);
}
|
|
|
|
// Read a custom chart variable by name into `*result`; returns true when it was found.
bool rrdvar_get_custom_chart_variable_value(RRDSET *st, STRING *variable, NETDATA_DOUBLE *result)
{
    return rrdvar_get_value(st->rrdvars, variable, result);
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// RRDVAR to JSON
|
|
|
|
// Append every variable of `dict` to `wb` as a JSON member: "<name>": <value>.
// The caller is expected to have an object open in `wb`.
void rrdvar_to_json_members(DICTIONARY *dict, BUFFER *wb) {
    RRDVAR *var;

    dfe_start_read(dict, var) {
        buffer_json_member_add_double(wb, var_dfe.name, var->value);
    }
    dfe_done(var);
}
|
|
|
|
// Append the custom variables of chart `st` to `buf` as JSON members.
void health_api_v1_chart_custom_variables2json(RRDSET *st, BUFFER *buf)
{
    rrdvar_to_json_members(st->rrdvars, buf);
}
|
|
|
|
// Generate in `wb` a complete JSON document listing the variables available
// to alerts for chart `st`:
//
//  - identification of the chart (id, name, context, family, host)
//  - current_alert_values:             placeholder values and status constants
//  - dimensions_last_stored_values:    per dimension id (and name, when different)
//  - dimensions_last_collected_values: "<dim>_raw"
//  - dimensions_last_collected_time:   "<dim>_last_collected_t"
//  - chart_variables:                  update_every, last_collected_t and the chart's rrdvars
//  - host_variables:                   the host's rrdvars
//  - alerts:                           one entry per alert name of the host
void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *wb) {

    // FIXME this list is incomplete
    // alerts can also access {context}.{dimension} from the entire host database

    RRDHOST *host = st->rrdhost;

    buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);

    buffer_json_member_add_string(wb, "chart", rrdset_id(st));
    buffer_json_member_add_string(wb, "chart_name", rrdset_name(st));
    buffer_json_member_add_string(wb, "chart_context", rrdset_context(st));
    buffer_json_member_add_string(wb, "family", rrdset_family(st));
    buffer_json_member_add_string(wb, "host", rrdhost_hostname(host));

    const time_t now = now_realtime_sec();

    buffer_json_member_add_object(wb, "current_alert_values");
    {
        // emitted in exactly this order
        const struct {
            const char *key;
            NETDATA_DOUBLE value;
        } alert_values[] = {
            { "this",          NAN },
            { "after",         (NETDATA_DOUBLE)now - 1 },
            { "before",        (NETDATA_DOUBLE)now },
            { "now",           (NETDATA_DOUBLE)now },
            { "status",        (NETDATA_DOUBLE)RRDCALC_STATUS_REMOVED },
            { "REMOVED",       (NETDATA_DOUBLE)RRDCALC_STATUS_REMOVED },
            { "UNDEFINED",     (NETDATA_DOUBLE)RRDCALC_STATUS_UNDEFINED },
            { "UNINITIALIZED", (NETDATA_DOUBLE)RRDCALC_STATUS_UNINITIALIZED },
            { "CLEAR",         (NETDATA_DOUBLE)RRDCALC_STATUS_CLEAR },
            { "WARNING",       (NETDATA_DOUBLE)RRDCALC_STATUS_WARNING },
            { "CRITICAL",      (NETDATA_DOUBLE)RRDCALC_STATUS_CRITICAL },
            { "green",         NAN },
            { "red",           NAN },
        };

        for(size_t i = 0; i < sizeof(alert_values) / sizeof(alert_values[0]); i++)
            buffer_json_member_add_double(wb, alert_values[i].key, alert_values[i].value);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "dimensions_last_stored_values");
    {
        RRDDIM *rd;
        dfe_start_read(st->rrddim_root_index, rd) {
            const NETDATA_DOUBLE stored = rd->collector.last_stored_value;

            buffer_json_member_add_double(wb, string2str(rd->id), stored);

            // the name is reported too, unless it is the same STRING as the id
            if(rd->name != rd->id)
                buffer_json_member_add_double(wb, string2str(rd->name), stored);
        }
        dfe_done(rd);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "dimensions_last_collected_values");
    {
        char key[RRD_ID_LENGTH_MAX + 1 + 100];

        RRDDIM *rd;
        dfe_start_read(st->rrddim_root_index, rd) {
            snprintfz(key, sizeof(key), "%s_raw", string2str(rd->id));
            buffer_json_member_add_int64(wb, key, rd->collector.last_collected_value);

            if(rd->name != rd->id) {
                snprintfz(key, sizeof(key), "%s_raw", string2str(rd->name));
                buffer_json_member_add_int64(wb, key, rd->collector.last_collected_value);
            }
        }
        dfe_done(rd);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "dimensions_last_collected_time");
    {
        char key[RRD_ID_LENGTH_MAX + 1 + 100];

        RRDDIM *rd;
        dfe_start_read(st->rrddim_root_index, rd) {
            snprintfz(key, sizeof(key), "%s_last_collected_t", string2str(rd->id));
            buffer_json_member_add_int64(wb, key, rd->collector.last_collected_time.tv_sec);

            if(rd->name != rd->id) {
                snprintfz(key, sizeof(key), "%s_last_collected_t", string2str(rd->name));
                buffer_json_member_add_int64(wb, key, rd->collector.last_collected_time.tv_sec);
            }
        }
        dfe_done(rd);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "chart_variables");
    {
        buffer_json_member_add_int64(wb, "update_every", st->update_every);
        buffer_json_member_add_uint64(wb, "last_collected_t", st->last_collected_time.tv_sec);

        rrdvar_to_json_members(st->rrdvars, wb);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "host_variables");
    {
        rrdvar_to_json_members(host->rrdvars, wb);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "alerts");
    {
        // For every alert name keep only the instance whose chart has the most
        // labels in common with this chart.
        struct scored {
            bool existing;          // false in a candidate, true once stored in the dictionary
            STRING *chart;          // owned reference, freed below
            STRING *context;        // owned reference, freed below
            NETDATA_DOUBLE value;
            size_t score;           // result of rrdlabels_common_count()
        };

        DICTIONARY *best = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE);

        RRDCALC *rc;
        dfe_start_read(host->rrdcalc_root_index, rc) {
            struct scored candidate = {
                .existing = false,
                .chart = string_dup(rc->rrdset->id),
                .context = string_dup(rc->rrdset->context),
                .value = rc->value,
                .score = rrdlabels_common_count(rc->rrdset->rrdlabels, st->rrdlabels),
            };

            // with DONT_OVERWRITE_VALUE a second insert under the same name
            // hands back the entry that is already stored
            struct scored *kept = dictionary_set(best, string2str(rc->config.name), &candidate, sizeof(candidate));

            if(kept->existing) {
                // a previous alert has this name: keep the better one,
                // and drop the strings of the loser (left in `candidate`)
                if(candidate.score > kept->score)
                    SWAP(*kept, candidate);

                string_freez(candidate.chart);
                string_freez(candidate.context);
            }

            kept->existing = true;
        }
        dfe_done(rc);

        struct scored *entry;
        dfe_start_read(best, entry) {
            buffer_json_member_add_object(wb, entry_dfe.name);
            {
                buffer_json_member_add_double(wb, "value", entry->value);
                buffer_json_member_add_string(wb, "instance", string2str(entry->chart));
                buffer_json_member_add_string(wb, "context", string2str(entry->context));
                buffer_json_member_add_uint64(wb, "score", entry->score);
            }
            buffer_json_object_close(wb);

            // the dictionary is destroyed right after this loop,
            // so release the references its entries hold
            string_freez(entry->chart);
            string_freez(entry->context);
        }
        dfe_done(entry);

        dictionary_destroy(best);
    }
    buffer_json_object_close(wb);

    buffer_json_finalize(wb);
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// RRDVAR private members examination
|
|
|
|
const char *rrdvar_name(const RRDVAR_ACQUIRED *rva) {
|
|
return dictionary_acquired_item_name((const DICTIONARY_ITEM *)rva);
|
|
}
|
|
|
|
// Append one "VARIABLE CHART <name> = <value>" line to `wb` for every custom
// variable of chart `st`, after clearing RRDSET_FLAG_UPSTREAM_SEND_VARIABLES
// on the chart.
void rrdvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb) {
    rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES);

    // send the chart local custom variables
    RRDVAR *var;
    dfe_start_read(st->rrdvars, var) {
        buffer_sprintf(wb, "VARIABLE CHART %s = " NETDATA_DOUBLE_FORMAT "\n", var_dfe.name, var->value);
    }
    dfe_done(var);
}
|