0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-15 01:58:34 +00:00
netdata_netdata/health/rrdcalc.h
Costa Tsaousis f466b8aef5
DYNCFG: dynamically configured alerts ()
* cleanup alerts

* fix references

* fix references

* fix references

* load alerts once and apply them to each node

* simplify health_create_alarm_entry()

* Compile without warnings with compiler flags:

   -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self

* code re-organization and cleanup

* generate patterns when applying prototypes; give unique dyncfg names to all alerts

* eval expressions keep the source and the parsed_as as STRING pointers

* renamed host to node in dyncfg ids

* renamed host to node in dyncfg ids

* add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level

* working functionality

* code re-organization: moved health event-loop to a new file, moved health globals to health.c

* rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instantiated only when they are linked to RRDSETs

* dyncfg alert prototypes initialization for alerts

* health dyncfg split to separate file

* cleanup not-needed code

* normalize matches between parsing and json

* also detect !* for disabled alerts

* dyncfg capability disabled

* Store alert config part1

* Add rrdlabels_common_count

* wip health variables lookup without indexes

* Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter

* working variables with runtime lookup

* working variables with runtime lookup

* delete rrddimvar and rrdfamily index

* remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts

* added /api/v1/variable that resolves a variable the same way alerts do

* remove rrdcalc from eval

* remove debug code

* remove duplicate assignment

* Fix memory leak

* all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts

* hide all internal structures of EVAL

* Enable -Wformat flag

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Adjust binding for calculation, warning, critical

* Remove unused macro

* Update config hash id

* use the right info and summary in alerts log

* use synchronous queries for alerts

* Handle cases when config_hash_id is missing from health_log

* remove deadlock from health worker

* parsing to json payload for health alert prototypes

* cleaner parsing and avoiding memory leaks in case of duplicate members in json

* fix left-over rename of function

* Keep original lookup field to send to the cloud
Cleanup / rename function to store config
Remove unused DEFINEs, functions

* Use ac->lookup

* link jobs to the host when the template is registered; do not accept running a function without a host

* full dyncfg support for health alerts, except action TEST

* working dyncfg additions, updates, removals

* fixed missing source, wrong status updates

* add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint

* fix dyncfg unittest

* rename functions

* generalize the json-c parser macros and move them to libnetdata

* report progress when enabling and disabling dyncfg templates

* moved rrdcalc and rrdvar to health

* update alarms

* added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts

* enable parsed json alerts; allow sending back accepted but disabled

* added format_version for alerts payload; enables/disables status is now also inherited by the status of the rules; fixed variable names in json output

* remove the RRDHOST pointer from DYNCFG

* Fix command field submitted to the cloud

* do not send updates to creation requests, for DYNCFG jobs

---------

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: ilyam8 <ilya@netdata.cloud>
2024-01-23 20:20:41 +02:00

146 lines
5.9 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "../database/rrd.h"
#include "../web/api/queries/rrdr.h"
#include "health_prototypes.h"
#ifndef NETDATA_RRDCALC_H
#define NETDATA_RRDCALC_H 1
// calculated variables (defined in health configuration)
// These aggregate time-series data at fixed intervals
// (defined in their update_every member below)
// They increase the overhead of netdata.
//
// These calculations are stored under RRDHOST.
// They are also linked to an RRDSET (of course only when a
// matching chart is found).
// The status of an alert.
// Every value is assigned explicitly; the numbers must stay exactly as they
// are (see the notes on the individual members).
typedef enum rrdcalc_status {
    RRDCALC_STATUS_REMOVED       = -2,
    RRDCALC_STATUS_UNDEFINED     = -1,
    RRDCALC_STATUS_UNINITIALIZED =  0,
    RRDCALC_STATUS_CLEAR         =  1,
    RRDCALC_STATUS_RAISED        =  2, // DO NOT CHANGE THESE NUMBERS
    RRDCALC_STATUS_WARNING       =  3, // DO NOT CHANGE THESE NUMBERS
    RRDCALC_STATUS_CRITICAL      =  4, // DO NOT CHANGE THESE NUMBERS
} RRDCALC_STATUS;
// Bit flags stored in the run_flags member of struct rrdcalc.
// Each member occupies its own bit, so they can be combined with bitwise OR.
typedef enum {
    RRDCALC_FLAG_DB_ERROR   = (1 << 0),
    RRDCALC_FLAG_DB_NAN     = (1 << 1),
    // bit 2 is not assigned (RRDCALC_FLAG_DB_STALE = (1 << 2) is commented out)
    RRDCALC_FLAG_CALC_ERROR = (1 << 3),
    RRDCALC_FLAG_WARN_ERROR = (1 << 4),
    RRDCALC_FLAG_CRIT_ERROR = (1 << 5),
    RRDCALC_FLAG_RUNNABLE   = (1 << 6),
    RRDCALC_FLAG_DISABLED   = (1 << 7),
    RRDCALC_FLAG_SILENCED   = (1 << 8),
    RRDCALC_FLAG_RUN_ONCE   = (1 << 9),
} RRDCALC_FLAGS;
// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name and parameters, this presumably writes the
// set bits of `flags` into `wb` as a JSON array named `key` - confirm against
// the implementation.
void rrdcalc_flags_to_json_array(BUFFER *wb, const char *key, RRDCALC_FLAGS flags);
// Expands to RRDCALC_OPTION_NO_CLEAR_NOTIFICATION.
// NOTE(review): RRDCALC_OPTION_NO_CLEAR_NOTIFICATION is not defined in this
// header - confirm it is still provided by one of the included headers, or
// that this macro is still used at all.
#define RRDCALC_ALL_OPTIONS_EXCLUDING_THE_RRDR_ONES (RRDCALC_OPTION_NO_CLEAR_NOTIFICATION)
// One alert (alarm): its identity, its match rules and configuration,
// and the state kept while it is being evaluated.
struct rrdcalc {
    // ---- identity ----------------------------------------------------------
    uint32_t id;                                // unique id of this alarm
    uint32_t next_event_id;                     // event id the next event of this alarm will use

    STRING *key;                                // unique key in the host's rrdcalc_root_index
    STRING *chart;                              // id of the chart this alarm should be linked to

    struct rrd_alert_match match;
    struct rrd_alert_config config;

    // ---- runtime information -----------------------------------------------
    STRING *summary;                            // original summary, before any variable replacement
    STRING *info;                               // original info, before any variable replacement

    RRDCALC_STATUS old_status;                  // previous status of the alarm
    RRDCALC_STATUS status;                      // current status of the alarm

    NETDATA_DOUBLE value;                       // current value of the alarm
    NETDATA_DOUBLE old_value;                   // previous value of the alarm
    NETDATA_DOUBLE last_status_change_value;    // value at the last status change

    RRDCALC_FLAGS run_flags;                    // see RRDCALC_FLAG_*

    time_t last_updated;                        // timestamp of the last update
    time_t next_update;                         // timestamp of the next update
    time_t last_status_change;                  // timestamp of the last status change

    time_t last_repeat;                         // last time the alarm got repeated
    uint32_t times_repeat;                      // how many times the alarm got repeated

    time_t db_after;                            // first timestamp evaluated by the db lookup
    time_t db_before;                           // last timestamp evaluated by the db lookup

    time_t delay_up_to_timestamp;               // notifications should be delayed up to this timestamp
    int delay_up_current;                       // current "up" notification delay duration
    int delay_down_current;                     // current "down" notification delay duration
    int delay_last;                             // the last delay that was used

    // ---- the chart this alarm is linked to ---------------------------------
    size_t labels_version;                      // NOTE(review): presumably the chart labels version last seen - confirm
    struct rrdset *rrdset;

    // NOTE(review): presumably the links of a doubly-linked list of alerts - confirm
    struct rrdcalc *next;
    struct rrdcalc *prev;
};
// Convenience accessors: each one passes a STRING member of the alert
// (either rc->chart or a member of rc->config) through string2str().
#define rrdcalc_name(rc)            string2str((rc)->config.name)
#define rrdcalc_chart_name(rc)      string2str((rc)->chart)
#define rrdcalc_exec(rc)            string2str((rc)->config.exec)
#define rrdcalc_recipient(rc)       string2str((rc)->config.recipient)
#define rrdcalc_classification(rc)  string2str((rc)->config.classification)
#define rrdcalc_component(rc)       string2str((rc)->config.component)
#define rrdcalc_type(rc)            string2str((rc)->config.type)
#define rrdcalc_source(rc)          string2str((rc)->config.source)
#define rrdcalc_units(rc)           string2str((rc)->config.units)
#define rrdcalc_dimensions(rc)      string2str((rc)->config.dimensions)
// Traversal helpers for the alerts of a host. They are thin wrappers that
// apply dfe_start_read() / dfe_start_reentrant() / dfe_done() to
// (host)->rrdcalc_root_index, with `rc` as the iteration variable.
//
// Each definition must end on its own line: a trailing backslash on the last
// line of one macro would splice the next #define into its replacement list,
// which breaks both macros.
#define foreach_rrdcalc_in_rrdhost_read(host, rc) \
    dfe_start_read((host)->rrdcalc_root_index, rc)

#define foreach_rrdcalc_in_rrdhost_reentrant(host, rc) \
    dfe_start_reentrant((host)->rrdcalc_root_index, rc)

#define foreach_rrdcalc_in_rrdhost_done(rc) \
    dfe_done(rc)

// Evaluates to rc->config.after, i.e. it is non-zero (true) exactly when the
// alert's configured `after` value is non-zero.
#define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->config.after)
// Declarations only; the definitions are not part of this header.
// The notes below are inferred from names and signatures - confirm them
// against the implementation before relying on them.

// NOTE(review): presumably refreshes rc's info text using the labels of its linked chart.
void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc);
// NOTE(review): get/release look like an acquire/release pair for finding an
// alert of chart `st` by `alert_name`; presumably every successful get must be
// matched by a release on the same chart.
const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name);
void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca);
// Converts an acquired handle to the RRDCALC pointer it refers to (per the signature).
RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca);
// Returns a string for the given RRDCALC_STATUS value.
const char *rrdcalc_status2string(RRDCALC_STATUS status);
// Returns a 32-bit id for the (host, chart, name) combination; next_event_id
// and config_hash_id are passed by pointer.
// NOTE(review): whether those pointers are inputs, outputs or both is not visible here.
uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id);
// Reports whether the alert has a repeat interval configured.
// Returns 1 when config.warn_repeat_every or config.crit_repeat_every is
// greater than zero, and 0 otherwise.
static inline int rrdcalc_isrepeating(RRDCALC *rc) {
    const int repeats_on_warning  = rc->config.warn_repeat_every > 0;
    const int repeats_on_critical = rc->config.crit_repeat_every > 0;

    // the branch hint marks a repeating alert as the unlikely case
    return unlikely(repeats_on_warning || repeats_on_critical) ? 1 : 0;
}
// Declarations only; the definitions are not part of this header.
// The notes below are inferred from names and signatures - confirm them
// against the implementation before relying on them.

// NOTE(review): presumably unlinks and deletes every alert linked to chart `st`.
void rrdcalc_unlink_and_delete_all_rrdset_alerts(RRDSET *st);
// NOTE(review): presumably deletes every alert of `host`.
void rrdcalc_delete_all(RRDHOST *host);
// NOTE(review): presumably create / destroy the host's rrdcalc_root_index.
void rrdcalc_rrdhost_index_init(RRDHOST *host);
void rrdcalc_rrdhost_index_destroy(RRDHOST *host);
// NOTE(review): presumably unlinks `rc` from its chart and deletes it from
// `host`; `having_ll_wrlock` looks like it tells the function that the caller
// already holds a write lock on the alerts linked list - confirm.
void rrdcalc_unlink_and_delete(RRDHOST *host, RRDCALC *rc, bool having_ll_wrlock);
// Constants for the ${...} placeholders recognised in alert text.
// NOTE(review): the exact meaning of RRDCALC_VAR_MAX (a size or a count limit)
// is not visible in this header - confirm at its point of use.
#define RRDCALC_VAR_MAX         100
#define RRDCALC_VAR_FAMILY      "${family}"
#define RRDCALC_VAR_LABEL       "${label:"
// length of RRDCALC_VAR_LABEL, not counting the terminating NUL
#define RRDCALC_VAR_LABEL_LEN   (sizeof(RRDCALC_VAR_LABEL)-1)
#endif //NETDATA_RRDCALC_H