mirror of
https://github.com/netdata/netdata.git
synced 2025-04-15 01:58:34 +00:00

* cleanup alerts * fix references * fix references * fix references * load alerts once and apply them to each node * simplify health_create_alarm_entry() * Compile without warnings with compiler flags: -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self * code re-organization and cleanup * generate patterns when applying prototypes; give unique dyncfg names to all alerts * eval expressions keep the source and the parsed_as as STRING pointers * renamed host to node in dyncfg ids * renamed host to node in dyncfg ids * add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level * working functionality * code re-organization: moved health event-loop to a new file, moved health globals to health.c * rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instantiated only when they are linked to RRDSETs * dyncfg alert prototypes initialization for alerts * health dyncfg split to separate file * cleanup not-needed code * normalize matches between parsing and json * also detect !* for disabled alerts * dyncfg capability disabled * Store alert config part1 * Add rrdlabels_common_count * wip health variables lookup without indexes * Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter * working variables with runtime lookup * working variables with runtime lookup * delete rrddimvar and rrdfamily index * remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts * added /api/v1/variable that resolves a variable the same way alerts do * remove rrdcalc from eval * remove debug code * remove duplicate assignment * Fix memory leak * all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts * hide all internal structures of EVAL * Enable -Wformat flag Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> * Adjust binding for calculation, warning, critical * Remove 
unused macro * Update config hash id * use the right info and summary in alerts log * use synchronous queries for alerts * Handle cases when config_hash_id is missing from health_log * remove deadlock from health worker * parsing to json payload for health alert prototypes * cleaner parsing and avoiding memory leaks in case of duplicate members in json * fix left-over rename of function * Keep original lookup field to send to the cloud Cleanup / rename function to store config Remove unused DEFINEs, functions * Use ac->lookup * link jobs to the host when the template is registered; do not accept running a function without a host * full dyncfg support for health alerts, except action TEST * working dyncfg additions, updates, removals * fixed missing source, wrong status updates * add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint * fix dyncfg unittest * rename functions * generalize the json-c parser macros and move them to libnetdata * report progress when enabling and disabling dyncfg templates * moved rrdcalc and rrdvar to health * update alarms * added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts * enable parsed json alerts; allow sending back accepted but disabled * added format_version for alerts payload; enables/disables status now is also inherited by the status of the rules; fixed variable names in json output * remove the RRDHOST pointer from DYNCFG * Fix command field submitted to the cloud * do not send updates to creation requests, for DYNCFG jobs --------- Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: ilyam8 <ilya@netdata.cloud>
146 lines
5.9 KiB
C
146 lines
5.9 KiB
C
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include "../database/rrd.h"
|
|
#include "../web/api/queries/rrdr.h"
|
|
#include "health_prototypes.h"
|
|
|
|
#ifndef NETDATA_RRDCALC_H
|
|
#define NETDATA_RRDCALC_H 1
|
|
|
|
// calculated variables (defined in health configuration)
|
|
// These aggregate time-series data at fixed intervals
|
|
// (defined in their update_every member below)
|
|
// They increase the overhead of netdata.
|
|
//
|
|
// These calculations are stored under RRDHOST.
|
|
// They are also linked to RRDSET (of course only when a
|
|
// matching chart is found).
|
|
|
|
// Status of an alert (RRDCALC).
// The numeric values are part of the contract: the RAISED / WARNING / CRITICAL
// entries are explicitly marked as not to be renumbered.
// NOTE(review): presumably the numbers are persisted or exchanged with other
// components - confirm before touching any of them.
typedef enum rrdcalc_status {
    RRDCALC_STATUS_REMOVED       = -2,
    RRDCALC_STATUS_UNDEFINED     = -1,
    RRDCALC_STATUS_UNINITIALIZED =  0,
    RRDCALC_STATUS_CLEAR         =  1,
    RRDCALC_STATUS_RAISED        =  2, // DO NOT CHANGE THESE NUMBERS
    RRDCALC_STATUS_WARNING       =  3, // DO NOT CHANGE THESE NUMBERS
    RRDCALC_STATUS_CRITICAL      =  4, // DO NOT CHANGE THESE NUMBERS
} RRDCALC_STATUS;
|
|
|
|
// Runtime flags of an alert, stored in the run_flags member of struct rrdcalc.
// Every flag occupies its own bit, so flags can be combined with bitwise OR.
typedef enum {
    RRDCALC_FLAG_DB_ERROR   = (1 << 0),
    RRDCALC_FLAG_DB_NAN     = (1 << 1),
    // bit 2 is intentionally left unused; its former name is kept for reference:
    // RRDCALC_FLAG_DB_STALE   = (1 << 2),
    RRDCALC_FLAG_CALC_ERROR = (1 << 3),
    RRDCALC_FLAG_WARN_ERROR = (1 << 4),
    RRDCALC_FLAG_CRIT_ERROR = (1 << 5),
    RRDCALC_FLAG_RUNNABLE   = (1 << 6),
    RRDCALC_FLAG_DISABLED   = (1 << 7),
    RRDCALC_FLAG_SILENCED   = (1 << 8),
    RRDCALC_FLAG_RUN_ONCE   = (1 << 9),
} RRDCALC_FLAGS;
|
|
// Declared here, defined elsewhere.
// NOTE(review): judging by the name, this appends the set bits of `flags` as a
// JSON array under `key` into the buffer `wb` - confirm against the implementation.
void rrdcalc_flags_to_json_array(BUFFER *wb, const char *key, RRDCALC_FLAGS flags);
|
|
|
|
// Mask of the alert options that, as the name says, are not RRDR (query) options.
// It currently expands to RRDCALC_OPTION_NO_CLEAR_NOTIFICATION only, which is
// not defined in this file.
#define RRDCALC_ALL_OPTIONS_EXCLUDING_THE_RRDR_ONES (RRDCALC_OPTION_NO_CLEAR_NOTIFICATION)
|
|
|
|
struct rrdcalc {
|
|
uint32_t id; // the unique id of this alarm
|
|
uint32_t next_event_id; // the next event id that will be used for this alarm
|
|
|
|
STRING *key; // the unique key in the host's rrdcalc_root_index
|
|
STRING *chart; // the chart id this should be linked to
|
|
|
|
struct rrd_alert_match match;
|
|
struct rrd_alert_config config;
|
|
|
|
// ------------------------------------------------------------------------
|
|
// runtime information
|
|
|
|
STRING *summary; // the original summary field before any variable replacement
|
|
STRING *info; // the original info field before any variable replacement
|
|
|
|
RRDCALC_STATUS old_status; // the old status of the alarm
|
|
RRDCALC_STATUS status; // the current status of the alarm
|
|
|
|
NETDATA_DOUBLE value; // the current value of the alarm
|
|
NETDATA_DOUBLE old_value; // the previous value of the alarm
|
|
NETDATA_DOUBLE last_status_change_value; // the value at the last status change
|
|
|
|
RRDCALC_FLAGS run_flags; // check RRDCALC_FLAG_*
|
|
|
|
time_t last_updated; // the last update timestamp of the alarm
|
|
time_t next_update; // the next update timestamp of the alarm
|
|
time_t last_status_change; // the timestamp of the last time this alarm changed status
|
|
time_t last_repeat; // the last time the alarm got repeated
|
|
uint32_t times_repeat; // number of times the alarm got repeated
|
|
|
|
time_t db_after; // the first timestamp evaluated by the db lookup
|
|
time_t db_before; // the last timestamp evaluated by the db lookup
|
|
|
|
time_t delay_up_to_timestamp; // the timestamp up to which we should delay notifications
|
|
int delay_up_current; // the current up notification delay duration
|
|
int delay_down_current; // the current down notification delay duration
|
|
int delay_last; // the last delay we used
|
|
|
|
// ------------------------------------------------------------------------
|
|
// the chart this alarm it is linked to
|
|
|
|
size_t labels_version;
|
|
struct rrdset *rrdset;
|
|
|
|
struct rrdcalc *next;
|
|
struct rrdcalc *prev;
|
|
};
|
|
|
|
// Convenience accessors: each one passes a STRING member of the alert (or of
// its embedded config) through string2str(), which is defined outside this file.
#define rrdcalc_name(rc)           string2str((rc)->config.name)
#define rrdcalc_chart_name(rc)     string2str((rc)->chart)
#define rrdcalc_exec(rc)           string2str((rc)->config.exec)
#define rrdcalc_recipient(rc)      string2str((rc)->config.recipient)
#define rrdcalc_classification(rc) string2str((rc)->config.classification)
#define rrdcalc_component(rc)      string2str((rc)->config.component)
#define rrdcalc_type(rc)           string2str((rc)->config.type)
#define rrdcalc_source(rc)         string2str((rc)->config.source)
#define rrdcalc_units(rc)          string2str((rc)->config.units)
#define rrdcalc_dimensions(rc)     string2str((rc)->config.dimensions)
|
|
|
|
// Iteration helpers over the alerts index of a host, (host)->rrdcalc_root_index.
// They are thin wrappers around the dfe_start_read() / dfe_start_reentrant() /
// dfe_done() macros, which are defined outside this file.
// NOTE(review): a loop opened with one of the foreach_* macros is presumably
// meant to be closed with foreach_rrdcalc_in_rrdhost_done(rc) - confirm against
// the dfe_* definitions.
//
// No macro body ends with a line-continuation backslash: a trailing '\' would
// splice whatever follows on the next line into the macro.
#define foreach_rrdcalc_in_rrdhost_read(host, rc) \
    dfe_start_read((host)->rrdcalc_root_index, rc)

#define foreach_rrdcalc_in_rrdhost_reentrant(host, rc) \
    dfe_start_reentrant((host)->rrdcalc_root_index, rc)

#define foreach_rrdcalc_in_rrdhost_done(rc) \
    dfe_done(rc)
|
|
|
|
// Evaluates to (rc)->config.after, so it is true whenever that member is non-zero.
// NOTE(review): presumably `after` is zero when the alert has no db lookup
// configured - confirm against the alert config parser.
#define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->config.after)
|
|
|
|
// Declared here, defined elsewhere.
// NOTE(review): judging by the name, this refreshes the alert's info text using
// the labels of the chart it is linked to - confirm against the implementation.
void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc);
|
|
|
|
const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name);
|
|
void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca);
|
|
RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca);
|
|
|
|
// Declared here, defined elsewhere.
// NOTE(review): judging by the name, returns a textual name for `status`;
// the handling of values outside RRDCALC_STATUS is not visible here - confirm.
const char *rrdcalc_status2string(RRDCALC_STATUS status);
|
|
|
|
// Declared here, defined elsewhere.
// NOTE(review): judging by the signature, returns the alert id for the given
// host / chart / alert name; `next_event_id` and `config_hash_id` are passed by
// pointer, so they are presumably in/out parameters - confirm against the implementation.
uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id);
|
|
|
|
// Tells whether the alert is configured to repeat its notifications.
//
// Returns 1 when config.warn_repeat_every or config.crit_repeat_every of the
// alert is greater than zero, 0 otherwise.
// `rc` is dereferenced unconditionally, so it must not be NULL.
static inline int rrdcalc_isrepeating(RRDCALC *rc) {
    // the condition is wrapped in unlikely(), i.e. hinted as the uncommon case
    if (unlikely(rc->config.warn_repeat_every > 0 || rc->config.crit_repeat_every > 0)) {
        return 1;
    }

    return 0;
}
|
|
|
|
void rrdcalc_unlink_and_delete_all_rrdset_alerts(RRDSET *st);
|
|
void rrdcalc_delete_all(RRDHOST *host);
|
|
|
|
// Lifecycle of the per-host alerts index. Declared here, defined elsewhere.
// NOTE(review): presumably these create and destroy the host's
// rrdcalc_root_index, the member the foreach_rrdcalc_in_rrdhost_* macros
// iterate - confirm against the implementations.
void rrdcalc_rrdhost_index_init(RRDHOST *host);
void rrdcalc_rrdhost_index_destroy(RRDHOST *host);
|
|
|
|
// Declared here, defined elsewhere.
// NOTE(review): judging by the names, removes the single alert `rc` from `host`;
// `having_ll_wrlock` presumably tells the callee that the caller already holds
// the write lock of the linked list the alert belongs to - confirm.
void rrdcalc_unlink_and_delete(RRDHOST *host, RRDCALC *rc, bool having_ll_wrlock);
|
|
|
|
// Constants for the ${...} variables that may appear in alert text fields.
// NOTE(review): the users of RRDCALC_VAR_MAX are not visible in this header -
// presumably an upper bound related to variable replacement; confirm.
#define RRDCALC_VAR_MAX 100

// the literal placeholder for the chart family
#define RRDCALC_VAR_FAMILY "${family}"

// the prefix of a label placeholder; the text that follows it is not covered here
#define RRDCALC_VAR_LABEL "${label:"

// length of RRDCALC_VAR_LABEL without the terminating NUL
#define RRDCALC_VAR_LABEL_LEN (sizeof(RRDCALC_VAR_LABEL) - 1)
|
|
|
|
#endif //NETDATA_RRDCALC_H
|