mirror of
https://github.com/netdata/netdata.git
synced 2025-04-15 10:04:15 +00:00

* cleanup alerts * fix references * fix references * fix references * load alerts once and apply them to each node * simplify health_create_alarm_entry() * Compile without warnings with compiler flags: -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self * code re-organization and cleanup * generate patterns when applying prototypes; give unique dyncfg names to all alerts * eval expressions keep the source and the parsed_as as STRING pointers * renamed host to node in dyncfg ids * renamed host to node in dyncfg ids * add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level * working functionality * code re-organization: moved health event-loop to a new file, moved health globals to health.c * rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instantiated only when they are linked to RRDSETs * dyncfg alert prototypes initialization for alerts * health dyncfg split to separate file * cleanup not-needed code * normalize matches between parsing and json * also detect !* for disabled alerts * dyncfg capability disabled * Store alert config part1 * Add rrdlabels_common_count * wip health variables lookup without indexes * Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter * working variables with runtime lookup * working variables with runtime lookup * delete rrddimvar and rrdfamily index * remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts * added /api/v1/variable that resolves a variable the same way alerts do * remove rrdcalc from eval * remove debug code * remove duplicate assignment * Fix memory leak * all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts * hide all internal structures of EVAL * Enable -Wformat flag Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> * Adjust binding for calculation, warning, critical * Remove 
unused macro * Update config hash id * use the right info and summary in alerts log * use synchronous queries for alerts * Handle cases when config_hash_id is missing from health_log * remove deadlock from health worker * parsing to json payload for health alert prototypes * cleaner parsing and avoiding memory leaks in case of duplicate members in json * fix left-over rename of function * Keep original lookup field to send to the cloud Cleanup / rename function to store config Remove unused DEFINEs, functions * Use ac->lookup * link jobs to the host when the template is registered; do not accept running a function without a host * full dyncfg support for health alerts, except action TEST * working dyncfg additions, updates, removals * fixed missing source, wrong status updates * add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint * fix dyncfg unittest * rename functions * generalize the json-c parser macros and move them to libnetdata * report progress when enabling and disabling dyncfg templates * moved rrdcalc and rrdvar to health * update alarms * added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts * enable parsed json alerts; allow sending back accepted but disabled * added format_version for alerts payload; enables/disables status now is also inherited by the status of the rules; fixed variable names in json output * remove the RRDHOST pointer from DYNCFG * Fix command field submitted to the cloud * do not send updates to creation requests, for DYNCFG jobs --------- Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: ilyam8 <ilya@netdata.cloud>
130 lines
4.5 KiB
C
130 lines
4.5 KiB
C
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#ifndef NETDATA_HEALTH_INTERNALS_H
|
|
#define NETDATA_HEALTH_INTERNALS_H
|
|
|
|
#include "health.h"
|
|
|
|
// Default, upper and lower limits for the number of health log entries
// (all unsigned constants).
#define HEALTH_LOG_ENTRIES_DEFAULT 1000U
#define HEALTH_LOG_ENTRIES_MAX 100000U
#define HEALTH_LOG_ENTRIES_MIN 10U

// Default health log history: 5 days, expressed in seconds.
#define HEALTH_LOG_HISTORY_DEFAULT (5 * 86400)

// NOTE(review): presumably the maximum length of a single line of a health
// configuration file - confirm against the parser that uses it.
#define HEALTH_CONF_MAX_LINE 4096
|
|
|
|
// Keyword strings of the health alert configuration; each macro names one
// key of an alert definition.
// NOTE(review): presumably these are the keys matched by the configuration
// parser behind health_readfile() - confirm where each one is consumed.

// alert identity and the chart/context it attaches to
#define HEALTH_ALARM_KEY        "alarm"
#define HEALTH_TEMPLATE_KEY     "template"
#define HEALTH_CHART_KEY        "chart"
#define HEALTH_CONTEXT_KEY      "context"
#define HEALTH_ON_KEY           "on"

// matching criteria
#define HEALTH_HOST_KEY         "hosts"
#define HEALTH_OS_KEY           "os"
#define HEALTH_PLUGIN_KEY       "plugin"
#define HEALTH_MODULE_KEY       "module"
#define HEALTH_CHARTS_KEY       "charts"

// data lookup, calculation and thresholds
#define HEALTH_LOOKUP_KEY       "lookup"
#define HEALTH_CALC_KEY         "calc"
#define HEALTH_EVERY_KEY        "every"
#define HEALTH_GREEN_KEY        "green"
#define HEALTH_RED_KEY          "red"
#define HEALTH_WARN_KEY         "warn"
#define HEALTH_CRIT_KEY         "crit"

// notification and descriptive metadata
#define HEALTH_EXEC_KEY         "exec"
#define HEALTH_RECIPIENT_KEY    "to"
#define HEALTH_UNITS_KEY        "units"
#define HEALTH_SUMMARY_KEY      "summary"
#define HEALTH_INFO_KEY         "info"
#define HEALTH_CLASS_KEY        "class"
#define HEALTH_COMPONENT_KEY    "component"
#define HEALTH_TYPE_KEY         "type"
#define HEALTH_DELAY_KEY        "delay"
#define HEALTH_OPTIONS_KEY      "options"
#define HEALTH_REPEAT_KEY       "repeat"

// label-based matching and dimension iteration
#define HEALTH_HOST_LABEL_KEY   "host labels"
#define HEALTH_FOREACH_KEY      "foreach"
#define HEALTH_CHART_LABEL_KEY  "chart labels"
|
|
|
|
// ALERT_ACTION_OPTIONS helpers: emit the options as a JSON array under `key`
// into `wb`, and parse options from text.
// NOTE(review): alert_action_options_parse() takes a non-const string -
// presumably it tokenizes the input in place; confirm before passing
// read-only memory. alert_action_options_parse_one() takes a const string.
void alert_action_options_to_buffer_json_array(BUFFER *wb, const char *key, ALERT_ACTION_OPTIONS options);
ALERT_ACTION_OPTIONS alert_action_options_parse(char *o);
ALERT_ACTION_OPTIONS alert_action_options_parse_one(const char *o);
|
|
|
|
typedef struct rrd_alert_prototype {
|
|
struct rrd_alert_match match;
|
|
struct rrd_alert_config config;
|
|
|
|
struct {
|
|
uint32_t uses;
|
|
bool enabled;
|
|
bool is_on_disk;
|
|
SPINLOCK spinlock;
|
|
struct rrd_alert_prototype *prev, *next;
|
|
} _internal;
|
|
} RRD_ALERT_PROTOTYPE;
|
|
// Lifecycle of RRD_ALERT_PROTOTYPE objects: add, cleanup, free.
// NOTE(review): ownership of `ap` after health_prototype_add(), and the
// difference between cleanup (presumably releases members) and free
// (presumably also releases the object), must be confirmed in the
// implementation.
bool health_prototype_add(RRD_ALERT_PROTOTYPE *ap);
void health_prototype_cleanup(RRD_ALERT_PROTOTYPE *ap);
void health_prototype_free(RRD_ALERT_PROTOTYPE *ap);
|
|
|
|
struct health_plugin_globals {
|
|
struct {
|
|
SPINLOCK spinlock;
|
|
bool done;
|
|
} initialization;
|
|
|
|
struct {
|
|
bool enabled;
|
|
bool stock_enabled;
|
|
bool use_summary_for_notifications;
|
|
|
|
unsigned int health_log_entries_max;
|
|
uint32_t health_log_history; // the health log history in seconds to be kept in db
|
|
|
|
STRING *silencers_filename;
|
|
STRING *default_exec;
|
|
STRING *default_recipient;
|
|
|
|
SIMPLE_PATTERN *enabled_alerts;
|
|
|
|
uint32_t default_warn_repeat_every; // the default value for the interval between repeating warning notifications
|
|
uint32_t default_crit_repeat_every; // the default value for the interval between repeating critical notifications
|
|
|
|
int32_t run_at_least_every_seconds;
|
|
int32_t postpone_alarms_during_hibernation_for_seconds;
|
|
} config;
|
|
|
|
struct {
|
|
DICTIONARY *dict;
|
|
} prototypes;
|
|
};
|
|
|
|
extern struct health_plugin_globals health_globals;
|
|
|
|
int health_readfile(const char *filename, void *data, bool stock_config);
|
|
void unlink_alarm_notify_in_progress(ALARM_ENTRY *ae);
|
|
void wait_for_all_notifications_to_finish_before_allowing_health_to_be_cleaned_up(void);
|
|
|
|
void health_alarm_wait_for_execution(ALARM_ENTRY *ae);
|
|
|
|
// Adds an alert for chart `st` of `host`, based on prototype `ap`.
// NOTE(review): presumably returns true when an alert was created - confirm.
bool rrdcalc_add_from_prototype(RRDHOST *host, RRDSET *st, RRD_ALERT_PROTOTYPE *ap);
|
|
|
|
int dyncfg_health_cb(const char *transaction, const char *id, DYNCFG_CMDS cmd, const char *add_name,
|
|
BUFFER *payload, usec_t *stop_monotonic_ut, bool *cancelled,
|
|
BUFFER *result, const char *source, void *data);
|
|
|
|
void health_dyncfg_unregister_all_prototypes(void);
|
|
void health_dyncfg_register_all_prototypes(void);
|
|
void health_prototype_to_json(BUFFER *wb, RRD_ALERT_PROTOTYPE *ap, bool for_hashing);
|
|
|
|
// Looks up the alert variable named `variable`.
// NOTE(review): presumably stores the value in `*result` and returns true
// when the variable is resolved; the type expected in `data` is not visible
// in this header - confirm in the implementation.
bool alert_variable_lookup(STRING *variable, void *data, NETDATA_DOUBLE *result);
|
|
|
|
struct health_raised_summary;
|
|
struct health_raised_summary *alerts_raised_summary_create(RRDHOST *host);
|
|
void alerts_raised_summary_populate(struct health_raised_summary *hrm);
|
|
void alerts_raised_summary_free(struct health_raised_summary *hrm);
|
|
void health_send_notification(RRDHOST *host, ALARM_ENTRY *ae, struct health_raised_summary *hrm);
|
|
void health_alarm_log_process_to_send_notifications(RRDHOST *host, struct health_raised_summary *hrm);
|
|
|
|
// Apply alert prototype `ap` to a single host, or to all hosts.
// NOTE(review): locking requirements are not visible in this header -
// confirm in the implementation.
void health_apply_prototype_to_host(RRDHOST *host, RRD_ALERT_PROTOTYPE *ap);
void health_prototype_apply_to_all_hosts(RRD_ALERT_PROTOTYPE *ap);
|
|
|
|
#endif //NETDATA_HEALTH_INTERNALS_H
|