mirror of
https://github.com/netdata/netdata.git
synced 2025-04-04 05:25:31 +00:00
DYNCFG: dynamically configured alerts (#16779)
* cleanup alerts * fix references * fix references * fix references * load alerts once and apply them to each node * simplify health_create_alarm_entry() * Compile without warnings with compiler flags: -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self * code re-organization and cleanup * generate patterns when applying prototypes; give unique dyncfg names to all alerts * eval expressions keep the source and the parsed_as as STRING pointers * renamed host to node in dyncfg ids * renamed host to node in dyncfg ids * add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level * working functionality * code re-organization: moved health event-loop to a new file, moved health globals to health.c * rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instantiated only when they are linked to RRDSETs * dyncfg alert prototypes initialization for alerts * health dyncfg split to separate file * cleanup not-needed code * normalize matches between parsing and json * also detect !* for disabled alerts * dyncfg capability disabled * Store alert config part1 * Add rrdlabels_common_count * wip health variables lookup without indexes * Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter * working variables with runtime lookup * working variables with runtime lookup * delete rrddimvar and rrdfamily index * remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts * added /api/v1/variable that resolves a variable the same way alerts do * remove rrdcalc from eval * remove debug code * remove duplicate assignment * Fix memory leak * all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts * hide all internal structures of EVAL * Enable -Wformat flag Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> * Adjust binding for calculation, warning, critical * Remove 
unused macro * Update config hash id * use the right info and summary in alerts log * use synchronous queries for alerts * Handle cases when config_hash_id is missing from health_log * remove deadlock from health worker * parsing to json payload for health alert prototypes * cleaner parsing and avoiding memory leaks in case of duplicate members in json * fix left-over rename of function * Keep original lookup field to send to the cloud Cleanup / rename function to store config Remove unused DEFINEs, functions * Use ac->lookup * link jobs to the host when the template is registered; do not accept running a function without a host * full dyncfg support for health alerts, except action TEST * working dyncfg additions, updates, removals * fixed missing source, wrong status updates * add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint * fix dyncfg unittest * rename functions * generalize the json-c parser macros and move them to libnetdata * report progress when enabling and disabling dyncfg templates * moved rrdcalc and rrdvar to health * update alarms * added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts * enable parsed json alerts; allow sending back accepted but disabled * added format_version for alerts payload; enables/disables status now is also inherited by the status of the rules; fixed variable names in json output * remove the RRDHOST pointer from DYNCFG * Fix command field submitted to the cloud * do not send updates to creation requests, for DYNCFG jobs --------- Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: ilyam8 <ilya@netdata.cloud>
This commit is contained in:
parent
33412db1f5
commit
f466b8aef5
129 changed files with 7244 additions and 6798 deletions
CMakeLists.txt
aclk
collectors
apps.plugin
cgroups.plugin
freeipmi.plugin
plugins.d
proc.plugin
ipc.cproc_loadavg.cproc_net_dev.cproc_net_netstat.cproc_net_sockstat.cproc_net_stat_conntrack.cproc_stat.csys_class_drm.csys_class_infiniband.c
statsd.plugin
systemd-journal.plugin
daemon
analytics.ccommands.ccommon.h
config
dyncfg-echo.cdyncfg-files.cdyncfg-inline.cdyncfg-intercept.cdyncfg-internals.hdyncfg-tree.cdyncfg-unittest.cdyncfg.c
main.cservice.cunit_test.cdatabase
contexts
engine
rrd.hrrdcalc.crrdcalc.hrrdcalctemplate.crrdcalctemplate.hrrddim.crrddimvar.crrddimvar.hrrdfamily.crrdfunctions-inflight.crrdhost.crrdlabels.crrdlabels.hrrdset.crrdsetvar.crrdsetvar.hrrdvar.crrdvar.hsqlite
exporting/prometheus
health
health.c
health.d
health.hhealth_config.chealth_dyncfg.chealth_event_loop.chealth_internals.hhealth_json.chealth_log.chealth_notifications.chealth_prototypes.chealth_prototypes.hhealth_silencers.chealth_silencers.hhealth_variable.crrdcalc.crrdcalc.hrrdvar.crrdvar.hschema.d
libnetdata
|
@ -595,8 +595,6 @@ set(LIBNETDATA_FILES
|
|||
libnetdata/functions_evloop/functions_evloop.h
|
||||
libnetdata/gorilla/gorilla.cc
|
||||
libnetdata/gorilla/gorilla.h
|
||||
libnetdata/health/health.c
|
||||
libnetdata/health/health.h
|
||||
libnetdata/july/july.c
|
||||
libnetdata/july/july.h
|
||||
libnetdata/inlined.h
|
||||
|
@ -657,6 +655,7 @@ set(LIBNETDATA_FILES
|
|||
libnetdata/http/content_type.h
|
||||
libnetdata/config/dyncfg.c
|
||||
libnetdata/config/dyncfg.h
|
||||
libnetdata/json/json-c-parser-inline.h
|
||||
)
|
||||
|
||||
if(ENABLE_PLUGIN_EBPF)
|
||||
|
@ -863,7 +862,6 @@ set(API_PLUGIN_FILES
|
|||
web/api/formatters/charts2json.h
|
||||
web/api/formatters/rrdset2json.c
|
||||
web/api/formatters/rrdset2json.h
|
||||
web/api/health/health_cmdapi.c
|
||||
web/api/ilove/ilove.c
|
||||
web/api/ilove/ilove.h
|
||||
web/rtc/webrtc.c
|
||||
|
@ -896,6 +894,19 @@ set(HEALTH_PLUGIN_FILES
|
|||
health/health_config.c
|
||||
health/health_json.c
|
||||
health/health_log.c
|
||||
health/health_prototypes.c
|
||||
health/health_prototypes.h
|
||||
health/health_silencers.c
|
||||
health/health_silencers.h
|
||||
health/health_internals.h
|
||||
health/health_notifications.c
|
||||
health/health_event_loop.c
|
||||
health/health_dyncfg.c
|
||||
health/health_variable.c
|
||||
health/rrdcalc.c
|
||||
health/rrdcalc.h
|
||||
health/rrdvar.c
|
||||
health/rrdvar.h
|
||||
)
|
||||
|
||||
set(IDLEJITTER_PLUGIN_FILES collectors/idlejitter.plugin/plugin_idlejitter.c)
|
||||
|
@ -944,16 +955,9 @@ set(RRD_PLUGIN_FILES
|
|||
database/contexts/rrdcontext.c
|
||||
database/contexts/rrdcontext.h
|
||||
database/contexts/worker.c
|
||||
database/rrdcalc.c
|
||||
database/rrdcalc.h
|
||||
database/rrdcalctemplate.c
|
||||
database/rrdcalctemplate.h
|
||||
database/rrdcollector.c
|
||||
database/rrdcollector.h
|
||||
database/rrddim.c
|
||||
database/rrddimvar.c
|
||||
database/rrddimvar.h
|
||||
database/rrdfamily.c
|
||||
database/rrdfunctions.c
|
||||
database/rrdfunctions.h
|
||||
database/rrdfunctions-inline.c
|
||||
|
@ -967,10 +971,6 @@ set(RRD_PLUGIN_FILES
|
|||
database/rrd.c
|
||||
database/rrd.h
|
||||
database/rrdset.c
|
||||
database/rrdsetvar.c
|
||||
database/rrdsetvar.h
|
||||
database/rrdvar.c
|
||||
database/rrdvar.h
|
||||
database/storage_engine.c
|
||||
database/storage_engine.h
|
||||
database/ram/rrddim_mem.c
|
||||
|
@ -1410,11 +1410,11 @@ if(ENABLE_H2O)
|
|||
)
|
||||
|
||||
target_compile_options(h2o PRIVATE
|
||||
-Wno-old-style-declaration
|
||||
-Wno-all -Wno-extra
|
||||
-Wno-shadow
|
||||
-Wno-deprecated-declarations
|
||||
-Wno-unused-parameter
|
||||
-Wno-sign-compare
|
||||
-Wno-missing-field-initializers)
|
||||
-Wformat
|
||||
)
|
||||
|
||||
target_compile_options(h2o PUBLIC -DH2O_USE_LIBUV=0)
|
||||
|
||||
|
@ -1429,12 +1429,13 @@ endif()
|
|||
|
||||
add_library(judy STATIC ${LIBJUDY_SOURCES})
|
||||
|
||||
target_compile_options(judy PUBLIC
|
||||
-Wno-sign-compare
|
||||
-Wno-implicit-fallthrough
|
||||
target_compile_options(judy PRIVATE
|
||||
-Wno-all -Wno-extra
|
||||
-Wno-shadow
|
||||
-Wformat
|
||||
)
|
||||
|
||||
target_compile_definitions(judy PUBLIC
|
||||
target_compile_definitions(judy PRIVATE
|
||||
JUDYL
|
||||
$<$<EQUAL:${CMAKE_SIZEOF_VOID_P},8>:JU_64BIT>
|
||||
)
|
||||
|
@ -2437,6 +2438,7 @@ install(FILES
|
|||
|
||||
install(FILES
|
||||
collectors/systemd-journal.plugin/schema.d/systemd-journal:monitored-directories.json
|
||||
health/schema.d/health:alert:prototype.json
|
||||
DESTINATION usr/lib/netdata/conf.d/schema.d)
|
||||
|
||||
#
|
||||
|
|
|
@ -18,6 +18,7 @@ const struct capability *aclk_get_agent_capas()
|
|||
{ .name = "http_api_v2", .version = HTTP_API_V2_VERSION, .enabled = 1 },
|
||||
{ .name = "health", .version = 1, .enabled = 0 }, // index 7, below
|
||||
{ .name = "req_cancel", .version = 1, .enabled = 1 },
|
||||
//{ .name = "dyncfg", .version = 1, .enabled = 1 },
|
||||
{ .name = NULL, .version = 0, .enabled = 0 }
|
||||
};
|
||||
agent_capabilities[2].version = ml_capable() ? 1 : 0;
|
||||
|
@ -46,6 +47,7 @@ struct capability *aclk_get_node_instance_capas(RRDHOST *host)
|
|||
{ .name = "http_api_v2", .version = HTTP_API_V2_VERSION, .enabled = 1 },
|
||||
{ .name = "health", .version = 1, .enabled = host->health.health_enabled },
|
||||
{ .name = "req_cancel", .version = 1, .enabled = 1 },
|
||||
//{ .name = "dyncfg", .version = 1, .enabled = 1 },
|
||||
{ .name = NULL, .version = 0, .enabled = 0 }
|
||||
};
|
||||
|
||||
|
|
|
@ -145,16 +145,16 @@ static void mws_connack_callback_ng(void *user_ctx, int code)
|
|||
|
||||
static ssize_t mqtt_send_cb(void *user_ctx, const void* buf, size_t len)
|
||||
{
|
||||
mqtt_wss_client mqtt_wss_client = user_ctx;
|
||||
mqtt_wss_client client = user_ctx;
|
||||
#ifdef DEBUG_ULTRA_VERBOSE
|
||||
mws_debug(mqtt_wss_client->log, "mqtt_pal_sendall(len=%d)", len);
|
||||
mws_debug(client->log, "mqtt_pal_sendall(len=%d)", len);
|
||||
#endif
|
||||
int ret = ws_client_send(mqtt_wss_client->ws_client, WS_OP_BINARY_FRAME, buf, len);
|
||||
int ret = ws_client_send(client->ws_client, WS_OP_BINARY_FRAME, buf, len);
|
||||
if (ret >= 0 && (size_t)ret != len) {
|
||||
#ifdef DEBUG_ULTRA_VERBOSE
|
||||
mws_debug(mqtt_wss_client->log, "Not complete message sent (Msg=%d,Sent=%d). Need to arm POLLOUT!", len, ret);
|
||||
mws_debug(client->log, "Not complete message sent (Msg=%d,Sent=%d). Need to arm POLLOUT!", len, ret);
|
||||
#endif
|
||||
mqtt_wss_client->mqtt_didnt_finish_write = 1;
|
||||
client->mqtt_didnt_finish_write = 1;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -1349,8 +1349,8 @@ void arl_callback_status_nonvoluntary_ctxt_switches(const char *name, uint32_t h
|
|||
pid_incremental_rate(stat, p->status_nonvoluntary_ctxt_switches, str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)));
|
||||
}
|
||||
|
||||
static void update_proc_state_count(char proc_state) {
|
||||
switch (proc_state) {
|
||||
static void update_proc_state_count(char proc_stt) {
|
||||
switch (proc_stt) {
|
||||
case 'S':
|
||||
proc_state_count[PROC_STATUS_SLEEPING] += 1;
|
||||
break;
|
||||
|
|
|
@ -96,7 +96,7 @@ void update_cpu_utilization_limit_chart(struct cgroup *cg, NETDATA_DOUBLE cpu_li
|
|||
|
||||
cg->prev_cpu_usage = cpu_usage;
|
||||
|
||||
rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, cpu_limit);
|
||||
rrdvar_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, cpu_limit);
|
||||
rrdset_done(chart);
|
||||
}
|
||||
|
||||
|
|
|
@ -287,16 +287,16 @@ struct cgroup {
|
|||
char *filename_cpu_cfs_quota;
|
||||
unsigned long long cpu_cfs_quota;
|
||||
|
||||
const RRDSETVAR_ACQUIRED *chart_var_cpu_limit;
|
||||
const RRDVAR_ACQUIRED *chart_var_cpu_limit;
|
||||
NETDATA_DOUBLE prev_cpu_usage;
|
||||
|
||||
char *filename_memory_limit;
|
||||
unsigned long long memory_limit;
|
||||
const RRDSETVAR_ACQUIRED *chart_var_memory_limit;
|
||||
const RRDVAR_ACQUIRED *chart_var_memory_limit;
|
||||
|
||||
char *filename_memoryswap_limit;
|
||||
unsigned long long memoryswap_limit;
|
||||
const RRDSETVAR_ACQUIRED *chart_var_memoryswap_limit;
|
||||
const RRDVAR_ACQUIRED *chart_var_memoryswap_limit;
|
||||
|
||||
const DICTIONARY_ITEM *cgroup_netdev_link;
|
||||
|
||||
|
|
|
@ -1282,12 +1282,12 @@ cpu_limits2_err:
|
|||
|
||||
static inline int update_memory_limits(struct cgroup *cg) {
|
||||
char **filename = &cg->filename_memory_limit;
|
||||
const RRDSETVAR_ACQUIRED **chart_var = &cg->chart_var_memory_limit;
|
||||
const RRDVAR_ACQUIRED **chart_var = &cg->chart_var_memory_limit;
|
||||
unsigned long long *value = &cg->memory_limit;
|
||||
|
||||
if(*filename) {
|
||||
if(unlikely(!*chart_var)) {
|
||||
*chart_var = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_mem_usage, "memory_limit");
|
||||
*chart_var = rrdvar_chart_variable_add_and_acquire(cg->st_mem_usage, "memory_limit");
|
||||
if(!*chart_var) {
|
||||
collector_error("Cannot create cgroup %s chart variable '%s'. Will not update its limit anymore.", cg->id, "memory_limit");
|
||||
freez(*filename);
|
||||
|
@ -1303,7 +1303,8 @@ static inline int update_memory_limits(struct cgroup *cg) {
|
|||
*filename = NULL;
|
||||
}
|
||||
else {
|
||||
rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0));
|
||||
rrdvar_chart_variable_set(
|
||||
cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0));
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
|
@ -1318,11 +1319,12 @@ static inline int update_memory_limits(struct cgroup *cg) {
|
|||
char *s = "max\n\0";
|
||||
if(strcmp(s, buffer) == 0){
|
||||
*value = UINT64_MAX;
|
||||
rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0));
|
||||
rrdvar_chart_variable_set(
|
||||
cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0));
|
||||
return 1;
|
||||
}
|
||||
*value = str2ull(buffer, NULL);
|
||||
rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0));
|
||||
rrdvar_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0));
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -1400,7 +1402,7 @@ void update_cgroup_charts() {
|
|||
}
|
||||
|
||||
if(unlikely(!cg->chart_var_cpu_limit)) {
|
||||
cg->chart_var_cpu_limit = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_cpu, "cpu_limit");
|
||||
cg->chart_var_cpu_limit = rrdvar_chart_variable_add_and_acquire(cg->st_cpu, "cpu_limit");
|
||||
if(!cg->chart_var_cpu_limit) {
|
||||
collector_error("Cannot create cgroup %s chart variable 'cpu_limit'. Will not update its limit anymore.", cg->id);
|
||||
if(cg->filename_cpuset_cpus) freez(cg->filename_cpuset_cpus);
|
||||
|
@ -1430,7 +1432,7 @@ void update_cgroup_charts() {
|
|||
rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_limit);
|
||||
cg->st_cpu_limit = NULL;
|
||||
}
|
||||
rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, NAN);
|
||||
rrdvar_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, NAN);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -101,7 +101,7 @@ collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number
|
|||
return 0;
|
||||
}
|
||||
|
||||
const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name)
|
||||
const RRDVAR_ACQUIRED *rrdvar_chart_variable_add_and_acquire(RRDSET *st, const char *name)
|
||||
{
|
||||
UNUSED(st);
|
||||
UNUSED(name);
|
||||
|
@ -109,7 +109,7 @@ const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET
|
|||
return NULL;
|
||||
}
|
||||
|
||||
void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value)
|
||||
void rrdvar_chart_variable_set(RRDSET *st, const RRDVAR_ACQUIRED *rsa, NETDATA_DOUBLE value)
|
||||
{
|
||||
UNUSED(st);
|
||||
UNUSED(rsa);
|
||||
|
|
|
@ -66,9 +66,8 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
, int sensor_bitmask_type
|
||||
, int sensor_bitmask
|
||||
, char **sensor_bitmask_strings
|
||||
, struct netdata_ipmi_state *state
|
||||
);
|
||||
static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events);
|
||||
, struct netdata_ipmi_state *stt);
|
||||
static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *stt, uint32_t events);
|
||||
|
||||
// END NETDATA CODE
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -906,7 +905,7 @@ const char *netdata_collect_type_to_string(IPMI_COLLECTION_TYPE type) {
|
|||
return "unknown";
|
||||
}
|
||||
|
||||
static void netdata_sensor_set_value(struct sensor *sn, void *sensor_reading, struct netdata_ipmi_state *state __maybe_unused) {
|
||||
static void netdata_sensor_set_value(struct sensor *sn, void *sensor_reading, struct netdata_ipmi_state *stt __maybe_unused) {
|
||||
switch(sn->sensor_reading_type) {
|
||||
case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
|
||||
sn->sensor_reading.bool_value = *((uint8_t *)sensor_reading);
|
||||
|
@ -940,8 +939,7 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
, int sensor_bitmask_type __maybe_unused
|
||||
, int sensor_bitmask __maybe_unused
|
||||
, char **sensor_bitmask_strings __maybe_unused
|
||||
, struct netdata_ipmi_state *state
|
||||
) {
|
||||
, struct netdata_ipmi_state *stt) {
|
||||
if(unlikely(sensor_state == IPMI_MONITORING_STATE_UNKNOWN &&
|
||||
sensor_type == IPMI_MONITORING_SENSOR_TYPE_UNKNOWN &&
|
||||
sensor_units == IPMI_MONITORING_SENSOR_UNITS_UNKNOWN &&
|
||||
|
@ -953,38 +951,38 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
if(unlikely(!sensor_name || !*sensor_name))
|
||||
sensor_name = "UNNAMED";
|
||||
|
||||
state->sensors.collected++;
|
||||
stt->sensors.collected++;
|
||||
|
||||
char key[SENSORS_DICT_KEY_SIZE + 1];
|
||||
snprintfz(key, SENSORS_DICT_KEY_SIZE, "i%d_n%d_t%d_u%d_%s",
|
||||
record_id, sensor_number, sensor_reading_type, sensor_units, sensor_name);
|
||||
|
||||
// find the sensor record
|
||||
const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(state->sensors.dict, key);
|
||||
const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(stt->sensors.dict, key);
|
||||
if(likely(item)) {
|
||||
// recurring collection
|
||||
|
||||
if(state->debug)
|
||||
if(stt->debug)
|
||||
fprintf(stderr, "%s: reusing sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n",
|
||||
program_name, sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type);
|
||||
|
||||
struct sensor *sn = dictionary_acquired_item_value(item);
|
||||
|
||||
if(sensor_reading) {
|
||||
netdata_sensor_set_value(sn, sensor_reading, state);
|
||||
sn->last_collected_metric_ut = state->sensors.now_ut;
|
||||
netdata_sensor_set_value(sn, sensor_reading, stt);
|
||||
sn->last_collected_metric_ut = stt->sensors.now_ut;
|
||||
}
|
||||
|
||||
sn->sensor_state = sensor_state;
|
||||
|
||||
sn->last_collected_state_ut = state->sensors.now_ut;
|
||||
sn->last_collected_state_ut = stt->sensors.now_ut;
|
||||
|
||||
dictionary_acquired_item_release(state->sensors.dict, item);
|
||||
dictionary_acquired_item_release(stt->sensors.dict, item);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if(state->debug)
|
||||
if(stt->debug)
|
||||
fprintf(stderr, "Allocating new sensor data record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n",
|
||||
sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type);
|
||||
|
||||
|
@ -993,12 +991,12 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
bool excluded_state = excluded_status_record_ids_check(record_id);
|
||||
|
||||
if(excluded_metric) {
|
||||
if(state->debug)
|
||||
if(stt->debug)
|
||||
fprintf(stderr, "Sensor '%s' is excluded by excluded_record_ids_check()\n", sensor_name);
|
||||
}
|
||||
|
||||
if(excluded_state) {
|
||||
if(state->debug)
|
||||
if(stt->debug)
|
||||
fprintf(stderr, "Sensor '%s' is excluded for status check, by excluded_status_record_ids_check()\n", sensor_name);
|
||||
}
|
||||
|
||||
|
@ -1023,7 +1021,7 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
t.units = "Celsius";
|
||||
t.family = "temperatures";
|
||||
t.chart_type = "line";
|
||||
t.priority = state->sensors.priority + 10;
|
||||
t.priority = stt->sensors.priority + 10;
|
||||
break;
|
||||
|
||||
case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT:
|
||||
|
@ -1033,7 +1031,7 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
t.units = "Fahrenheit";
|
||||
t.family = "temperatures";
|
||||
t.chart_type = "line";
|
||||
t.priority = state->sensors.priority + 20;
|
||||
t.priority = stt->sensors.priority + 20;
|
||||
break;
|
||||
|
||||
case IPMI_MONITORING_SENSOR_UNITS_VOLTS:
|
||||
|
@ -1043,7 +1041,7 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
t.units = "Volts";
|
||||
t.family = "voltages";
|
||||
t.chart_type = "line";
|
||||
t.priority = state->sensors.priority + 30;
|
||||
t.priority = stt->sensors.priority + 30;
|
||||
break;
|
||||
|
||||
case IPMI_MONITORING_SENSOR_UNITS_AMPS:
|
||||
|
@ -1053,7 +1051,7 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
t.units = "Amps";
|
||||
t.family = "current";
|
||||
t.chart_type = "line";
|
||||
t.priority = state->sensors.priority + 40;
|
||||
t.priority = stt->sensors.priority + 40;
|
||||
break;
|
||||
|
||||
case IPMI_MONITORING_SENSOR_UNITS_RPM:
|
||||
|
@ -1063,7 +1061,7 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
t.units = "RPM";
|
||||
t.family = "fans";
|
||||
t.chart_type = "line";
|
||||
t.priority = state->sensors.priority + 50;
|
||||
t.priority = stt->sensors.priority + 50;
|
||||
break;
|
||||
|
||||
case IPMI_MONITORING_SENSOR_UNITS_WATTS:
|
||||
|
@ -1073,7 +1071,7 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
t.units = "Watts";
|
||||
t.family = "power";
|
||||
t.chart_type = "line";
|
||||
t.priority = state->sensors.priority + 60;
|
||||
t.priority = stt->sensors.priority + 60;
|
||||
break;
|
||||
|
||||
case IPMI_MONITORING_SENSOR_UNITS_PERCENT:
|
||||
|
@ -1083,11 +1081,11 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
t.units = "%%";
|
||||
t.family = "other";
|
||||
t.chart_type = "line";
|
||||
t.priority = state->sensors.priority + 70;
|
||||
t.priority = stt->sensors.priority + 70;
|
||||
break;
|
||||
|
||||
default:
|
||||
t.priority = state->sensors.priority + 80;
|
||||
t.priority = stt->sensors.priority + 80;
|
||||
t.do_metric = false;
|
||||
break;
|
||||
}
|
||||
|
@ -1108,57 +1106,57 @@ static void netdata_update_ipmi_sensor_reading(
|
|||
}
|
||||
|
||||
if(sensor_reading) {
|
||||
netdata_sensor_set_value(&t, sensor_reading, state);
|
||||
t.last_collected_metric_ut = state->sensors.now_ut;
|
||||
netdata_sensor_set_value(&t, sensor_reading, stt);
|
||||
t.last_collected_metric_ut = stt->sensors.now_ut;
|
||||
}
|
||||
t.last_collected_state_ut = state->sensors.now_ut;
|
||||
t.last_collected_state_ut = stt->sensors.now_ut;
|
||||
|
||||
dictionary_set(state->sensors.dict, key, &t, sizeof(t));
|
||||
dictionary_set(stt->sensors.dict, key, &t, sizeof(t));
|
||||
}
|
||||
|
||||
static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events) {
|
||||
state->sel.events = events;
|
||||
static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *stt, uint32_t events) {
|
||||
stt->sel.events = events;
|
||||
}
|
||||
|
||||
int netdata_ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) {
|
||||
int netdata_ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *stt) {
|
||||
errno = 0;
|
||||
|
||||
if(type & IPMI_COLLECT_TYPE_SENSORS) {
|
||||
state->sensors.collected = 0;
|
||||
state->sensors.now_ut = now_monotonic_usec();
|
||||
stt->sensors.collected = 0;
|
||||
stt->sensors.now_ut = now_monotonic_usec();
|
||||
|
||||
if (netdata_read_ipmi_sensors(ipmi_config, state) < 0) return -1;
|
||||
if (netdata_read_ipmi_sensors(ipmi_config, stt) < 0) return -1;
|
||||
}
|
||||
|
||||
if(type & IPMI_COLLECT_TYPE_SEL) {
|
||||
state->sel.events = 0;
|
||||
state->sel.now_ut = now_monotonic_usec();
|
||||
if(netdata_get_ipmi_sel_events_count(ipmi_config, state) < 0) return -2;
|
||||
stt->sel.events = 0;
|
||||
stt->sel.now_ut = now_monotonic_usec();
|
||||
if(netdata_get_ipmi_sel_events_count(ipmi_config, stt) < 0) return -2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int netdata_ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) {
|
||||
int netdata_ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *stt) {
|
||||
int i, checks = SPEED_TEST_ITERATIONS, successful = 0;
|
||||
usec_t total = 0;
|
||||
|
||||
for(i = 0 ; i < checks ; i++) {
|
||||
if(unlikely(state->debug))
|
||||
if(unlikely(stt->debug))
|
||||
fprintf(stderr, "%s: checking %s data collection speed iteration %d of %d\n",
|
||||
program_name, netdata_collect_type_to_string(type), i + 1, checks);
|
||||
|
||||
// measure the time a data collection needs
|
||||
usec_t start = now_realtime_usec();
|
||||
|
||||
if(netdata_ipmi_collect_data(ipmi_config, type, state) < 0)
|
||||
if(netdata_ipmi_collect_data(ipmi_config, type, stt) < 0)
|
||||
continue;
|
||||
|
||||
usec_t end = now_realtime_usec();
|
||||
|
||||
successful++;
|
||||
|
||||
if(unlikely(state->debug))
|
||||
if(unlikely(stt->debug))
|
||||
fprintf(stderr, "%s: %s data collection speed was %"PRIu64" usec\n",
|
||||
program_name, netdata_collect_type_to_string(type), end - start);
|
||||
|
||||
|
@ -1298,31 +1296,32 @@ static inline bool is_sensor_updated(usec_t last_collected_ut, usec_t now_ut, us
|
|||
return (now_ut - last_collected_ut < freq * 2) ? true : false;
|
||||
}
|
||||
|
||||
static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *state) {
|
||||
if(state->sensors.status != ICS_RUNNING) {
|
||||
if(unlikely(state->debug))
|
||||
static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *stt) {
|
||||
if(stt->sensors.status != ICS_RUNNING) {
|
||||
if(unlikely(stt->debug))
|
||||
fprintf(stderr, "%s: %s() sensors state is not RUNNING\n",
|
||||
program_name, __FUNCTION__ );
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t total_sensors_sent = 0;
|
||||
int update_every = (int)(state->sensors.freq_ut / USEC_PER_SEC);
|
||||
int update_every_s = (int)(stt->sensors.freq_ut / USEC_PER_SEC);
|
||||
struct sensor *sn;
|
||||
|
||||
netdata_mutex_lock(&stdout_mutex);
|
||||
// generate the CHART/DIMENSION lines, if we have to
|
||||
dfe_start_reentrant(state->sensors.dict, sn) {
|
||||
dfe_start_reentrant(stt->sensors.dict, sn) {
|
||||
if(unlikely(!sn->do_metric && !sn->do_state))
|
||||
continue;
|
||||
|
||||
bool did_metric = false, did_state = false;
|
||||
|
||||
if(likely(sn->do_metric)) {
|
||||
if(unlikely(!is_sensor_updated(sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut))) {
|
||||
if(unlikely(state->debug))
|
||||
if(unlikely(!is_sensor_updated(sn->last_collected_metric_ut, stt->updates.now_ut, stt->sensors.freq_ut))) {
|
||||
if(unlikely(stt->debug))
|
||||
fprintf(stderr, "%s: %s() sensor '%s' metric is not UPDATED (last updated %"PRIu64", now %"PRIu64", freq %"PRIu64"\n",
|
||||
program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut);
|
||||
program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_metric_ut,
|
||||
stt->updates.now_ut, stt->sensors.freq_ut);
|
||||
}
|
||||
else {
|
||||
if (unlikely(!sn->metric_chart_sent)) {
|
||||
|
@ -1330,7 +1329,8 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta
|
|||
|
||||
printf("CHART '%s_%s' '' '%s' '%s' '%s' '%s' '%s' %d %d '' '%s' '%s'\n",
|
||||
sn->context, sn_dfe.name, sn->title, sn->units, sn->family, sn->context,
|
||||
sn->chart_type, sn->priority + 1, update_every, program_name, "sensors");
|
||||
sn->chart_type, sn->priority + 1,
|
||||
update_every_s, program_name, "sensors");
|
||||
|
||||
printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name);
|
||||
printf("CLABEL 'type' '%s' 1\n", sn->type);
|
||||
|
@ -1344,19 +1344,16 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta
|
|||
|
||||
switch (sn->sensor_reading_type) {
|
||||
case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32:
|
||||
printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.uint32_value
|
||||
);
|
||||
printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.uint32_value);
|
||||
break;
|
||||
|
||||
case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE:
|
||||
printf("SET '%s' = %lld\n", sn->dimension,
|
||||
(long long int) (sn->sensor_reading.double_value * sn->multiplier)
|
||||
);
|
||||
(long long int) (sn->sensor_reading.double_value * sn->multiplier));
|
||||
break;
|
||||
|
||||
case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
|
||||
printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.bool_value
|
||||
);
|
||||
printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.bool_value);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -1372,17 +1369,18 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta
|
|||
}
|
||||
|
||||
if(likely(sn->do_state)) {
|
||||
if(unlikely(!is_sensor_updated(sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut))) {
|
||||
if (unlikely(state->debug))
|
||||
if(unlikely(!is_sensor_updated(sn->last_collected_state_ut, stt->updates.now_ut, stt->sensors.freq_ut))) {
|
||||
if (unlikely(stt->debug))
|
||||
fprintf(stderr, "%s: %s() sensor '%s' state is not UPDATED (last updated %"PRIu64", now %"PRIu64", freq %"PRIu64"\n",
|
||||
program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut);
|
||||
program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_state_ut,
|
||||
stt->updates.now_ut, stt->sensors.freq_ut);
|
||||
}
|
||||
else {
|
||||
if (unlikely(!sn->state_chart_sent)) {
|
||||
sn->state_chart_sent = true;
|
||||
|
||||
printf("CHART 'ipmi.sensor_state_%s' '' 'IPMI Sensor State' 'state' 'states' 'ipmi.sensor_state' 'line' %d %d '' '%s' '%s'\n",
|
||||
sn_dfe.name, sn->priority, update_every, program_name, "sensors");
|
||||
sn_dfe.name, sn->priority, update_every_s, program_name, "sensors");
|
||||
|
||||
printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name);
|
||||
printf("CLABEL 'type' '%s' 1\n", sn->type);
|
||||
|
@ -1415,17 +1413,17 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta
|
|||
return total_sensors_sent;
|
||||
}
|
||||
|
||||
static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state) {
|
||||
static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *stt) {
|
||||
static bool sel_chart_generated = false;
|
||||
|
||||
netdata_mutex_lock(&stdout_mutex);
|
||||
|
||||
if(likely(state->sel.status == ICS_RUNNING)) {
|
||||
if(likely(stt->sel.status == ICS_RUNNING)) {
|
||||
if(unlikely(!sel_chart_generated)) {
|
||||
sel_chart_generated = true;
|
||||
printf("CHART ipmi.events '' 'IPMI Events' 'events' 'events' ipmi.sel area %d %d '' '%s' '%s'\n"
|
||||
, state->sel.priority + 2
|
||||
, (int)(state->sel.freq_ut / USEC_PER_SEC)
|
||||
, stt->sel.priority + 2
|
||||
, (int)(stt->sel.freq_ut / USEC_PER_SEC)
|
||||
, program_name
|
||||
, "sel"
|
||||
);
|
||||
|
@ -1436,13 +1434,14 @@ static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state)
|
|||
"BEGIN ipmi.events\n"
|
||||
"SET events = %zu\n"
|
||||
"END\n"
|
||||
, state->sel.events
|
||||
,
|
||||
stt->sel.events
|
||||
);
|
||||
}
|
||||
|
||||
netdata_mutex_unlock(&stdout_mutex);
|
||||
|
||||
return state->sel.events;
|
||||
return stt->sel.events;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -1618,7 +1617,7 @@ static void freeimi_function_sensors(const char *transaction, char *function __m
|
|||
// ----------------------------------------------------------------------------
|
||||
// main, command line arguments parsing
|
||||
|
||||
static void plugin_exit(int code) {
|
||||
static NORETURN void plugin_exit(int code) {
|
||||
fflush(stdout);
|
||||
function_plugin_should_exit = true;
|
||||
exit(code);
|
||||
|
|
|
@ -693,7 +693,8 @@ When responding to additions and updates, Netdata uses the following success res
|
|||
|
||||
- `200`, responding with 200, means the configuration has been accepted and it is running.
|
||||
- `202`, responding with 202, means the configuration has been accepted but it is not yet running. A subsequent `status` action will update it.
|
||||
- `299`, responding with 299, means the configuration has been accepted but a restart is required to apply it.
|
||||
- `298`, responding with 298, means the configuration has been accepted but it is disabled for some reason (probably because it matches nothing or the contents are not useful - use the `message` to provide additional information).
|
||||
- `299`, responding with 299, means the configuration has been accepted but a restart is required to apply it.
|
||||
|
||||
## Data collection
|
||||
|
||||
|
|
|
@ -190,7 +190,7 @@ static inline PARSER_RC pluginsd_host_define_end(char **words __maybe_unused, si
|
|||
default_rrd_update_every,
|
||||
default_rrd_history_entries,
|
||||
default_rrd_memory_mode,
|
||||
default_health_enabled,
|
||||
health_plugin_enabled(),
|
||||
default_rrdpush_enabled,
|
||||
default_rrdpush_destination,
|
||||
default_rrdpush_api_key,
|
||||
|
@ -568,20 +568,20 @@ static inline PARSER_RC pluginsd_variable(char **words, size_t num_words, PARSER
|
|||
}
|
||||
|
||||
if (global) {
|
||||
const RRDVAR_ACQUIRED *rva = rrdvar_custom_host_variable_add_and_acquire(host, name);
|
||||
const RRDVAR_ACQUIRED *rva = rrdvar_host_variable_add_and_acquire(host, name);
|
||||
if (rva) {
|
||||
rrdvar_custom_host_variable_set(host, rva, v);
|
||||
rrdvar_custom_host_variable_release(host, rva);
|
||||
rrdvar_host_variable_set(host, rva, v);
|
||||
rrdvar_host_variable_release(host, rva);
|
||||
}
|
||||
else
|
||||
netdata_log_error("PLUGINSD: 'host:%s' cannot find/create HOST VARIABLE '%s'",
|
||||
rrdhost_hostname(host),
|
||||
name);
|
||||
} else {
|
||||
const RRDSETVAR_ACQUIRED *rsa = rrdsetvar_custom_chart_variable_add_and_acquire(st, name);
|
||||
const RRDVAR_ACQUIRED *rsa = rrdvar_chart_variable_add_and_acquire(st, name);
|
||||
if (rsa) {
|
||||
rrdsetvar_custom_chart_variable_set(st, rsa, v);
|
||||
rrdsetvar_custom_chart_variable_release(st, rsa);
|
||||
rrdvar_chart_variable_set(st, rsa, v);
|
||||
rrdvar_chart_variable_release(st, rsa);
|
||||
}
|
||||
else
|
||||
netdata_log_error("PLUGINSD: 'host:%s/chart:%s' cannot find/create CHART VARIABLE '%s'",
|
||||
|
|
|
@ -352,8 +352,8 @@ int do_ipc(int update_every, usec_t dt) {
|
|||
}
|
||||
|
||||
// variables
|
||||
semaphores_max = rrdvar_custom_host_variable_add_and_acquire(localhost, "ipc_semaphores_max");
|
||||
arrays_max = rrdvar_custom_host_variable_add_and_acquire(localhost, "ipc_semaphores_arrays_max");
|
||||
semaphores_max = rrdvar_host_variable_add_and_acquire(localhost, "ipc_semaphores_max");
|
||||
arrays_max = rrdvar_host_variable_add_and_acquire(localhost, "ipc_semaphores_arrays_max");
|
||||
}
|
||||
|
||||
struct stat stbuf;
|
||||
|
@ -373,8 +373,10 @@ int do_ipc(int update_every, usec_t dt) {
|
|||
collector_error("Unable to fetch semaphore limits.");
|
||||
}
|
||||
else {
|
||||
if(semaphores_max) rrdvar_custom_host_variable_set(localhost, semaphores_max, limits.semmns);
|
||||
if(arrays_max) rrdvar_custom_host_variable_set(localhost, arrays_max, limits.semmni);
|
||||
if(semaphores_max)
|
||||
rrdvar_host_variable_set(localhost, semaphores_max, limits.semmns);
|
||||
if(arrays_max)
|
||||
rrdvar_host_variable_set(localhost, arrays_max, limits.semmni);
|
||||
|
||||
st_arrays->red = limits.semmni;
|
||||
st_semaphores->red = limits.semmns;
|
||||
|
|
|
@ -95,7 +95,7 @@ int do_proc_loadavg(int update_every, usec_t dt) {
|
|||
if(likely(do_all_processes)) {
|
||||
static RRDSET *processes_chart = NULL;
|
||||
static RRDDIM *rd_active = NULL;
|
||||
static const RRDSETVAR_ACQUIRED *rd_pidmax;
|
||||
static const RRDVAR_ACQUIRED *rd_pidmax;
|
||||
|
||||
if(unlikely(!processes_chart)) {
|
||||
processes_chart = rrdset_create_localhost(
|
||||
|
@ -114,11 +114,11 @@ int do_proc_loadavg(int update_every, usec_t dt) {
|
|||
);
|
||||
|
||||
rd_active = rrddim_add(processes_chart, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_pidmax = rrdsetvar_custom_chart_variable_add_and_acquire(processes_chart, "pidmax");
|
||||
rd_pidmax = rrdvar_chart_variable_add_and_acquire(processes_chart, "pidmax");
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(processes_chart, rd_active, active_processes);
|
||||
rrdsetvar_custom_chart_variable_set(processes_chart, rd_pidmax, max_processes);
|
||||
rrdvar_chart_variable_set(processes_chart, rd_pidmax, max_processes);
|
||||
rrdset_done(processes_chart);
|
||||
}
|
||||
|
||||
|
|
|
@ -237,7 +237,7 @@ static struct netdev {
|
|||
RRDDIM *rd_mtu;
|
||||
|
||||
char *filename_speed;
|
||||
const RRDSETVAR_ACQUIRED *chart_var_speed;
|
||||
const RRDVAR_ACQUIRED *chart_var_speed;
|
||||
|
||||
char *filename_duplex;
|
||||
char *filename_operstate;
|
||||
|
@ -1241,20 +1241,20 @@ int do_proc_net_dev(int update_every, usec_t dt) {
|
|||
d->flipped ? d->rd_tbytes->collector.last_stored_value : -d->rd_rbytes->collector.last_stored_value,
|
||||
d->flipped ? -d->rd_rbytes->collector.last_stored_value : d->rd_tbytes->collector.last_stored_value);
|
||||
|
||||
if(unlikely(!d->chart_var_speed)) {
|
||||
d->chart_var_speed = rrdvar_chart_variable_add_and_acquire(d->st_bandwidth, "nic_speed_max");
|
||||
if(!d->chart_var_speed) {
|
||||
collector_error(
|
||||
"Cannot create interface %s chart variable 'nic_speed_max'. Will not update its speed anymore.",
|
||||
d->name);
|
||||
}
|
||||
else {
|
||||
rrdvar_chart_variable_set(d->st_bandwidth, d->chart_var_speed, NAN);
|
||||
}
|
||||
}
|
||||
|
||||
// update the interface speed
|
||||
if(d->filename_speed) {
|
||||
if(unlikely(!d->chart_var_speed)) {
|
||||
d->chart_var_speed =
|
||||
rrdsetvar_custom_chart_variable_add_and_acquire(d->st_bandwidth, "nic_speed_max");
|
||||
if(!d->chart_var_speed) {
|
||||
collector_error(
|
||||
"Cannot create interface %s chart variable 'nic_speed_max'. Will not update its speed anymore.",
|
||||
d->name);
|
||||
freez(d->filename_speed);
|
||||
d->filename_speed = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (d->filename_speed && d->chart_var_speed) {
|
||||
int ret = 0;
|
||||
|
||||
|
@ -1300,7 +1300,7 @@ int do_proc_net_dev(int update_every, usec_t dt) {
|
|||
rrdset_done(d->st_speed);
|
||||
}
|
||||
|
||||
rrdsetvar_custom_chart_variable_set(
|
||||
rrdvar_chart_variable_set(
|
||||
d->st_bandwidth, d->chart_var_speed, (NETDATA_DOUBLE)d->speed * KILOBITS_IN_A_MEGABIT);
|
||||
|
||||
if (d->speed) {
|
||||
|
|
|
@ -1647,7 +1647,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
|
|||
arl_expect(arl_udplite, "InCsumErrors", &snmp_root.udplite_InCsumErrors);
|
||||
arl_expect(arl_udplite, "IgnoredMulti", &snmp_root.udplite_IgnoredMulti);
|
||||
|
||||
tcp_max_connections_var = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_max_connections");
|
||||
tcp_max_connections_var = rrdvar_host_variable_add_and_acquire(localhost, "tcp_max_connections");
|
||||
}
|
||||
|
||||
size_t lines, l, words;
|
||||
|
@ -2713,7 +2713,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
|
|||
// snmp Tcp charts
|
||||
|
||||
// this is smart enough to update it, only when it is changed
|
||||
rrdvar_custom_host_variable_set(localhost, tcp_max_connections_var, snmp_root.tcp_MaxConn);
|
||||
rrdvar_host_variable_set(localhost, tcp_max_connections_var, snmp_root.tcp_MaxConn);
|
||||
|
||||
// see http://net-snmp.sourceforge.net/docs/mibs/tcp.html
|
||||
if(do_tcp_sockets == CONFIG_BOOLEAN_YES || (do_tcp_sockets == CONFIG_BOOLEAN_AUTO &&
|
||||
|
|
|
@ -32,9 +32,9 @@ static int read_tcp_mem(void) {
|
|||
*tcp_mem_high_threshold = NULL;
|
||||
|
||||
if(unlikely(!tcp_mem_low_threshold)) {
|
||||
tcp_mem_low_threshold = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_mem_low");
|
||||
tcp_mem_pressure_threshold = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_mem_pressure");
|
||||
tcp_mem_high_threshold = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_mem_high");
|
||||
tcp_mem_low_threshold = rrdvar_host_variable_add_and_acquire(localhost, "tcp_mem_low");
|
||||
tcp_mem_pressure_threshold = rrdvar_host_variable_add_and_acquire(localhost, "tcp_mem_pressure");
|
||||
tcp_mem_high_threshold = rrdvar_host_variable_add_and_acquire(localhost, "tcp_mem_high");
|
||||
}
|
||||
|
||||
if(unlikely(!filename)) {
|
||||
|
@ -60,9 +60,9 @@ static int read_tcp_mem(void) {
|
|||
|
||||
// fprintf(stderr, "TCP MEM low = %llu, pressure = %llu, high = %llu\n", low, pressure, high);
|
||||
|
||||
rrdvar_custom_host_variable_set(localhost, tcp_mem_low_threshold, low * sysconf(_SC_PAGESIZE) / 1024.0);
|
||||
rrdvar_custom_host_variable_set(localhost, tcp_mem_pressure_threshold, pressure * sysconf(_SC_PAGESIZE) / 1024.0);
|
||||
rrdvar_custom_host_variable_set(localhost, tcp_mem_high_threshold, high * sysconf(_SC_PAGESIZE) / 1024.0);
|
||||
rrdvar_host_variable_set(localhost, tcp_mem_low_threshold, low * sysconf(_SC_PAGESIZE) / 1024.0);
|
||||
rrdvar_host_variable_set(localhost, tcp_mem_pressure_threshold, pressure * sysconf(_SC_PAGESIZE) / 1024.0);
|
||||
rrdvar_host_variable_set(localhost, tcp_mem_high_threshold, high * sysconf(_SC_PAGESIZE) / 1024.0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -81,9 +81,9 @@ static kernel_uint_t read_tcp_max_orphans(void) {
|
|||
if(read_single_number_file(filename, &tcp_max_orphans) == 0) {
|
||||
|
||||
if(unlikely(!tcp_max_orphans_var))
|
||||
tcp_max_orphans_var = rrdvar_custom_host_variable_add_and_acquire(localhost, "tcp_max_orphans");
|
||||
tcp_max_orphans_var = rrdvar_host_variable_add_and_acquire(localhost, "tcp_max_orphans");
|
||||
|
||||
rrdvar_custom_host_variable_set(localhost, tcp_max_orphans_var, tcp_max_orphans);
|
||||
rrdvar_host_variable_set(localhost, tcp_max_orphans_var, tcp_max_orphans);
|
||||
return tcp_max_orphans;
|
||||
}
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@ int do_proc_net_stat_conntrack(int update_every, usec_t dt) {
|
|||
if(!do_sockets && !read_full)
|
||||
return 1;
|
||||
|
||||
rrdvar_max = rrdvar_custom_host_variable_add_and_acquire(localhost, "netfilter_conntrack_max");
|
||||
rrdvar_max = rrdvar_host_variable_add_and_acquire(localhost, "netfilter_conntrack_max");
|
||||
}
|
||||
|
||||
if(likely(read_full)) {
|
||||
|
@ -125,7 +125,7 @@ int do_proc_net_stat_conntrack(int update_every, usec_t dt) {
|
|||
|
||||
unsigned long long max;
|
||||
if(likely(!read_single_number_file(nf_conntrack_max_filename, &max)))
|
||||
rrdvar_custom_host_variable_set(localhost, rrdvar_max, max);
|
||||
rrdvar_host_variable_set(localhost, rrdvar_max, max);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
|
|
@ -730,7 +730,7 @@ int do_proc_stat(int update_every, usec_t dt) {
|
|||
}
|
||||
|
||||
if(unlikely(core == 0 && cpus_var == NULL))
|
||||
cpus_var = rrdvar_custom_host_variable_add_and_acquire(localhost, "active_processors");
|
||||
cpus_var = rrdvar_host_variable_add_and_acquire(localhost, "active_processors");
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_user, user);
|
||||
|
@ -1075,7 +1075,7 @@ int do_proc_stat(int update_every, usec_t dt) {
|
|||
}
|
||||
|
||||
if(cpus_var)
|
||||
rrdvar_custom_host_variable_set(localhost, cpus_var, cores_found);
|
||||
rrdvar_host_variable_set(localhost, cpus_var, cores_found);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -849,8 +849,6 @@ int do_sys_class_drm(int update_every, usec_t dt) {
|
|||
(de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0'))) continue;
|
||||
|
||||
if(de->d_type == DT_LNK && !strncmp(de->d_name, "card", 4) && !strchr(de->d_name, '-')) {
|
||||
char filename[FILENAME_MAX + 1];
|
||||
|
||||
snprintfz(filename, FILENAME_MAX, "%s/%s/%s", drm_dir_name, de->d_name, "device/uevent");
|
||||
if(check_card_is_amdgpu(filename)) continue;
|
||||
|
||||
|
|
|
@ -184,7 +184,7 @@ static struct ibport {
|
|||
RRDSET *st_hwpackets;
|
||||
RRDSET *st_hwerrors;
|
||||
|
||||
const RRDSETVAR_ACQUIRED *stv_speed;
|
||||
const RRDVAR_ACQUIRED *stv_speed;
|
||||
|
||||
usec_t speed_last_collected_usec;
|
||||
|
||||
|
@ -545,14 +545,14 @@ int do_sys_class_infiniband(int update_every, usec_t dt)
|
|||
// x4 lanes multiplier as per Documentation/ABI/stable/sysfs-class-infiniband
|
||||
FOREACH_COUNTER_BYTES(GEN_RRD_DIM_ADD_CUSTOM, port, port->width * 8, 1000, RRD_ALGORITHM_INCREMENTAL)
|
||||
|
||||
port->stv_speed = rrdsetvar_custom_chart_variable_add_and_acquire(port->st_bytes, "link_speed");
|
||||
port->stv_speed = rrdvar_chart_variable_add_and_acquire(port->st_bytes, "link_speed");
|
||||
}
|
||||
|
||||
// Link read values to dimensions
|
||||
FOREACH_COUNTER_BYTES(GEN_RRD_DIM_SETP, port)
|
||||
|
||||
// For link speed set only variable
|
||||
rrdsetvar_custom_chart_variable_set(port->st_bytes, port->stv_speed, port->speed);
|
||||
rrdvar_chart_variable_set(port->st_bytes, port->stv_speed, port->speed);
|
||||
|
||||
rrdset_done(port->st_bytes);
|
||||
}
|
||||
|
|
|
@ -1531,8 +1531,7 @@ static int statsd_readfile(const char *filename, STATSD_APP *app, STATSD_APP_CHA
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int statsd_file_callback(const char *filename, void *data) {
|
||||
(void)data;
|
||||
static int statsd_file_callback(const char *filename, void *data __maybe_unused, bool stock_config __maybe_unused) {
|
||||
return statsd_readfile(filename, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
|
|
|
@ -59,6 +59,7 @@ static int systemd_journal_directories_dyncfg_get(BUFFER *wb) {
|
|||
static int systemd_journal_directories_dyncfg_cb(const char *transaction,
|
||||
const char *id,
|
||||
DYNCFG_CMDS cmd,
|
||||
const char *add_name __maybe_unused,
|
||||
BUFFER *payload,
|
||||
usec_t *stop_monotonic_ut __maybe_unused,
|
||||
bool *cancelled __maybe_unused,
|
||||
|
|
|
@ -1207,19 +1207,19 @@ static void systemd_unit_priority(UnitInfo *u, size_t units) {
|
|||
u->prio = (prio * units) + u->prio;
|
||||
}
|
||||
|
||||
#define if_less(current, max, target) ({ \
|
||||
typeof(current) _wanted = (current); \
|
||||
if((current) < (target)) \
|
||||
_wanted = (target) > (max) ? (max) : (target); \
|
||||
_wanted; \
|
||||
})
|
||||
// Returns 'current' unless it is lower than 'target'.
// When it is lower, returns 'target', capped so that it never exceeds 'max'.
static inline FACET_ROW_SEVERITY if_less(FACET_ROW_SEVERITY current, FACET_ROW_SEVERITY max, FACET_ROW_SEVERITY target) {
    if(current >= target)
        return current;

    return (target > max) ? max : target;
}
|
||||
|
||||
#define if_normal(current, max, target) ({ \
|
||||
typeof(current) _wanted = (current); \
|
||||
if((current) == FACET_ROW_SEVERITY_NORMAL) \
|
||||
_wanted = (target) > (max) ? (max) : (target); \
|
||||
_wanted; \
|
||||
})
|
||||
// Returns 'current' unless it is FACET_ROW_SEVERITY_NORMAL.
// When it is normal, returns 'target', capped so that it never exceeds 'max'.
static inline FACET_ROW_SEVERITY if_normal(FACET_ROW_SEVERITY current, FACET_ROW_SEVERITY max, FACET_ROW_SEVERITY target) {
    if(current != FACET_ROW_SEVERITY_NORMAL)
        return current;

    return (target > max) ? max : target;
}
|
||||
|
||||
FACET_ROW_SEVERITY system_unit_severity(UnitInfo *u) {
|
||||
FACET_ROW_SEVERITY severity, max_severity;
|
||||
|
|
|
@ -612,8 +612,8 @@ void *analytics_main(void *ptr)
|
|||
|
||||
analytics_gather_mutable_meta_data();
|
||||
|
||||
analytics_statistic_t statistic = { "META", "-", "-" };
|
||||
analytics_statistic_send(&statistic);
|
||||
analytics_statistic_t stt = { "META", "-", "-" };
|
||||
analytics_statistic_send(&stt);
|
||||
analytics_log_data();
|
||||
|
||||
sec = 0;
|
||||
|
|
|
@ -142,7 +142,7 @@ static cmd_status_t cmd_reload_health_execute(char *args, char **message)
|
|||
|
||||
nd_log_limits_unlimited();
|
||||
netdata_log_info("COMMAND: Reloading HEALTH configuration.");
|
||||
health_reload();
|
||||
health_plugin_reload();
|
||||
nd_log_limits_reset();
|
||||
|
||||
return CMD_STATUS_SUCCESS;
|
||||
|
|
|
@ -38,6 +38,9 @@
|
|||
|
||||
#include "global_statistics.h"
|
||||
|
||||
// health monitoring and alarm notifications
|
||||
#include "health/health.h"
|
||||
|
||||
// the netdata database
|
||||
#include "database/rrd.h"
|
||||
|
||||
|
@ -52,8 +55,6 @@
|
|||
// streaming metrics between netdata servers
|
||||
#include "streaming/rrdpush.h"
|
||||
|
||||
// health monitoring and alarm notifications
|
||||
#include "health/health.h"
|
||||
|
||||
// anomaly detection
|
||||
#include "ml/ml.h"
|
||||
|
|
|
@ -14,23 +14,41 @@ struct dyncfg_echo {
|
|||
const DICTIONARY_ITEM *item;
|
||||
DYNCFG *df;
|
||||
BUFFER *wb;
|
||||
const char *cmd;
|
||||
};
|
||||
|
||||
// Result callback of an echoed dyncfg command.
// result_cb_data is the struct dyncfg_echo that was created for the request;
// a non-successful response code is logged, then everything the echo context
// holds (response buffer, acquired dictionary item, command string) is released
// and the context itself is freed.
void dyncfg_echo_cb(BUFFER *wb __maybe_unused, int code __maybe_unused, void *result_cb_data) {
    struct dyncfg_echo *echo = result_cb_data;

    if(!DYNCFG_RESP_SUCCESS(code)) {
        // log before the item is released, since its name is read from the acquired item
        nd_log(NDLS_DAEMON, NDLP_ERR,
               "DYNCFG: received response code %d on request to id '%s', cmd: %s",
               code, dictionary_acquired_item_name(echo->item), echo->cmd);
    }

    buffer_free(echo->wb);
    dictionary_acquired_item_release(dyncfg_globals.nodes, echo->item);

    // clear the pointers before freeing the context
    echo->wb = NULL;
    echo->df = NULL;
    echo->item = NULL;

    freez((void *)echo->cmd);
    echo->cmd = NULL;

    freez(echo);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
void dyncfg_echo(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id __maybe_unused, DYNCFG_CMDS cmd) {
|
||||
if(!(df->cmds & cmd))
|
||||
RRDHOST *host = dyncfg_rrdhost(df);
|
||||
if(!host) {
|
||||
nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id);
|
||||
return;
|
||||
}
|
||||
|
||||
if(!(df->cmds & cmd)) {
|
||||
nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: attempted to echo a cmd that is not supported");
|
||||
return;
|
||||
}
|
||||
|
||||
const char *cmd_str = dyncfg_id2cmd_one(cmd);
|
||||
if(!cmd_str) {
|
||||
|
@ -42,42 +60,90 @@ void dyncfg_echo(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id __maybe
|
|||
e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item);
|
||||
e->wb = buffer_create(0, NULL);
|
||||
e->df = df;
|
||||
e->cmd = strdupz(cmd_str);
|
||||
|
||||
char buf[string_strlen(df->function) + strlen(cmd_str) + 20];
|
||||
snprintfz(buf, sizeof(buf), "%s %s", string2str(df->function), cmd_str);
|
||||
|
||||
rrd_function_run(df->host, e->wb, 10, HTTP_ACCESS_ADMIN, buf, false, NULL,
|
||||
dyncfg_echo_cb, e,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL);
|
||||
rrd_function_run(
|
||||
host, e->wb, 10, HTTP_ACCESS_ADMIN, buf, false, NULL,
|
||||
dyncfg_echo_cb, e,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, string2str(df->source));
|
||||
}
|
||||
|
||||
static void dyncfg_echo_payload(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id __maybe_unused, const char *cmd) {
|
||||
if(!df->payload)
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
static void dyncfg_echo_payload(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id, const char *cmd) {
|
||||
RRDHOST *host = dyncfg_rrdhost(df);
|
||||
if(!host) {
|
||||
nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id);
|
||||
return;
|
||||
}
|
||||
|
||||
if(!df->payload) {
|
||||
nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: requested to send a '%s' to '%s', but there is no payload", cmd, id);
|
||||
return;
|
||||
}
|
||||
|
||||
struct dyncfg_echo *e = callocz(1, sizeof(struct dyncfg_echo));
|
||||
e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item);
|
||||
e->wb = buffer_create(0, NULL);
|
||||
e->df = df;
|
||||
e->cmd = strdupz(cmd);
|
||||
|
||||
char buf[string_strlen(df->function) + strlen(cmd) + 20];
|
||||
snprintfz(buf, sizeof(buf), "%s %s", string2str(df->function), cmd);
|
||||
|
||||
rrd_function_run(df->host, e->wb, 10, HTTP_ACCESS_ADMIN, buf, false, NULL,
|
||||
dyncfg_echo_cb, e,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
df->payload, NULL);
|
||||
rrd_function_run(
|
||||
host, e->wb, 10, HTTP_ACCESS_ADMIN, buf, false, NULL,
|
||||
dyncfg_echo_cb, e,
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
df->payload, string2str(df->source));
|
||||
}
|
||||
|
||||
// Echo the "update" command for the configuration 'id' through dyncfg_echo_payload().
void dyncfg_echo_update(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id) {
    static const char update_cmd[] = "update";

    dyncfg_echo_payload(item, df, id, update_cmd);
}
|
||||
|
||||
void dyncfg_echo_add(const DICTIONARY_ITEM *template_item, DYNCFG *template_df, const char *template_id, const char *job_name) {
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Run the function of a template with the command 'cmd', sending the payload of a job.
//
// The host is resolved from df_template; the function name comes from df_template,
// while the payload and the source come from df_job. The acquired item of the job is
// duplicated into the echo context that is handed to rrd_function_run() together with
// dyncfg_echo_cb as the result callback.
//
// Nothing is sent (an error is logged instead) when the host cannot be found
// or when the job has no payload.
static void dyncfg_echo_payload_add(const DICTIONARY_ITEM *item_template __maybe_unused, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *id_template, const char *cmd) {
    RRDHOST *host = dyncfg_rrdhost(df_template);
    if(!host) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id_template);
        return;
    }

    if(!df_job->payload) {
        nd_log(NDLS_DAEMON, NDLP_ERR,
               "DYNCFG: requested to send a '%s' to '%s', but there is no payload",
               cmd, id_template);
        return;
    }

    // context released by the result callback
    struct dyncfg_echo *echo = callocz(1, sizeof(struct dyncfg_echo));
    echo->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item_job);
    echo->wb = buffer_create(0, NULL);
    echo->df = df_job;
    echo->cmd = strdupz(cmd);

    // "<function> <cmd>", with some spare room
    const size_t request_size = string_strlen(df_template->function) + strlen(cmd) + 20;
    char request[request_size];
    snprintfz(request, request_size, "%s %s", string2str(df_template->function), cmd);

    rrd_function_run(
        host, echo->wb, 10, HTTP_ACCESS_ADMIN, request, false, NULL,
        dyncfg_echo_cb, echo,
        NULL, NULL,
        NULL, NULL,
        df_job->payload, string2str(df_job->source));
}
|
||||
|
||||
void dyncfg_echo_add(const DICTIONARY_ITEM *item_template, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *template_id, const char *job_name) {
|
||||
char buf[strlen(job_name) + 20];
|
||||
snprintfz(buf, sizeof(buf), "add %s", job_name);
|
||||
dyncfg_echo_payload(template_item, template_df, template_id, buf);
|
||||
dyncfg_echo_payload_add(item_template, item_job, df_template, df_job, template_id, buf);
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ void dyncfg_file_save(const char *id, DYNCFG *df) {
|
|||
fprintf(fp, "template=%s\n", string2str(df->template));
|
||||
|
||||
char uuid_str[UUID_COMPACT_STR_LEN];
|
||||
uuid_unparse_lower_compact(df->host_uuid, uuid_str);
|
||||
uuid_unparse_lower_compact(df->host_uuid.uuid, uuid_str);
|
||||
fprintf(fp, "host=%s\n", uuid_str);
|
||||
|
||||
fprintf(fp, "path=%s\n", string2str(df->path));
|
||||
|
@ -67,7 +67,6 @@ void dyncfg_file_load(const char *filename) {
|
|||
}
|
||||
|
||||
DYNCFG tmp = {
|
||||
.host = NULL,
|
||||
.status = DYNCFG_STATUS_ORPHAN,
|
||||
};
|
||||
|
||||
|
@ -110,7 +109,7 @@ void dyncfg_file_load(const char *filename) {
|
|||
} else if (strcmp(key, "template") == 0) {
|
||||
tmp.template = string_strdupz(value);
|
||||
} else if (strcmp(key, "host") == 0) {
|
||||
uuid_parse_flexi(value, tmp.host_uuid);
|
||||
uuid_parse_flexi(value, tmp.host_uuid.uuid);
|
||||
} else if (strcmp(key, "path") == 0) {
|
||||
tmp.path = string_strdupz(value);
|
||||
} else if (strcmp(key, "type") == 0) {
|
||||
|
|
|
@ -29,21 +29,22 @@ static int dyncfg_inline_callback(struct rrd_function_execute *rfe, void *data _
|
|||
|
||||
bool dyncfg_add(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, dyncfg_cb_t cb, void *data) {
|
||||
|
||||
if(dyncfg_add_low_level(host, id, path, status, type, source_type, source, cmds,
|
||||
struct dyncfg_node tmp = {
|
||||
.cmds = cmds,
|
||||
.type = type,
|
||||
.cb = cb,
|
||||
.data = data,
|
||||
};
|
||||
dictionary_set(dyncfg_nodes, id, &tmp, sizeof(tmp));
|
||||
|
||||
if(!dyncfg_add_low_level(host, id, path, status, type, source_type, source, cmds,
|
||||
0, 0, true,
|
||||
dyncfg_inline_callback, NULL)) {
|
||||
struct dyncfg_node tmp = {
|
||||
.cmds = cmds,
|
||||
.type = type,
|
||||
.cb = cb,
|
||||
.data = data,
|
||||
};
|
||||
dictionary_set(dyncfg_nodes, id, &tmp, sizeof(tmp));
|
||||
|
||||
return true;
|
||||
dictionary_del(dyncfg_nodes, id);
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void dyncfg_del(RRDHOST *host, const char *id) {
|
||||
|
|
|
@ -19,15 +19,34 @@ struct dyncfg_call {
|
|||
};
|
||||
|
||||
DYNCFG_STATUS dyncfg_status_from_successful_response(int code) {
|
||||
DYNCFG_STATUS status;
|
||||
if(code == DYNCFG_RESP_RUNNING)
|
||||
status = DYNCFG_STATUS_RUNNING;
|
||||
else if(code == DYNCFG_RESP_ACCEPTED || code == DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED)
|
||||
status = DYNCFG_STATUS_ACCEPTED;
|
||||
DYNCFG_STATUS status = DYNCFG_STATUS_ACCEPTED;
|
||||
|
||||
switch(code) {
|
||||
default:
|
||||
case DYNCFG_RESP_ACCEPTED:
|
||||
case DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED:
|
||||
status = DYNCFG_STATUS_ACCEPTED;
|
||||
break;
|
||||
|
||||
case DYNCFG_RESP_ACCEPTED_DISABLED:
|
||||
status = DYNCFG_STATUS_DISABLED;
|
||||
break;
|
||||
|
||||
case DYNCFG_RESP_RUNNING:
|
||||
status = DYNCFG_STATUS_RUNNING;
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Replace the source of 'df' with a copy of 'source'.
// The new string is created before the previous one is freed.
static void dyncfg_function_intercept_keep_source(DYNCFG *df, const char *source) {
    STRING *previous = df->source;
    STRING *replacement = string_strdupz(source);

    df->source = replacement;
    string_freez(previous);
}
|
||||
|
||||
void dyncfg_function_intercept_result_cb(BUFFER *wb, int code, void *result_cb_data) {
|
||||
struct dyncfg_call *dc = result_cb_data;
|
||||
|
||||
|
@ -47,31 +66,40 @@ void dyncfg_function_intercept_result_cb(BUFFER *wb, int code, void *result_cb_d
|
|||
char id[strlen(dc->id) + 1 + strlen(dc->add_name) + 1];
|
||||
snprintfz(id, sizeof(id), "%s:%s", dc->id, dc->add_name);
|
||||
|
||||
const DICTIONARY_ITEM *new_item = dyncfg_add_internal(
|
||||
df->host,
|
||||
id,
|
||||
string2str(df->path),
|
||||
dyncfg_status_from_successful_response(code),
|
||||
DYNCFG_TYPE_JOB,
|
||||
DYNCFG_SOURCE_TYPE_DYNCFG,
|
||||
dc->source,
|
||||
(df->cmds & ~DYNCFG_CMD_ADD) | DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE | DYNCFG_CMD_TEST | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_REMOVE,
|
||||
0,
|
||||
0,
|
||||
df->sync,
|
||||
df->execute_cb, df->execute_cb_data, false);
|
||||
RRDHOST *host = dyncfg_rrdhost(df);
|
||||
if(!host) {
|
||||
nd_log(NDLS_DAEMON, NDLP_ERR,
|
||||
"DYNCFG: cannot add job '%s' because host is missing", id);
|
||||
}
|
||||
else {
|
||||
const DICTIONARY_ITEM *new_item = dyncfg_add_internal(
|
||||
host,
|
||||
id,
|
||||
string2str(df->path),
|
||||
dyncfg_status_from_successful_response(code),
|
||||
DYNCFG_TYPE_JOB,
|
||||
DYNCFG_SOURCE_TYPE_DYNCFG,
|
||||
dc->source,
|
||||
(df->cmds & ~DYNCFG_CMD_ADD) | DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE | DYNCFG_CMD_TEST |
|
||||
DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_REMOVE,
|
||||
0,
|
||||
0,
|
||||
df->sync,
|
||||
df->execute_cb,
|
||||
df->execute_cb_data,
|
||||
false);
|
||||
|
||||
DYNCFG *new_df = dictionary_acquired_item_value(new_item);
|
||||
SWAP(new_df->payload, dc->payload);
|
||||
if(code == DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED)
|
||||
new_df->restart_required = true;
|
||||
DYNCFG *new_df = dictionary_acquired_item_value(new_item);
|
||||
SWAP(new_df->payload, dc->payload);
|
||||
if (code == DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED)
|
||||
new_df->restart_required = true;
|
||||
|
||||
dyncfg_file_save(id, new_df);
|
||||
dictionary_acquired_item_release(dyncfg_globals.nodes, new_item);
|
||||
dyncfg_file_save(id, new_df);
|
||||
dictionary_acquired_item_release(dyncfg_globals.nodes, new_item);
|
||||
}
|
||||
} else if (dc->cmd == DYNCFG_CMD_UPDATE) {
|
||||
df->source_type = DYNCFG_SOURCE_TYPE_DYNCFG;
|
||||
string_freez(df->source);
|
||||
df->source = string_strdupz(dc->source);
|
||||
dyncfg_function_intercept_keep_source(df, dc->source);
|
||||
|
||||
df->status = dyncfg_status_from_successful_response(code);
|
||||
SWAP(df->payload, dc->payload);
|
||||
|
@ -79,10 +107,13 @@ void dyncfg_function_intercept_result_cb(BUFFER *wb, int code, void *result_cb_d
|
|||
save_required = true;
|
||||
} else if (dc->cmd == DYNCFG_CMD_ENABLE) {
|
||||
df->user_disabled = false;
|
||||
dyncfg_function_intercept_keep_source(df, dc->source);
|
||||
} else if (dc->cmd == DYNCFG_CMD_DISABLE) {
|
||||
df->user_disabled = true;
|
||||
dyncfg_function_intercept_keep_source(df, dc->source);
|
||||
} else if (dc->cmd == DYNCFG_CMD_REMOVE) {
|
||||
dyncfg_file_delete(dc->id);
|
||||
dictionary_del(dyncfg_globals.nodes, dc->id);
|
||||
}
|
||||
|
||||
if(dc->cmd != DYNCFG_CMD_ADD && code == DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED)
|
||||
|
@ -151,10 +182,20 @@ void dyncfg_function_intercept_result_cb(BUFFER *wb, int code, void *result_cb_d
|
|||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
static void dyncfg_apply_action_on_all_template_jobs(const char *template_id, DYNCFG_CMDS c) {
|
||||
static void dyncfg_apply_action_on_all_template_jobs(struct rrd_function_execute *rfe, const char *template_id, DYNCFG_CMDS c) {
|
||||
STRING *template = string_strdupz(template_id);
|
||||
|
||||
DYNCFG *df;
|
||||
|
||||
size_t all = 0, done = 0;
|
||||
dfe_start_read(dyncfg_globals.nodes, df) {
|
||||
if(df->template == template && df->type == DYNCFG_TYPE_JOB)
|
||||
all++;
|
||||
}
|
||||
dfe_done(df);
|
||||
|
||||
if(rfe->progress.cb)
|
||||
rfe->progress.cb(rfe->progress.data, done, all);
|
||||
|
||||
dfe_start_reentrant(dyncfg_globals.nodes, df) {
|
||||
if(df->template == template && df->type == DYNCFG_TYPE_JOB) {
|
||||
DYNCFG_CMDS cmd_to_send_to_plugin = c;
|
||||
|
@ -165,6 +206,9 @@ static void dyncfg_apply_action_on_all_template_jobs(const char *template_id, DY
|
|||
cmd_to_send_to_plugin = DYNCFG_CMD_DISABLE;
|
||||
|
||||
dyncfg_echo(df_dfe.item, df, df_dfe.name, cmd_to_send_to_plugin);
|
||||
|
||||
if(rfe->progress.cb)
|
||||
rfe->progress.cb(rfe->progress.data, ++done, all);
|
||||
}
|
||||
}
|
||||
dfe_done(df);
|
||||
|
@ -260,12 +304,8 @@ int dyncfg_function_intercept_cb(struct rrd_function_execute *rfe, void *data __
|
|||
else if(c == DYNCFG_CMD_SCHEMA) {
|
||||
bool loaded = false;
|
||||
if(df->type == DYNCFG_TYPE_JOB) {
|
||||
char template[strlen(id) + 1];
|
||||
memcpy(template, id, sizeof(template));
|
||||
char *colon = strrchr(template, ':');
|
||||
if(colon) *colon = '\0';
|
||||
if(template[0])
|
||||
loaded = dyncfg_get_schema(template, rfe->result.wb);
|
||||
if(df->template)
|
||||
loaded = dyncfg_get_schema(string2str(df->template), rfe->result.wb);
|
||||
}
|
||||
else
|
||||
loaded = dyncfg_get_schema(id, rfe->result.wb);
|
||||
|
@ -289,7 +329,7 @@ int dyncfg_function_intercept_cb(struct rrd_function_execute *rfe, void *data __
|
|||
dyncfg_file_save(id, df);
|
||||
}
|
||||
|
||||
dyncfg_apply_action_on_all_template_jobs(id, c);
|
||||
dyncfg_apply_action_on_all_template_jobs(rfe, id, c);
|
||||
|
||||
rc = HTTP_RESP_OK;
|
||||
dyncfg_default_response(rfe->result.wb, rc, "applied");
|
||||
|
|
|
@ -10,8 +10,7 @@
|
|||
#include "../../database/rrdcollector-internals.h"
|
||||
|
||||
typedef struct dyncfg {
|
||||
RRDHOST *host;
|
||||
uuid_t host_uuid;
|
||||
UUID host_uuid;
|
||||
STRING *function;
|
||||
STRING *template;
|
||||
STRING *path;
|
||||
|
@ -54,7 +53,7 @@ bool dyncfg_get_schema(const char *id, BUFFER *dst);
|
|||
void dyncfg_echo_cb(BUFFER *wb, int code, void *result_cb_data);
|
||||
void dyncfg_echo(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id, DYNCFG_CMDS cmd);
|
||||
void dyncfg_echo_update(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id);
|
||||
void dyncfg_echo_add(const DICTIONARY_ITEM *template_item, DYNCFG *template_df, const char *template_id, const char *job_name);
|
||||
void dyncfg_echo_add(const DICTIONARY_ITEM *item_template, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *template_id, const char *job_name);
|
||||
|
||||
const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, usec_t created_ut, usec_t modified_ut, bool sync, rrd_function_execute_cb_t execute_cb, void *execute_cb_data, bool overwrite_cb);
|
||||
int dyncfg_function_intercept_cb(struct rrd_function_execute *rfe, void *data);
|
||||
|
@ -62,4 +61,7 @@ void dyncfg_cleanup(DYNCFG *v);
|
|||
|
||||
bool dyncfg_is_user_disabled(const char *id);
|
||||
|
||||
RRDHOST *dyncfg_rrdhost_by_uuid(UUID *uuid);
|
||||
RRDHOST *dyncfg_rrdhost(DYNCFG *df);
|
||||
|
||||
#endif //NETDATA_DYNCFG_INTERNALS_H
|
||||
|
|
|
@ -60,15 +60,15 @@ static void dyncfg_tree_for_host(RRDHOST *host, BUFFER *wb, const char *path, co
|
|||
if(id && *id)
|
||||
template = string_strdupz(id);
|
||||
|
||||
UUID host_uuid = uuid2UUID(host->host_uuid);
|
||||
|
||||
size_t path_len = strlen(path);
|
||||
DYNCFG *df;
|
||||
dfe_start_read(dyncfg_globals.nodes, df) {
|
||||
if(!df->host) {
|
||||
if(uuid_memcmp(&df->host_uuid, &host->host_uuid) == 0)
|
||||
df->host = host;
|
||||
}
|
||||
if(!UUIDeq(df->host_uuid, host_uuid))
|
||||
continue;
|
||||
|
||||
if(df->host != host || strncmp(string2str(df->path), path, path_len) != 0)
|
||||
if(strncmp(string2str(df->path), path, path_len) != 0)
|
||||
continue;
|
||||
|
||||
if(!rrd_function_available(host, string2str(df->function)))
|
||||
|
|
|
@ -450,6 +450,11 @@ static int dyncfg_unittest_run(const char *cmd, BUFFER *wb, const char *payload,
|
|||
if(c == DYNCFG_CMD_UPDATE)
|
||||
memset(&t->current.value, 0, sizeof(t->current.value));
|
||||
|
||||
if(c & (DYNCFG_CMD_UPDATE) || (c & (DYNCFG_CMD_DISABLE|DYNCFG_CMD_ENABLE) && t->type != DYNCFG_TYPE_TEMPLATE)) {
|
||||
freez((void *)t->source);
|
||||
t->source = strdupz(source);
|
||||
}
|
||||
|
||||
buffer_flush(wb);
|
||||
|
||||
CLEAN_BUFFER *pld = NULL;
|
||||
|
|
|
@ -5,6 +5,21 @@
|
|||
|
||||
struct dyncfg_globals dyncfg_globals = { 0 };
|
||||
|
||||
// Look up the RRDHOST that corresponds to a host UUID.
//
// The UUID is rendered to its lowercase textual form and passed to
// rrdhost_find_by_guid(). When no host is found an error is logged and
// NULL is returned; otherwise the host returned by the lookup is returned.
//
// uuid must not be NULL (it is dereferenced unconditionally).
RRDHOST *dyncfg_rrdhost_by_uuid(UUID *uuid) {
    char guid[UUID_STR_LEN];
    uuid_unparse_lower(uuid->uuid, guid);

    RRDHOST *found = rrdhost_find_by_guid(guid);
    if(found)
        return found;

    nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host with UUID '%s'", guid);
    return NULL;
}
|
||||
|
||||
// Resolve the RRDHOST of a DYNCFG entry from the host UUID stored in it.
// Returns whatever dyncfg_rrdhost_by_uuid() returns for df->host_uuid
// (NULL when the host cannot be found).
RRDHOST *dyncfg_rrdhost(DYNCFG *df) {
    UUID *host_uuid = &df->host_uuid;
    return dyncfg_rrdhost_by_uuid(host_uuid);
}
|
||||
|
||||
void dyncfg_cleanup(DYNCFG *v) {
|
||||
buffer_free(v->payload);
|
||||
v->payload = NULL;
|
||||
|
@ -69,8 +84,8 @@ static bool dyncfg_conflict_cb(const DICTIONARY_ITEM *item __maybe_unused, void
|
|||
|
||||
dyncfg_normalize(nv);
|
||||
|
||||
if(v->host != nv->host) {
|
||||
SWAP(v->host, nv->host);
|
||||
if(!UUIDeq(v->host_uuid, nv->host_uuid)) {
|
||||
SWAP(v->host_uuid, nv->host_uuid);
|
||||
changes++;
|
||||
}
|
||||
|
||||
|
@ -164,7 +179,7 @@ void dyncfg_init_low_level(bool load_saved) {
|
|||
|
||||
const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, usec_t created_ut, usec_t modified_ut, bool sync, rrd_function_execute_cb_t execute_cb, void *execute_cb_data, bool overwrite_cb) {
|
||||
DYNCFG tmp = {
|
||||
.host = host,
|
||||
.host_uuid = uuid2UUID(host->host_uuid),
|
||||
.path = string_strdupz(path),
|
||||
.status = status,
|
||||
.type = type,
|
||||
|
@ -181,7 +196,6 @@ const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const
|
|||
.execute_cb_data = execute_cb_data,
|
||||
.overwrite_cb = overwrite_cb,
|
||||
};
|
||||
uuid_copy(tmp.host_uuid, host->host_uuid);
|
||||
|
||||
return dictionary_set_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1, &tmp, sizeof(tmp), NULL);
|
||||
}
|
||||
|
@ -196,21 +210,26 @@ static void dyncfg_send_updates(const char *id) {
|
|||
DYNCFG *df = dictionary_acquired_item_value(item);
|
||||
|
||||
if(df->type == DYNCFG_TYPE_SINGLE || df->type == DYNCFG_TYPE_JOB) {
|
||||
if (df->cmds & DYNCFG_CMD_UPDATE)
|
||||
if (df->cmds & DYNCFG_CMD_UPDATE && df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->payload && buffer_strlen(df->payload))
|
||||
dyncfg_echo_update(item, df, id);
|
||||
}
|
||||
else if(df->type == DYNCFG_TYPE_TEMPLATE && (df->cmds & DYNCFG_CMD_ADD)) {
|
||||
STRING *template = string_strdupz(id);
|
||||
|
||||
size_t len = strlen(id);
|
||||
DYNCFG *tf;
|
||||
dfe_start_reentrant(dyncfg_globals.nodes, tf) {
|
||||
const char *t_id = tf_dfe.name;
|
||||
if(tf->type == DYNCFG_TYPE_JOB && tf->template == template && strncmp(t_id, id, len) == 0 && t_id[len] == ':' && t_id[len + 1]) {
|
||||
dyncfg_echo_add(item, df, id, &t_id[len + 1]);
|
||||
DYNCFG *df_job;
|
||||
dfe_start_reentrant(dyncfg_globals.nodes, df_job) {
|
||||
const char *id_template = df_job_dfe.name;
|
||||
if(df_job->type == DYNCFG_TYPE_JOB && // it is a job
|
||||
df_job->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && // it is dynamically configured
|
||||
df_job->template == template && // it has the same template name
|
||||
strncmp(id_template, id, len) == 0 && // the template name matches (redundant)
|
||||
id_template[len] == ':' && // immediately after the template there is ':'
|
||||
id_template[len + 1]) { // and there is something else after the ':'
|
||||
dyncfg_echo_add(item, df_job_dfe.item, df, df_job, id, &id_template[len + 1]);
|
||||
}
|
||||
}
|
||||
dfe_done(tf);
|
||||
dfe_done(df_job);
|
||||
|
||||
string_freez(template);
|
||||
}
|
||||
|
@ -279,11 +298,7 @@ bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, DYNCF
|
|||
}
|
||||
|
||||
// remove
|
||||
if(source_type == DYNCFG_SOURCE_TYPE_DYNCFG && type == DYNCFG_TYPE_JOB) {
|
||||
// remove is only available for dyncfg jobs
|
||||
cmds |= DYNCFG_CMD_REMOVE;
|
||||
}
|
||||
else {
|
||||
if(source_type != DYNCFG_SOURCE_TYPE_DYNCFG || type != DYNCFG_TYPE_JOB) {
|
||||
// remove is only available for dyncfg jobs
|
||||
cmds &= ~DYNCFG_CMD_REMOVE;
|
||||
}
|
||||
|
@ -323,12 +338,19 @@ bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, DYNCF
|
|||
dyncfg_function_intercept_cb,
|
||||
NULL);
|
||||
|
||||
DYNCFG_CMDS status_to_send_to_plugin = df->user_disabled ? DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE;
|
||||
if(status_to_send_to_plugin == DYNCFG_CMD_ENABLE && dyncfg_is_user_disabled(string2str(df->template)))
|
||||
status_to_send_to_plugin = DYNCFG_CMD_DISABLE;
|
||||
if(df->type != DYNCFG_TYPE_TEMPLATE) {
|
||||
DYNCFG_CMDS status_to_send_to_plugin =
|
||||
(df->user_disabled || df->status == DYNCFG_STATUS_DISABLED) ? DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE;
|
||||
|
||||
if (status_to_send_to_plugin == DYNCFG_CMD_ENABLE && dyncfg_is_user_disabled(string2str(df->template)))
|
||||
status_to_send_to_plugin = DYNCFG_CMD_DISABLE;
|
||||
|
||||
dyncfg_echo(item, df, id, status_to_send_to_plugin);
|
||||
}
|
||||
|
||||
if(!(df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->type == DYNCFG_TYPE_JOB))
|
||||
dyncfg_send_updates(id);
|
||||
|
||||
dyncfg_echo(item, df, id, status_to_send_to_plugin);
|
||||
dyncfg_send_updates(id);
|
||||
dictionary_acquired_item_release(dyncfg_globals.nodes, item);
|
||||
|
||||
return true;
|
||||
|
|
|
@ -1369,12 +1369,6 @@ int get_system_info(struct rrdhost_system_info *system_info) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
void set_silencers_filename() {
|
||||
char filename[FILENAME_MAX + 1];
|
||||
snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir);
|
||||
silencers_filename = config_get(CONFIG_SECTION_HEALTH, "silencers file", filename);
|
||||
}
|
||||
|
||||
/* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST*
|
||||
be set in this procedure to be called in all the relevant code paths.
|
||||
*/
|
||||
|
@ -1407,7 +1401,7 @@ int unittest_prepare_rrd(char **user) {
|
|||
get_netdata_configured_variables();
|
||||
default_rrd_update_every = 1;
|
||||
default_rrd_memory_mode = RRD_MEMORY_MODE_RAM;
|
||||
default_health_enabled = 0;
|
||||
health_plugin_disable();
|
||||
storage_tiers = 1;
|
||||
registry_init();
|
||||
if(rrd_init("unittest", NULL, true)) {
|
||||
|
@ -2006,7 +2000,7 @@ int main(int argc, char **argv) {
|
|||
// --------------------------------------------------------------------
|
||||
// This is the safest place to start the SILENCERS structure
|
||||
|
||||
set_silencers_filename();
|
||||
health_set_silencers_filename();
|
||||
health_initialize_global_silencers();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
@ -2071,9 +2065,9 @@ int main(int argc, char **argv) {
|
|||
|
||||
#ifdef ENABLE_H2O
|
||||
delta_startup_time("initialize h2o server");
|
||||
for (int i = 0; static_threads[i].name; i++) {
|
||||
if (static_threads[i].start_routine == h2o_main)
|
||||
static_threads[i].enabled = httpd_is_enabled();
|
||||
for (int t = 0; static_threads[t].name; t++) {
|
||||
if (static_threads[t].start_routine == h2o_main)
|
||||
static_threads[t].enabled = httpd_is_enabled();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -35,8 +35,6 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) {
|
|||
rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE);
|
||||
|
||||
if (rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
rrddimvar_delete_all(rd);
|
||||
|
||||
/* only a collector can mark a chart as obsolete, so we must remove the reference */
|
||||
|
||||
size_t tiers_available = 0, tiers_said_no_retention = 0;
|
||||
|
@ -115,9 +113,7 @@ static void svc_rrdset_obsolete_to_free(RRDSET *st) {
|
|||
|
||||
worker_is_busy(WORKER_JOB_FREE_CHART);
|
||||
|
||||
rrdcalc_unlink_all_rrdset_alerts(st);
|
||||
|
||||
rrdsetvar_release_and_delete_all(st);
|
||||
rrdcalc_unlink_and_delete_all_rrdset_alerts(st);
|
||||
|
||||
// has to be run after all dimensions are archived - or use-after-free will occur
|
||||
rrdvar_delete_all(st->rrdvars);
|
||||
|
|
|
@ -1635,40 +1635,40 @@ int unit_test(long delay, long shift)
|
|||
|
||||
int test_sqlite(void) {
|
||||
fprintf(stderr, "%s() running...\n", __FUNCTION__ );
|
||||
sqlite3 *db_meta;
|
||||
sqlite3 *db_mt;
|
||||
fprintf(stderr, "Testing SQLIte\n");
|
||||
|
||||
int rc = sqlite3_open(":memory:", &db_meta);
|
||||
int rc = sqlite3_open(":memory:", &db_mt);
|
||||
if (rc != SQLITE_OK) {
|
||||
fprintf(stderr,"Failed to test SQLite: DB init failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
rc = sqlite3_exec_monitored(db_meta, "CREATE TABLE IF NOT EXISTS mine (id1, id2);", 0, 0, NULL);
|
||||
rc = sqlite3_exec_monitored(db_mt, "CREATE TABLE IF NOT EXISTS mine (id1, id2);", 0, 0, NULL);
|
||||
if (rc != SQLITE_OK) {
|
||||
fprintf(stderr,"Failed to test SQLite: Create table failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
rc = sqlite3_exec_monitored(db_meta, "DELETE FROM MINE LIMIT 1;", 0, 0, NULL);
|
||||
rc = sqlite3_exec_monitored(db_mt, "DELETE FROM MINE LIMIT 1;", 0, 0, NULL);
|
||||
if (rc != SQLITE_OK) {
|
||||
fprintf(stderr,"Failed to test SQLite: Delete with LIMIT failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
rc = sqlite3_exec_monitored(db_meta, "UPDATE MINE SET id1=1 LIMIT 1;", 0, 0, NULL);
|
||||
rc = sqlite3_exec_monitored(db_mt, "UPDATE MINE SET id1=1 LIMIT 1;", 0, 0, NULL);
|
||||
if (rc != SQLITE_OK) {
|
||||
fprintf(stderr,"Failed to test SQLite: Update with LIMIT failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
rc = sqlite3_create_function(db_meta, "now_usec", 1, SQLITE_ANY, 0, sqlite_now_usec, 0, 0);
|
||||
rc = sqlite3_create_function(db_mt, "now_usec", 1, SQLITE_ANY, 0, sqlite_now_usec, 0, 0);
|
||||
if (unlikely(rc != SQLITE_OK)) {
|
||||
fprintf(stderr, "Failed to register internal now_usec function");
|
||||
return 1;
|
||||
}
|
||||
|
||||
rc = sqlite3_exec_monitored(db_meta, "UPDATE MINE SET id1=now_usec(0);", 0, 0, NULL);
|
||||
rc = sqlite3_exec_monitored(db_mt, "UPDATE MINE SET id1=now_usec(0);", 0, 0, NULL);
|
||||
if (rc != SQLITE_OK) {
|
||||
fprintf(stderr,"Failed to test SQLite: Update with now_usec() failed\n");
|
||||
return 1;
|
||||
|
@ -1678,16 +1678,16 @@ int test_sqlite(void) {
|
|||
char *uuid_str = "0000_000";
|
||||
|
||||
buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str);
|
||||
rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL);
|
||||
rc = sqlite3_exec_monitored(db_mt, buffer_tostring(sql), 0, 0, NULL);
|
||||
if (rc != SQLITE_OK)
|
||||
goto error;
|
||||
|
||||
buffer_free(sql);
|
||||
fprintf(stderr,"SQLite is OK\n");
|
||||
rc = sqlite3_close_v2(db_meta);
|
||||
rc = sqlite3_close_v2(db_mt);
|
||||
return 0;
|
||||
error:
|
||||
rc = sqlite3_close_v2(db_meta);
|
||||
rc = sqlite3_close_v2(db_mt);
|
||||
fprintf(stderr,"SQLite statement failed: %s\n", buffer_tostring(sql));
|
||||
buffer_free(sql);
|
||||
fprintf(stderr,"SQLite tests failed\n");
|
||||
|
@ -1837,7 +1837,7 @@ static RRDHOST *dbengine_rrdhost_find_or_create(char *name)
|
|||
default_rrd_update_every,
|
||||
default_rrd_history_entries,
|
||||
RRD_MEMORY_MODE_DBENGINE,
|
||||
default_health_enabled,
|
||||
health_plugin_enabled(),
|
||||
default_rrdpush_enabled,
|
||||
default_rrdpush_destination,
|
||||
default_rrdpush_api_key,
|
||||
|
|
|
@ -183,6 +183,13 @@ struct context_v2_entry {
|
|||
FTS_MATCH match;
|
||||
};
|
||||
|
||||
// Per-status alert counters, shared by the alert summaries.
// alert_counts_add() increments `critical` for RRDCALC_STATUS_CRITICAL;
// the other members are presumably incremented for the matching statuses
// (the rest of that switch is not visible here - confirm).
// The JSON output emits these as "cr", "wr", "cl" and "er".
struct alert_counts {
    size_t critical;    // emitted as "cr"
    size_t warning;     // emitted as "wr"
    size_t clear;       // emitted as "cl"
    size_t error;       // emitted as "er"
};
|
||||
|
||||
struct alert_v2_entry {
|
||||
RRDCALC *tmp;
|
||||
|
||||
|
@ -191,16 +198,25 @@ struct alert_v2_entry {
|
|||
|
||||
size_t ati;
|
||||
|
||||
size_t critical;
|
||||
size_t warning;
|
||||
size_t clear;
|
||||
size_t error;
|
||||
struct alert_counts counts;
|
||||
|
||||
size_t instances;
|
||||
DICTIONARY *nodes;
|
||||
DICTIONARY *configs;
|
||||
};
|
||||
|
||||
struct alert_by_x_entry {
|
||||
struct {
|
||||
struct alert_counts counts;
|
||||
size_t silent;
|
||||
size_t total;
|
||||
} running;
|
||||
|
||||
struct {
|
||||
size_t available;
|
||||
} prototypes;
|
||||
};
|
||||
|
||||
typedef struct full_text_search_index {
|
||||
size_t searches;
|
||||
size_t string_searches;
|
||||
|
@ -254,8 +270,14 @@ struct rrdcontext_to_json_v2_data {
|
|||
|
||||
size_t ati;
|
||||
|
||||
DICTIONARY *alerts;
|
||||
DICTIONARY *summary;
|
||||
DICTIONARY *alert_instances;
|
||||
|
||||
DICTIONARY *by_type;
|
||||
DICTIONARY *by_component;
|
||||
DICTIONARY *by_classification;
|
||||
DICTIONARY *by_recipient;
|
||||
DICTIONARY *by_module;
|
||||
} alerts;
|
||||
|
||||
struct {
|
||||
|
@ -279,9 +301,7 @@ struct rrdcontext_to_json_v2_data {
|
|||
struct query_timings timings;
|
||||
};
|
||||
|
||||
static void alerts_v2_add(struct alert_v2_entry *t, RRDCALC *rc) {
|
||||
t->instances++;
|
||||
|
||||
static void alert_counts_add(struct alert_counts *t, RRDCALC *rc) {
|
||||
switch(rc->status) {
|
||||
case RRDCALC_STATUS_CRITICAL:
|
||||
t->critical++;
|
||||
|
@ -306,20 +326,51 @@ static void alerts_v2_add(struct alert_v2_entry *t, RRDCALC *rc) {
|
|||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void alerts_v2_add(struct alert_v2_entry *t, RRDCALC *rc) {
|
||||
t->instances++;
|
||||
|
||||
alert_counts_add(&t->counts, rc);
|
||||
|
||||
dictionary_set(t->nodes, rc->rrdset->rrdhost->machine_guid, NULL, 0);
|
||||
|
||||
char key[UUID_STR_LEN + 1];
|
||||
uuid_unparse_lower(rc->config_hash_id, key);
|
||||
uuid_unparse_lower(rc->config.hash_id, key);
|
||||
dictionary_set(t->configs, key, NULL, 0);
|
||||
}
|
||||
|
||||
// Dictionary insert callback for the alerts "by X" dictionaries.
//
// value is the struct alert_by_x_entry being accounted, data is the RRDCALC
// that triggered the call, or NULL when the call accounts a prototype.
//
// - data == NULL: one more available prototype is counted.
// - data != NULL: the alert is added to the running counters; it is also
//   counted as silent when its recipient is the same STRING pointer as the
//   one obtained from string_strdupz("silent").
static void alerts_by_x_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) {
    // created once, on first use, and kept for the lifetime of the process
    static STRING *silent = NULL;
    if(unlikely(!silent))
        silent = string_strdupz("silent");

    struct alert_by_x_entry *entry = value;
    RRDCALC *rc = data;

    if(!rc) {
        // prototype
        entry->prototypes.available++;
        return;
    }

    // running alert
    alert_counts_add(&entry->running.counts, rc);
    entry->running.total++;

    if(rc->config.recipient == silent)
        entry->running.silent++;
}
|
||||
|
||||
// Dictionary conflict callback for the alerts "by X" dictionaries.
//
// When the key already exists, the existing entry (old_value) is updated
// exactly as on insertion, by delegating to alerts_by_x_insert_callback().
// new_value is ignored. Always returns false.
// NOTE(review): false presumably tells the dictionary the stored value was
// not replaced - confirm against the dictionary API.
static bool alerts_by_x_conflict_callback(const DICTIONARY_ITEM *item, void *old_value, void *new_value __maybe_unused, void *data) {
    alerts_by_x_insert_callback(item, old_value, data);
    return false;
}
|
||||
|
||||
static void alerts_v2_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) {
|
||||
struct rrdcontext_to_json_v2_data *ctl = data;
|
||||
struct alert_v2_entry *t = value;
|
||||
RRDCALC *rc = t->tmp;
|
||||
t->name = rc->name;
|
||||
t->summary = rc->original_summary;
|
||||
t->name = rc->config.name;
|
||||
t->summary = rc->config.summary; // the original summary
|
||||
t->ati = ctl->alerts.ati++;
|
||||
|
||||
t->nodes = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_VALUE_LINK_DONT_CLONE|DICT_OPTION_NAME_LINK_DONT_CLONE);
|
||||
|
@ -350,16 +401,16 @@ static void alert_instances_v2_insert_callback(const DICTIONARY_ITEM *item __may
|
|||
t->chart_id = rc->rrdset->id;
|
||||
t->chart_name = rc->rrdset->name;
|
||||
t->family = rc->rrdset->family;
|
||||
t->units = rc->units;
|
||||
t->classification = rc->classification;
|
||||
t->type = rc->type;
|
||||
t->recipient = rc->recipient;
|
||||
t->component = rc->component;
|
||||
t->name = rc->name;
|
||||
t->source = rc->source;
|
||||
t->units = rc->config.units;
|
||||
t->classification = rc->config.classification;
|
||||
t->type = rc->config.type;
|
||||
t->recipient = rc->config.recipient;
|
||||
t->component = rc->config.component;
|
||||
t->name = rc->config.name;
|
||||
t->source = rc->config.source;
|
||||
t->status = rc->status;
|
||||
t->flags = rc->run_flags;
|
||||
t->info = rc->info;
|
||||
t->info = rc->config.info;
|
||||
t->summary = rc->summary;
|
||||
t->value = rc->value;
|
||||
t->last_updated = rc->last_updated;
|
||||
|
@ -368,10 +419,8 @@ static void alert_instances_v2_insert_callback(const DICTIONARY_ITEM *item __may
|
|||
t->host = rc->rrdset->rrdhost;
|
||||
t->alarm_id = rc->id;
|
||||
t->ni = ctl->nodes.ni;
|
||||
t->name = rc->name;
|
||||
|
||||
uuid_copy(t->config_hash_id, rc->config_hash_id);
|
||||
|
||||
uuid_copy(t->config_hash_id, rc->config.hash_id);
|
||||
health_alarm_log_get_global_id_and_transition_id_for_rrdcalc(rc, &t->global_id, &t->last_transition_id);
|
||||
}
|
||||
|
||||
|
@ -437,12 +486,12 @@ static FTS_MATCH rrdcontext_to_json_v2_full_text_search(struct rrdcontext_to_jso
|
|||
RRDSET *st = ri->rrdset;
|
||||
rw_spinlock_read_lock(&st->alerts.spinlock);
|
||||
for (RRDCALC *rcl = st->alerts.base; rcl; rcl = rcl->next) {
|
||||
if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->name))) {
|
||||
if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->config.name))) {
|
||||
matched = FTS_MATCHED_ALERT;
|
||||
break;
|
||||
}
|
||||
|
||||
if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->info))) {
|
||||
if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->config.info))) {
|
||||
matched = FTS_MATCHED_ALERT_INFO;
|
||||
break;
|
||||
}
|
||||
|
@ -462,7 +511,7 @@ static bool rrdcontext_matches_alert(struct rrdcontext_to_json_v2_data *ctl, RRD
|
|||
RRDSET *st = ri->rrdset;
|
||||
rw_spinlock_read_lock(&st->alerts.spinlock);
|
||||
for (RRDCALC *rcl = st->alerts.base; rcl; rcl = rcl->next) {
|
||||
if(ctl->alerts.alert_name_pattern && !simple_pattern_matches_string(ctl->alerts.alert_name_pattern, rcl->name))
|
||||
if(ctl->alerts.alert_name_pattern && !simple_pattern_matches_string(ctl->alerts.alert_name_pattern, rcl->config.name))
|
||||
continue;
|
||||
|
||||
if(ctl->alerts.alarm_id_filter && ctl->alerts.alarm_id_filter != rcl->id)
|
||||
|
@ -502,11 +551,51 @@ static bool rrdcontext_matches_alert(struct rrdcontext_to_json_v2_data *ctl, RRD
|
|||
struct alert_v2_entry t = {
|
||||
.tmp = rcl,
|
||||
};
|
||||
struct alert_v2_entry *a2e = dictionary_set(ctl->alerts.alerts, string2str(rcl->name), &t,
|
||||
sizeof(struct alert_v2_entry));
|
||||
struct alert_v2_entry *a2e =
|
||||
dictionary_set(ctl->alerts.summary, string2str(rcl->config.name),
|
||||
&t, sizeof(struct alert_v2_entry));
|
||||
size_t ati = a2e->ati;
|
||||
matches++;
|
||||
|
||||
dictionary_set_advanced(ctl->alerts.by_type,
|
||||
string2str(rcl->config.type),
|
||||
(ssize_t)string_strlen(rcl->config.type),
|
||||
NULL,
|
||||
sizeof(struct alert_by_x_entry),
|
||||
rcl);
|
||||
|
||||
dictionary_set_advanced(ctl->alerts.by_component,
|
||||
string2str(rcl->config.component),
|
||||
(ssize_t)string_strlen(rcl->config.component),
|
||||
NULL,
|
||||
sizeof(struct alert_by_x_entry),
|
||||
rcl);
|
||||
|
||||
dictionary_set_advanced(ctl->alerts.by_classification,
|
||||
string2str(rcl->config.classification),
|
||||
(ssize_t)string_strlen(rcl->config.classification),
|
||||
NULL,
|
||||
sizeof(struct alert_by_x_entry),
|
||||
rcl);
|
||||
|
||||
dictionary_set_advanced(ctl->alerts.by_recipient,
|
||||
string2str(rcl->config.recipient),
|
||||
(ssize_t)string_strlen(rcl->config.recipient),
|
||||
NULL,
|
||||
sizeof(struct alert_by_x_entry),
|
||||
rcl);
|
||||
|
||||
char *module = NULL;
|
||||
rrdlabels_get_value_strdup_or_null(st->rrdlabels, &module, "_collect_module");
|
||||
if(!module || !*module) module = "[unset]";
|
||||
|
||||
dictionary_set_advanced(ctl->alerts.by_module,
|
||||
module,
|
||||
-1,
|
||||
NULL,
|
||||
sizeof(struct alert_by_x_entry),
|
||||
rcl);
|
||||
|
||||
if (ctl->options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES | CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) {
|
||||
char key[20 + 1];
|
||||
snprintfz(key, sizeof(key) - 1, "%p", rcl);
|
||||
|
@ -1254,7 +1343,7 @@ static void contexts_v2_alert_config_to_json_from_sql_alert_config_data(struct s
|
|||
buffer_json_member_add_time_t(wb, "before", t->value.db.before);
|
||||
buffer_json_member_add_string(wb, "method", t->value.db.method);
|
||||
buffer_json_member_add_string(wb, "dimensions", t->value.db.dimensions);
|
||||
web_client_api_request_v1_data_options_to_buffer_json_array(wb, "options",(RRDR_OPTIONS) t->value.db.options);
|
||||
rrdr_options_to_buffer_json_array(wb, "options", (RRDR_OPTIONS)t->value.db.options);
|
||||
}
|
||||
buffer_json_object_close(wb); // db
|
||||
}
|
||||
|
@ -1394,6 +1483,41 @@ static int contexts_v2_alert_instance_to_json_callback(const DICTIONARY_ITEM *it
|
|||
return 1;
|
||||
}
|
||||
|
||||
// Callback given to health_prototype_metadata_foreach(): accounts one alert
// prototype in the by_type, by_component, by_classification and by_recipient
// dictionaries of the request.
//
// data is the struct rrdcontext_to_json_v2_data of the request.
// Each dictionary_set_advanced() call passes NULL as its last argument;
// alerts_by_x_insert_callback() treats a NULL data pointer as a prototype.
// by_module is not touched here.
static void contexts_v2_alerts_by_x_update_prototypes(void *data, STRING *type, STRING *component, STRING *classification, STRING *recipient) {
    struct rrdcontext_to_json_v2_data *ctl = data;

    dictionary_set_advanced(ctl->alerts.by_type,
                            string2str(type), (ssize_t)string_strlen(type),
                            NULL, sizeof(struct alert_by_x_entry), NULL);

    dictionary_set_advanced(ctl->alerts.by_component,
                            string2str(component), (ssize_t)string_strlen(component),
                            NULL, sizeof(struct alert_by_x_entry), NULL);

    dictionary_set_advanced(ctl->alerts.by_classification,
                            string2str(classification), (ssize_t)string_strlen(classification),
                            NULL, sizeof(struct alert_by_x_entry), NULL);

    dictionary_set_advanced(ctl->alerts.by_recipient,
                            string2str(recipient), (ssize_t)string_strlen(recipient),
                            NULL, sizeof(struct alert_by_x_entry), NULL);
}
|
||||
|
||||
// Emit one alerts "by X" dictionary as a JSON array member named `key`.
//
// Every dictionary entry becomes an object with:
//   "name"                       the dictionary key of the entry
//   "cr", "wr", "cl", "er"       the running per-status counters
//   "running", "running_silent"  total and silent running alerts
//   "available"                  number of prototypes, only when non-zero
static void contexts_v2_alerts_by_x_to_json(BUFFER *wb, DICTIONARY *dict, const char *key) {
    buffer_json_member_add_array(wb, key);

    struct alert_by_x_entry *entry;
    dfe_start_read(dict, entry) {
        buffer_json_add_array_item_object(wb);

        buffer_json_member_add_string(wb, "name", entry_dfe.name);

        buffer_json_member_add_uint64(wb, "cr", entry->running.counts.critical);
        buffer_json_member_add_uint64(wb, "wr", entry->running.counts.warning);
        buffer_json_member_add_uint64(wb, "cl", entry->running.counts.clear);
        buffer_json_member_add_uint64(wb, "er", entry->running.counts.error);

        buffer_json_member_add_uint64(wb, "running", entry->running.total);
        buffer_json_member_add_uint64(wb, "running_silent", entry->running.silent);

        // prototypes are reported only when there are any
        if(entry->prototypes.available)
            buffer_json_member_add_uint64(wb, "available", entry->prototypes.available);

        buffer_json_object_close(wb);
    }
    dfe_done(entry);

    buffer_json_array_close(wb);
}
|
||||
|
||||
static void contexts_v2_alert_instances_to_json(BUFFER *wb, const char *key, struct rrdcontext_to_json_v2_data *ctl, bool debug) {
|
||||
buffer_json_member_add_array(wb, key);
|
||||
{
|
||||
|
@ -1413,7 +1537,7 @@ static void contexts_v2_alerts_to_json(BUFFER *wb, struct rrdcontext_to_json_v2_
|
|||
buffer_json_member_add_array(wb, "alerts");
|
||||
{
|
||||
struct alert_v2_entry *t;
|
||||
dfe_start_read(ctl->alerts.alerts, t)
|
||||
dfe_start_read(ctl->alerts.summary, t)
|
||||
{
|
||||
buffer_json_add_array_item_object(wb);
|
||||
{
|
||||
|
@ -1421,10 +1545,10 @@ static void contexts_v2_alerts_to_json(BUFFER *wb, struct rrdcontext_to_json_v2_
|
|||
buffer_json_member_add_string(wb, "nm", string2str(t->name));
|
||||
buffer_json_member_add_string(wb, "sum", string2str(t->summary));
|
||||
|
||||
buffer_json_member_add_uint64(wb, "cr", t->critical);
|
||||
buffer_json_member_add_uint64(wb, "wr", t->warning);
|
||||
buffer_json_member_add_uint64(wb, "cl", t->clear);
|
||||
buffer_json_member_add_uint64(wb, "er", t->error);
|
||||
buffer_json_member_add_uint64(wb, "cr", t->counts.critical);
|
||||
buffer_json_member_add_uint64(wb, "wr", t->counts.warning);
|
||||
buffer_json_member_add_uint64(wb, "cl", t->counts.clear);
|
||||
buffer_json_member_add_uint64(wb, "er", t->counts.error);
|
||||
|
||||
buffer_json_member_add_uint64(wb, "in", t->instances);
|
||||
buffer_json_member_add_uint64(wb, "nd", dictionary_entries(t->nodes));
|
||||
|
@ -1435,6 +1559,13 @@ static void contexts_v2_alerts_to_json(BUFFER *wb, struct rrdcontext_to_json_v2_
|
|||
dfe_done(t);
|
||||
}
|
||||
buffer_json_array_close(wb); // alerts
|
||||
|
||||
health_prototype_metadata_foreach(ctl, contexts_v2_alerts_by_x_update_prototypes);
|
||||
contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_type, "alerts_by_type");
|
||||
contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_component, "alerts_by_component");
|
||||
contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_classification, "alerts_by_classification");
|
||||
contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_recipient, "alerts_by_recipient");
|
||||
contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_module, "alerts_by_module");
|
||||
}
|
||||
|
||||
if(ctl->request->options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES|CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) {
|
||||
|
@ -1942,12 +2073,42 @@ int rrdcontext_to_json_v2(BUFFER *wb, struct api_v2_contexts_request *req, CONTE
|
|||
}
|
||||
}
|
||||
|
||||
ctl.alerts.alerts = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
ctl.alerts.summary = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
NULL, sizeof(struct alert_v2_entry));
|
||||
|
||||
dictionary_register_insert_callback(ctl.alerts.alerts, alerts_v2_insert_callback, &ctl);
|
||||
dictionary_register_conflict_callback(ctl.alerts.alerts, alerts_v2_conflict_callback, &ctl);
|
||||
dictionary_register_delete_callback(ctl.alerts.alerts, alerts_v2_delete_callback, &ctl);
|
||||
dictionary_register_insert_callback(ctl.alerts.summary, alerts_v2_insert_callback, &ctl);
|
||||
dictionary_register_conflict_callback(ctl.alerts.summary, alerts_v2_conflict_callback, &ctl);
|
||||
dictionary_register_delete_callback(ctl.alerts.summary, alerts_v2_delete_callback, &ctl);
|
||||
|
||||
ctl.alerts.by_type = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
NULL, sizeof(struct alert_by_x_entry));
|
||||
|
||||
dictionary_register_insert_callback(ctl.alerts.by_type, alerts_by_x_insert_callback, NULL);
|
||||
dictionary_register_conflict_callback(ctl.alerts.by_type, alerts_by_x_conflict_callback, NULL);
|
||||
|
||||
ctl.alerts.by_component = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
NULL, sizeof(struct alert_by_x_entry));
|
||||
|
||||
dictionary_register_insert_callback(ctl.alerts.by_component, alerts_by_x_insert_callback, NULL);
|
||||
dictionary_register_conflict_callback(ctl.alerts.by_component, alerts_by_x_conflict_callback, NULL);
|
||||
|
||||
ctl.alerts.by_classification = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
NULL, sizeof(struct alert_by_x_entry));
|
||||
|
||||
dictionary_register_insert_callback(ctl.alerts.by_classification, alerts_by_x_insert_callback, NULL);
|
||||
dictionary_register_conflict_callback(ctl.alerts.by_classification, alerts_by_x_conflict_callback, NULL);
|
||||
|
||||
ctl.alerts.by_recipient = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
NULL, sizeof(struct alert_by_x_entry));
|
||||
|
||||
dictionary_register_insert_callback(ctl.alerts.by_recipient, alerts_by_x_insert_callback, NULL);
|
||||
dictionary_register_conflict_callback(ctl.alerts.by_recipient, alerts_by_x_conflict_callback, NULL);
|
||||
|
||||
ctl.alerts.by_module = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
NULL, sizeof(struct alert_by_x_entry));
|
||||
|
||||
dictionary_register_insert_callback(ctl.alerts.by_module, alerts_by_x_insert_callback, NULL);
|
||||
dictionary_register_conflict_callback(ctl.alerts.by_module, alerts_by_x_conflict_callback, NULL);
|
||||
|
||||
if(ctl.options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES | CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) {
|
||||
ctl.alerts.alert_instances = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
|
@ -2150,8 +2311,13 @@ cleanup:
|
|||
dictionary_destroy(ctl.nodes.dict);
|
||||
dictionary_destroy(ctl.contexts.dict);
|
||||
dictionary_destroy(ctl.functions.dict);
|
||||
dictionary_destroy(ctl.alerts.alerts);
|
||||
dictionary_destroy(ctl.alerts.summary);
|
||||
dictionary_destroy(ctl.alerts.alert_instances);
|
||||
dictionary_destroy(ctl.alerts.by_type);
|
||||
dictionary_destroy(ctl.alerts.by_component);
|
||||
dictionary_destroy(ctl.alerts.by_classification);
|
||||
dictionary_destroy(ctl.alerts.by_recipient);
|
||||
dictionary_destroy(ctl.alerts.by_module);
|
||||
simple_pattern_free(ctl.nodes.scope_pattern);
|
||||
simple_pattern_free(ctl.nodes.pattern);
|
||||
simple_pattern_free(ctl.contexts.pattern);
|
||||
|
|
|
@ -627,7 +627,7 @@ static bool query_target_match_alert_pattern(RRDINSTANCE_ACQUIRED *ria, SIMPLE_P
|
|||
rw_spinlock_read_lock(&st->alerts.spinlock);
|
||||
if (st->alerts.base) {
|
||||
for (RRDCALC *rc = st->alerts.base; rc; rc = rc->next) {
|
||||
SIMPLE_PATTERN_RESULT ret = simple_pattern_matches_string_extract(pattern, rc->name, NULL, 0);
|
||||
SIMPLE_PATTERN_RESULT ret = simple_pattern_matches_string_extract(pattern, rc->config.name, NULL, 0);
|
||||
|
||||
if(ret == SP_MATCHED_POSITIVE) {
|
||||
matched = true;
|
||||
|
@ -641,7 +641,7 @@ static bool query_target_match_alert_pattern(RRDINSTANCE_ACQUIRED *ria, SIMPLE_P
|
|||
else
|
||||
buffer_flush(wb);
|
||||
|
||||
buffer_fast_strcat(wb, string2str(rc->name), string_strlen(rc->name));
|
||||
buffer_fast_strcat(wb, string2str(rc->config.name), string_strlen(rc->config.name));
|
||||
buffer_fast_strcat(wb, ":", 1);
|
||||
buffer_strcat(wb, rrdcalc_status2string(rc->status));
|
||||
|
||||
|
|
|
@ -229,7 +229,7 @@ static void after_work_standard_callback(uv_work_t* req, int status) {
|
|||
worker_is_idle();
|
||||
}
|
||||
|
||||
static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct completion *completion, enum rrdeng_opcode opcode, work_cb work_cb, after_work_cb after_work_cb) {
|
||||
static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct completion *completion, enum rrdeng_opcode opcode, work_cb do_work_cb, after_work_cb do_after_work_cb) {
|
||||
struct rrdeng_work *work_request = NULL;
|
||||
|
||||
internal_fatal(rrdeng_main.tid != gettid(), "work_dispatch() can only be run from the event loop thread");
|
||||
|
@ -240,8 +240,8 @@ static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct com
|
|||
work_request->ctx = ctx;
|
||||
work_request->data = data;
|
||||
work_request->completion = completion;
|
||||
work_request->work_cb = work_cb;
|
||||
work_request->after_work_cb = after_work_cb;
|
||||
work_request->work_cb = do_work_cb;
|
||||
work_request->after_work_cb = do_after_work_cb;
|
||||
work_request->opcode = opcode;
|
||||
|
||||
if(uv_queue_work(&rrdeng_main.loop, &work_request->req, work_standard_worker, after_work_standard_callback)) {
|
||||
|
|
|
@ -20,14 +20,11 @@ typedef struct rrdhost RRDHOST;
|
|||
typedef struct rrddim RRDDIM;
|
||||
typedef struct rrdset RRDSET;
|
||||
typedef struct rrdcalc RRDCALC;
|
||||
typedef struct rrdcalctemplate RRDCALCTEMPLATE;
|
||||
typedef struct alarm_entry ALARM_ENTRY;
|
||||
|
||||
typedef struct rrdlabels RRDLABELS;
|
||||
|
||||
typedef struct rrdfamily_acquired RRDFAMILY_ACQUIRED;
|
||||
typedef struct rrdvar_acquired RRDVAR_ACQUIRED;
|
||||
typedef struct rrdsetvar_acquired RRDSETVAR_ACQUIRED;
|
||||
typedef struct rrdcalc_acquired RRDCALC_ACQUIRED;
|
||||
|
||||
typedef struct rrdhost_acquired RRDHOST_ACQUIRED;
|
||||
|
@ -106,11 +103,8 @@ struct ml_metrics_statistics {
|
|||
#include "daemon/common.h"
|
||||
#include "web/api/queries/query.h"
|
||||
#include "web/api/queries/rrdr.h"
|
||||
#include "rrdvar.h"
|
||||
#include "rrdsetvar.h"
|
||||
#include "rrddimvar.h"
|
||||
#include "rrdcalc.h"
|
||||
#include "rrdcalctemplate.h"
|
||||
#include "health/rrdvar.h"
|
||||
#include "health/rrdcalc.h"
|
||||
#include "rrdlabels.h"
|
||||
#include "streaming/rrdpush.h"
|
||||
#include "aclk/aclk_rrdhost_state.h"
|
||||
|
@ -214,16 +208,6 @@ typedef enum __attribute__ ((__packed__)) rrd_algorithm {
|
|||
RRD_ALGORITHM rrd_algorithm_id(const char *name);
|
||||
const char *rrd_algorithm_name(RRD_ALGORITHM algorithm);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRD FAMILY
|
||||
|
||||
const RRDFAMILY_ACQUIRED *rrdfamily_add_and_acquire(RRDHOST *host, const char *id);
|
||||
void rrdfamily_release(RRDHOST *host, const RRDFAMILY_ACQUIRED *rfa);
|
||||
void rrdfamily_index_init(RRDHOST *host);
|
||||
void rrdfamily_index_destroy(RRDHOST *host);
|
||||
DICTIONARY *rrdfamily_rrdvars_dict(const RRDFAMILY_ACQUIRED *rf);
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// flags & options
|
||||
|
||||
|
@ -740,9 +724,6 @@ struct rrdset {
|
|||
int32_t update_every; // data collection frequency
|
||||
|
||||
RRDLABELS *rrdlabels; // chart labels
|
||||
DICTIONARY *rrdsetvar_root_index; // chart variables
|
||||
DICTIONARY *rrddimvar_root_index; // dimension variables
|
||||
// we use this dictionary to manage their allocation
|
||||
|
||||
uint32_t version; // the metadata version (auto-increment)
|
||||
|
||||
|
@ -833,7 +814,6 @@ struct rrdset {
|
|||
NETDATA_DOUBLE red; // red threshold for this chart
|
||||
|
||||
DICTIONARY *rrdvars; // RRDVAR index for this chart
|
||||
const RRDFAMILY_ACQUIRED *rrdfamily; // pointer to RRDFAMILY dictionary item, this chart belongs to
|
||||
|
||||
struct {
|
||||
RW_SPINLOCK spinlock; // protection for RRDCALC *base
|
||||
|
@ -1274,9 +1254,6 @@ struct rrdhost {
|
|||
// all RRDCALCs are primarily allocated and linked here
|
||||
DICTIONARY *rrdcalc_root_index;
|
||||
|
||||
// templates of alarms
|
||||
DICTIONARY *rrdcalctemplate_root_index;
|
||||
|
||||
ALARM_LOG health_log; // alarms historical events (event log)
|
||||
uint32_t health_last_processed_id; // the last processed health id from the log
|
||||
uint32_t health_max_unique_id; // the max alarm log unique id given for the host
|
||||
|
@ -1306,7 +1283,6 @@ struct rrdhost {
|
|||
DICTIONARY *rrdset_root_index; // the host's charts index (by id)
|
||||
DICTIONARY *rrdset_root_index_name; // the host's charts index (by name)
|
||||
|
||||
DICTIONARY *rrdfamily_root_index; // the host's chart families index
|
||||
DICTIONARY *rrdvars; // the host's chart variables index
|
||||
// this includes custom host variables
|
||||
|
||||
|
@ -1414,8 +1390,8 @@ RRDHOST *rrdhost_find_or_create(
|
|||
const char *abbrev_timezone,
|
||||
int32_t utc_offset,
|
||||
const char *tags,
|
||||
const char *program_name,
|
||||
const char *program_version,
|
||||
const char *prog_name,
|
||||
const char *prog_version,
|
||||
int update_every,
|
||||
long history,
|
||||
RRD_MEMORY_MODE mode,
|
||||
|
@ -1606,7 +1582,7 @@ void rrdset_reset(RRDSET *st);
|
|||
void set_host_properties(
|
||||
RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, const char *registry_hostname,
|
||||
const char *os, const char *tags, const char *tzone, const char *abbrev_tzone, int32_t utc_offset,
|
||||
const char *program_name, const char *program_version);
|
||||
const char *prog_name, const char *prog_version);
|
||||
|
||||
size_t get_tier_grouping(size_t tier);
|
||||
void store_metric_collection_completed(void);
|
||||
|
|
|
@ -1,867 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "rrd.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC helpers
|
||||
|
||||
// Adds a JSON array member named `key` to `wb`, holding one string
// for every RRDCALC flag that is set in `flags`.
void rrdcalc_flags_to_json_array(BUFFER *wb, const char *key, RRDCALC_FLAGS flags) {
    // the order of this table is the order of the items in the output
    static const struct {
        RRDCALC_FLAGS flag;
        const char *label;
    } known_flags[] = {
        { RRDCALC_FLAG_DB_ERROR,      "DB_ERROR" },
        { RRDCALC_FLAG_DB_NAN,        "DB_NAN" },
        { RRDCALC_FLAG_CALC_ERROR,    "CALC_ERROR" },
        { RRDCALC_FLAG_WARN_ERROR,    "WARN_ERROR" },
        { RRDCALC_FLAG_CRIT_ERROR,    "CRIT_ERROR" },
        { RRDCALC_FLAG_RUNNABLE,      "RUNNABLE" },
        { RRDCALC_FLAG_DISABLED,      "DISABLED" },
        { RRDCALC_FLAG_SILENCED,      "SILENCED" },
        { RRDCALC_FLAG_RUN_ONCE,      "RUN_ONCE" },
        { RRDCALC_FLAG_FROM_TEMPLATE, "FROM_TEMPLATE" },
    };

    buffer_json_member_add_array(wb, key);

    for(size_t i = 0; i < sizeof(known_flags) / sizeof(known_flags[0]); i++) {
        if(flags & known_flags[i].flag)
            buffer_json_add_array_item_string(wb, known_flags[i].label);
    }

    buffer_json_array_close(wb);
}
|
||||
|
||||
// Returns the textual name of an alert status.
// Values outside the known set are logged and reported as "UNKNOWN".
inline const char *rrdcalc_status2string(RRDCALC_STATUS status) {
    const char *label = NULL;

    switch(status) {
        case RRDCALC_STATUS_REMOVED:       label = "REMOVED";       break;
        case RRDCALC_STATUS_UNDEFINED:     label = "UNDEFINED";     break;
        case RRDCALC_STATUS_UNINITIALIZED: label = "UNINITIALIZED"; break;
        case RRDCALC_STATUS_CLEAR:         label = "CLEAR";         break;
        case RRDCALC_STATUS_RAISED:        label = "RAISED";        break;
        case RRDCALC_STATUS_WARNING:       label = "WARNING";       break;
        case RRDCALC_STATUS_CRITICAL:      label = "CRITICAL";      break;
        default:                                                    break;
    }

    if(!label) {
        netdata_log_error("Unknown alarm status %d", status);
        label = "UNKNOWN";
    }

    return label;
}
|
||||
|
||||
// Returns the alarm id to use for the alert identified by
// (chart, name, config_hash_id) on this host.
//
// Lookup order:
//  1. the in-memory alarm log of the host (re-use the id of a matching entry)
//  2. sql_get_alarm_id()
//  3. sql_get_alarm_id_check_zero_hash()
//  4. a new id taken from host->health_log.next_alarm_id
//
// When an in-memory entry matches and next_event_id is not NULL,
// *next_event_id is set to that entry's alarm_event_id + 1.
// next_event_id is also handed to the two sql lookups.
uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id) {
    uint32_t alarm_id;

    rw_spinlock_read_lock(&host->health_log.spinlock);

    // re-use old IDs, by looking them up in the alarm log
    ALARM_ENTRY *match = host->health_log.alarms;
    while(match) {
        if(unlikely(name == match->name && chart == match->chart && !uuid_memcmp(&match->config_hash_id, config_hash_id)))
            break;

        match = match->next;
    }

    if(match) {
        if(next_event_id)
            *next_event_id = match->alarm_event_id + 1;

        alarm_id = match->alarm_id;
    }
    else {
        alarm_id = sql_get_alarm_id(host, chart, name, next_event_id, config_hash_id);

        // check possible stored config hash as zeroes or null
        if(!alarm_id)
            alarm_id = sql_get_alarm_id_check_zero_hash(host, chart, name, next_event_id, config_hash_id);

        if(!alarm_id) {
            // NOTE(review): next_alarm_id is modified here while only the
            // read side of the spinlock is held - confirm callers are serialized
            if(unlikely(!host->health_log.next_alarm_id))
                host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();

            alarm_id = host->health_log.next_alarm_id++;
        }
    }

    rw_spinlock_read_unlock(&host->health_log.spinlock);

    return alarm_id;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC replacing info/summary text variables with RRDSET labels
|
||||
|
||||
// Expands the variables of an info/summary line using the chart the alert
// is linked to, and returns the result as a new STRING.
//
//  - RRDCALC_VAR_FAMILY is replaced with the family of the chart
//    (or an empty string when the alert has no chart or the chart no family)
//  - variables starting with RRDCALC_VAR_LABEL are replaced with the value
//    of the chart label they name; they are left untouched when the alert
//    has no chart, the chart has no labels, or the label does not exist
//
// Returns NULL when line is NULL or empty.
// The caller owns the returned STRING and must string_freez() it.
static STRING *rrdcalc_replace_variables_with_rrdset_labels(const char *line, RRDCALC *rc) {
    if (!line || !*line)
        return NULL;

    size_t pos = 0;
    char *temp = strdupz(line);
    char var[RRDCALC_VAR_MAX];
    char *m;

    // visit every '$' of the line; a '$' that does not start a "${...}"
    // variable is skipped, it must not stop the scan
    while ((m = strchr(temp + pos, '$'))) {
        // the next search resumes right after this '$',
        // even when temp is replaced below
        pos = m - temp + 1;

        if (*(m + 1) != '{')
            continue;

        // copy the variable, including its closing '}', into var
        int i = 0;
        bool terminated = false;
        for (const char *e = m; *e && i < RRDCALC_VAR_MAX - 1; e++) {
            var[i++] = *e;

            if (*e == '}') {
                terminated = true;
                break;
            }
        }
        var[i] = '\0';

        // no closing brace (or variable too long): not a variable, leave as is
        if (!terminated)
            continue;

        if (!strcmp(var, RRDCALC_VAR_FAMILY)) {
            char *buf = find_and_replace(temp, var, (rc->rrdset && rc->rrdset->family) ? rrdset_family(rc->rrdset) : "", m);
            freez(temp);
            temp = buf;
        }
        else if (!strncmp(var, RRDCALC_VAR_LABEL, RRDCALC_VAR_LABEL_LEN) && (size_t)i > (size_t)RRDCALC_VAR_LABEL_LEN) {
            // the label key is whatever sits between the prefix and the '}'
            char label_key[RRDCALC_VAR_MAX];
            size_t key_len = (size_t)i - (size_t)RRDCALC_VAR_LABEL_LEN - 1;
            memcpy(label_key, var + RRDCALC_VAR_LABEL_LEN, key_len);
            label_key[key_len] = '\0';

            if(likely(rc->rrdset && rc->rrdset->rrdlabels)) {
                char *lbl_value = NULL;
                rrdlabels_get_value_strdup_or_null(rc->rrdset->rrdlabels, &lbl_value, label_key);
                if (lbl_value) {
                    char *buf = find_and_replace(temp, var, lbl_value, m);
                    freez(temp);
                    temp = buf;
                    freez(lbl_value);
                }
            }
        }
    }

    STRING *ret = string_strdupz(temp);
    freez(temp);

    return ret;
}
|
||||
|
||||
// Re-generates rc->info and rc->summary from their original (unexpanded)
// texts, when the labels of the linked chart have changed since the last time.
//
// Nothing happens when the alert is not linked to a chart, the chart has no
// labels, or the labels version is the one already applied.
// info and summary are handled independently: an alert that has only one of
// them still gets that one refreshed.
void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc) {
    if(!rc->rrdset || !rc->rrdset->rrdlabels) return;

    size_t labels_version = rrdlabels_version(rc->rrdset->rrdlabels);
    if(rc->labels_version == labels_version) return;

    if (rc->original_info) {
        STRING *old = rc->info;
        rc->info = rrdcalc_replace_variables_with_rrdset_labels(rrdcalc_original_info(rc), rc);
        string_freez(old);
    }

    if (rc->original_summary) {
        STRING *old = rc->summary;
        rc->summary = rrdcalc_replace_variables_with_rrdset_labels(rrdcalc_original_summary(rc), rc);
        string_freez(old);
    }

    // remember which labels version the texts were generated from
    rc->labels_version = labels_version;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC index management for RRDSET
|
||||
|
||||
// the dictionary requires a unique key for every item
|
||||
// we use {chart id}/{alert name} for both the RRDHOST and RRDSET alert indexes.
|
||||
|
||||
#define RRDCALC_MAX_KEY_SIZE 1024
|
||||
// Writes the dictionary key of an alert, "<chart>/<alert>", into dst
// (at most dst_len bytes) and returns what snprintfz() returned.
static size_t rrdcalc_key(char *dst, size_t dst_len, const char *chart, const char *alert) {
    size_t written = snprintfz(dst, dst_len, "%s/%s", chart, alert);
    return written;
}
|
||||
|
||||
// Finds and acquires the alert named alert_name of chart st,
// in the alerts index of the host of the chart.
// The chart id is tried first; the chart name is tried only if that fails.
// Returns NULL when neither key exists.
// A non-NULL result has to be released with rrdcalc_from_rrdset_release().
const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name) {
    DICTIONARY *index = st->rrdhost->rrdcalc_root_index;
    char key[RRDCALC_MAX_KEY_SIZE + 1];
    size_t key_len;

    // 1st attempt: {chart id}/{alert name}
    key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_id(st), alert_name);
    const RRDCALC_ACQUIRED *rca = (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(index, key, (ssize_t)key_len);
    if(rca)
        return rca;

    // 2nd attempt: {chart name}/{alert name}
    key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_name(st), alert_name);
    return (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(index, key, (ssize_t)key_len);
}
|
||||
|
||||
// Releases an alert acquired with rrdcalc_from_rrdset_get().
// Passing NULL is allowed and does nothing.
void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca) {
    if(rca)
        dictionary_acquired_item_release(st->rrdhost->rrdcalc_root_index, (const DICTIONARY_ITEM *)rca);
}
|
||||
|
||||
RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca) {
|
||||
if(rca)
|
||||
return dictionary_acquired_item_value((const DICTIONARY_ITEM *)rca);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC managing the linking with RRDSET
|
||||
|
||||
// Links alert rc to chart st:
//  - appends rc to the list of alerts of the chart (under the chart's alerts spinlock)
//  - raises the alert's update_every to the one of the chart, if it was lower
//  - copies the green/red thresholds to the chart, when the chart has none
//  - registers the alert value as a variable in the chart, family and host scopes
//  - inherits the units of the chart when the alert has none
//  - refreshes info/summary from the chart labels, defaulting summary to the alert name
//  - adds a REMOVED -> current status entry to the health log of the host
static void rrdcalc_link_to_rrdset(RRDSET *st, RRDCALC *rc) {
    RRDHOST *host = st->rrdhost;

    netdata_log_debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host));

    rc->last_status_change_value = rc->value;
    rc->last_status_change = now_realtime_sec();
    rc->rrdset = st;

    // add it to the list of alerts of the chart
    rw_spinlock_write_lock(&st->alerts.spinlock);
    DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(st->alerts.base, rc, prev, next);
    rw_spinlock_write_unlock(&st->alerts.spinlock);

    // an alert cannot run more frequently than its chart is updated
    if(rc->update_every < st->update_every) {
        netdata_log_info("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rrdset_id(st), rrdcalc_name(rc), rc->update_every, st->update_every, st->update_every);
        rc->update_every = st->update_every;
    }

    // the first alert with a threshold defines the threshold of the chart
    if(!isnan(rc->green) && isnan(st->green)) {
        netdata_log_debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from " NETDATA_DOUBLE_FORMAT_AUTO
                                    " to " NETDATA_DOUBLE_FORMAT_AUTO ".", rrdset_id(st), rrdcalc_name(rc), st->green, rc->green);
        st->green = rc->green;
    }

    if(!isnan(rc->red) && isnan(st->red)) {
        netdata_log_debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from " NETDATA_DOUBLE_FORMAT_AUTO " to " NETDATA_DOUBLE_FORMAT_AUTO
                                    ".", rrdset_id(st), rrdcalc_name(rc), st->red, rc->red);
        st->red = rc->red;
    }

    // the names of the host level variables: {chart name}.{alert} and {chart id}.{alert}
    char varname[RRDVAR_MAX_LENGTH + 1];

    snprintfz(varname, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_name(st), rrdcalc_name(rc));
    STRING *var_by_chart_name = string_strdupz(varname);

    snprintfz(varname, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_id(st), rrdcalc_name(rc));
    STRING *var_by_chart_id = string_strdupz(varname);

    rc->rrdvar_local = rrdvar_add_and_acquire(
        "local",
        st->rrdvars,
        rc->name,
        RRDVAR_TYPE_CALCULATED,
        RRDVAR_FLAG_RRDCALC_LOCAL_VAR,
        &rc->value);

    rc->rrdvar_family = rrdvar_add_and_acquire(
        "family",
        rrdfamily_rrdvars_dict(st->rrdfamily),
        rc->name,
        RRDVAR_TYPE_CALCULATED,
        RRDVAR_FLAG_RRDCALC_FAMILY_VAR,
        &rc->value);

    rc->rrdvar_host_chart_name = rrdvar_add_and_acquire(
        "host",
        host->rrdvars,
        var_by_chart_name,
        RRDVAR_TYPE_CALCULATED,
        RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR,
        &rc->value);

    // when the chart name variable was not acquired,
    // the chart id variable is flagged as the chart name variable too
    rc->rrdvar_host_chart_id = rrdvar_add_and_acquire(
        "host",
        host->rrdvars,
        var_by_chart_id,
        RRDVAR_TYPE_CALCULATED,
        RRDVAR_FLAG_RRDCALC_HOST_CHARTID_VAR | ((rc->rrdvar_host_chart_name) ? 0 : RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR),
        &rc->value);

    string_freez(var_by_chart_id);
    string_freez(var_by_chart_name);

    if(!rc->units)
        rc->units = string_dup(st->units);

    rrdvar_store_for_chart(host, st);

    rrdcalc_update_info_using_rrdset_labels(rc);

    // alerts without a summary use their name as summary
    if(!rc->summary) {
        rc->summary = string_dup(rc->name);
        rc->original_summary = string_dup(rc->name);
    }

    time_t now = now_realtime_sec();

    ALARM_ENTRY *ae = health_create_alarm_entry(
        host,
        rc->id,
        rc->next_event_id++,
        rc->config_hash_id,
        now,
        rc->name,
        st->id,
        st->context,
        st->name,
        rc->classification,
        rc->component,
        rc->type,
        rc->exec,
        rc->recipient,
        now - rc->last_status_change,
        rc->old_value,
        rc->value,
        RRDCALC_STATUS_REMOVED,
        rc->status,
        rc->source,
        rc->units,
        rc->summary,
        rc->info,
        0,
        rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0);

    health_alarm_log_add_entry(host, ae);
    rrdset_flag_set(st, RRDSET_FLAG_HAS_RRDCALC_LINKED);
}
|
||||
|
||||
// Unlinks alert rc from the chart it is linked to:
//  - adds a current status -> REMOVED entry to the health log of the host,
//    unless the alert is already in REMOVED status
//  - removes rc from the list of alerts of the chart
//  - releases and deletes the variables created when the alert was linked
//
// having_ll_wrlock: true when the caller already holds the write lock of the
// chart's alerts list, so it is not taken (and released) here.
//
// An alert that is not linked to a chart is only logged.
static void rrdcalc_unlink_from_rrdset(RRDCALC *rc, bool having_ll_wrlock) {
    RRDSET *st = rc->rrdset;

    if(!st) {
        netdata_log_debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc));
        netdata_log_error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc));
        return;
    }

    RRDHOST *host = st->rrdhost;

    if (likely(rc->status != RRDCALC_STATUS_REMOVED)) {
        time_t now = now_realtime_sec();

        ALARM_ENTRY *ae = health_create_alarm_entry(
            host,
            rc->id,
            rc->next_event_id++,
            rc->config_hash_id,
            now,
            rc->name,
            st->id,
            st->context,
            st->name,
            rc->classification,
            rc->component,
            rc->type,
            rc->exec,
            rc->recipient,
            now - rc->last_status_change,
            rc->old_value,
            rc->value,
            rc->status,
            RRDCALC_STATUS_REMOVED,
            rc->source,
            rc->units,
            rc->summary,
            rc->info,
            0,
            0);

        health_alarm_log_add_entry(host, ae);
    }

    netdata_log_debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host));

    // take it out of the list of alerts of the chart
    if(having_ll_wrlock)
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(st->alerts.base, rc, prev, next);
    else {
        rw_spinlock_write_lock(&st->alerts.spinlock);
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(st->alerts.base, rc, prev, next);
        rw_spinlock_write_unlock(&st->alerts.spinlock);
    }

    rc->rrdset = NULL;

    // drop the variables that expose the value of this alert
    rrdvar_release_and_del(st->rrdvars, rc->rrdvar_local);
    rc->rrdvar_local = NULL;

    rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rc->rrdvar_family);
    rc->rrdvar_family = NULL;

    rrdvar_release_and_del(host->rrdvars, rc->rrdvar_host_chart_id);
    rc->rrdvar_host_chart_id = NULL;

    rrdvar_release_and_del(host->rrdvars, rc->rrdvar_host_chart_name);
    rc->rrdvar_host_chart_name = NULL;

    // RRDCALC will remain in RRDHOST
    // so that if the matching chart is found in the future
    // it will be applied automatically
}
|
||||
|
||||
// Returns true when alert rc can be linked to chart st.
//
// The alert matches when its chart is the id or the name of st (the STRING
// pointers are compared) and every pattern the alert defines accepts the chart:
//  - module_pattern       against the module name of the chart
//  - plugin_pattern       against the plugin name of the chart
//  - host_labels_pattern  against the labels of the host (only when the host has labels)
//  - chart_labels_pattern against the labels of the chart (only when the chart has labels)
static inline bool rrdcalc_check_if_it_matches_rrdset(RRDCALC *rc, RRDSET *st) {
    if ( (rc->chart != st->id)
      && (rc->chart != st->name))
        return false;

    if (rc->module_pattern && !simple_pattern_matches_string(rc->module_pattern, st->module_name))
        return false;

    // the plugin filter is checked against the plugin of the chart, not its module
    if (rc->plugin_pattern && !simple_pattern_matches_string(rc->plugin_pattern, st->plugin_name))
        return false;

    if (st->rrdhost->rrdlabels && rc->host_labels_pattern && !rrdlabels_match_simple_pattern_parsed(
            st->rrdhost->rrdlabels, rc->host_labels_pattern, '=', NULL))
        return false;

    if (st->rrdlabels && rc->chart_labels_pattern && !rrdlabels_match_simple_pattern_parsed(
            st->rrdlabels, rc->chart_labels_pattern, '=', NULL))
        return false;

    return true;
}
|
||||
|
||||
// Walks all the alerts of the host of chart st and links to st
// every alert that is not linked yet and matches it.
void rrdcalc_link_matching_alerts_to_rrdset(RRDSET *st) {
    RRDHOST *host = st->rrdhost;

    RRDCALC *rc;
    foreach_rrdcalc_in_rrdhost_read(host, rc) {
        // alerts already linked to a chart are not examined
        if(!rc->rrdset && unlikely(rrdcalc_check_if_it_matches_rrdset(rc, st)))
            rrdcalc_link_to_rrdset(st, rc);
    }
    foreach_rrdcalc_in_rrdhost_done(rc);
}
|
||||
|
||||
// Callback for walking charts: links the alert passed as rrdcalc to chart st
// when they match.
// Returns -1 after linking, 0 when the chart does not match.
static inline int rrdcalc_check_and_link_rrdset_callback(RRDSET *st, void *rrdcalc) {
    RRDCALC *rc = rrdcalc;

    if(likely(!rrdcalc_check_if_it_matches_rrdset(rc, st)))
        return 0;

    rrdcalc_link_to_rrdset(st, rc);
    return -1;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC rrdhost index management - constructor
|
||||
|
||||
struct rrdcalc_constructor {
|
||||
RRDHOST *rrdhost; // the host we operate upon
|
||||
RRDCALC *from_config; // points to the original RRDCALC, as loaded from the config
|
||||
RRDCALCTEMPLATE *from_rrdcalctemplate; // the template this alert is generated from
|
||||
RRDSET *rrdset; // when this comes from rrdcalctemplate, we have a matching rrdset
|
||||
const char *overwrite_alert_name; // when we have a dimension foreach, the alert is renamed
|
||||
const char *overwrite_dimensions; // when we have a dimension foreach, the dimensions filter is renamed
|
||||
|
||||
enum {
|
||||
RRDCALC_REACT_NONE,
|
||||
RRDCALC_REACT_NEW,
|
||||
} react_action;
|
||||
|
||||
bool existing_from_template;
|
||||
};
|
||||
|
||||
// Dictionary insert callback: initializes an alert that was just added
// to the alerts index of a host.
//
// constructor_data is a struct rrdcalc_constructor:
//  - with from_rrdcalctemplate set, all the members of the new alert are
//    generated from the template and the matching chart
//  - with from_config set, the dictionary has already copied the alert,
//    so nothing has to be copied here
//
// In both cases the alert gets its key, its unique id and its expressions
// bound to it, and the constructor is marked RRDCALC_REACT_NEW.
static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) {
    struct rrdcalc_constructor *ctr = constructor_data;
    RRDCALC *rc = rrdcalc;
    RRDHOST *host = ctr->rrdhost;

    rc->key = string_strdupz(dictionary_acquired_item_name(item));

    if(ctr->from_rrdcalctemplate) {
        RRDCALCTEMPLATE *rt = ctr->from_rrdcalctemplate;
        RRDSET *st = ctr->rrdset;

        rc->run_flags |= RRDCALC_FLAG_FROM_TEMPLATE;
        rc->next_event_id = 1;

        // identity
        if(ctr->overwrite_alert_name)
            rc->name = string_strdupz(ctr->overwrite_alert_name);
        else
            rc->name = string_dup(rt->name);

        rc->chart = string_dup(st->id);
        uuid_copy(rc->config_hash_id, rt->config_hash_id);

        // dimensions
        if(ctr->overwrite_dimensions)
            rc->dimensions = string_strdupz(ctr->overwrite_dimensions);
        else
            rc->dimensions = string_dup(rt->dimensions);

        rc->foreach_dimension = NULL;
        rc->foreach_dimension_pattern = NULL;

        // thresholds and values
        rc->green = rt->green;
        rc->red = rt->red;
        rc->value = NAN;
        rc->old_value = NAN;

        // notification delays
        rc->delay_up_duration = rt->delay_up_duration;
        rc->delay_down_duration = rt->delay_down_duration;
        rc->delay_max_duration = rt->delay_max_duration;
        rc->delay_multiplier = rt->delay_multiplier;

        // notification repeats
        rc->last_repeat = 0;
        rc->times_repeat = 0;
        rc->warn_repeat_every = rt->warn_repeat_every;
        rc->crit_repeat_every = rt->crit_repeat_every;

        // database lookup
        rc->group = rt->group;
        rc->after = rt->after;
        rc->before = rt->before;
        rc->update_every = rt->update_every;
        rc->options = rt->options;

        // texts
        rc->exec = string_dup(rt->exec);
        rc->recipient = string_dup(rt->recipient);
        rc->source = string_dup(rt->source);
        rc->units = string_dup(rt->units);
        rc->info = string_dup(rt->info);
        rc->original_info = string_dup(rt->info);

        // a template without a summary gets the name of this alert as summary
        if (!rt->summary)
            rt->summary = string_dup(rc->name);
        rc->summary = string_dup(rt->summary);
        rc->original_summary = string_dup(rt->summary);

        rc->classification = string_dup(rt->classification);
        rc->component = string_dup(rt->component);
        rc->type = string_dup(rt->type);

        // each alert gets its own parsed copy of the template's expressions
        if(rt->calculation) {
            rc->calculation = expression_parse(rt->calculation->source, NULL, NULL);
            if(!rc->calculation)
                netdata_log_error("Health alarm '%s.%s': failed to parse calculation expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->calculation->source);
        }

        if(rt->warning) {
            rc->warning = expression_parse(rt->warning->source, NULL, NULL);
            if(!rc->warning)
                netdata_log_error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->warning->source);
        }

        if(rt->critical) {
            rc->critical = expression_parse(rt->critical->source, NULL, NULL);
            if(!rc->critical)
                netdata_log_error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", rrdset_id(st), rrdcalctemplate_name(rt), rt->critical->source);
        }
    }
    else if(ctr->from_config) {
        // dictionary has already copied all the members values and pointers
        // no need for additional work in this case
        ;
    }

    rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id, &rc->config_hash_id);

    // make the expressions of the alert point to the alert itself

    if(rc->calculation) {
        rc->calculation->rrdcalc = rc;
        rc->calculation->status = &rc->status;
        rc->calculation->myself = &rc->value;
        rc->calculation->after = &rc->db_after;
        rc->calculation->before = &rc->db_before;
    }

    if(rc->warning) {
        rc->warning->rrdcalc = rc;
        rc->warning->status = &rc->status;
        rc->warning->myself = &rc->value;
        rc->warning->after = &rc->db_after;
        rc->warning->before = &rc->db_before;
    }

    if(rc->critical) {
        rc->critical->rrdcalc = rc;
        rc->critical->status = &rc->status;
        rc->critical->myself = &rc->value;
        rc->critical->after = &rc->db_after;
        rc->critical->before = &rc->db_before;
    }

    netdata_log_debug(D_HEALTH, "Health added alarm '%s.%s': exec '%s', recipient '%s', green " NETDATA_DOUBLE_FORMAT_AUTO
                                ", red " NETDATA_DOUBLE_FORMAT_AUTO
                                ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
                      rrdcalc_chart_name(rc),
                      rrdcalc_name(rc),
                      (rc->exec)?rrdcalc_exec(rc):"DEFAULT",
                      (rc->recipient)?rrdcalc_recipient(rc):"DEFAULT",
                      rc->green,
                      rc->red,
                      (int)rc->group,
                      rc->after,
                      rc->before,
                      rc->options,
                      (rc->dimensions)?rrdcalc_dimensions(rc):"NONE",
                      (rc->foreach_dimension)?rrdcalc_foreachdim(rc):"NONE",
                      rc->update_every,
                      (rc->calculation)?rc->calculation->parsed_as:"NONE",
                      (rc->warning)?rc->warning->parsed_as:"NONE",
                      (rc->critical)?rc->critical->parsed_as:"NONE",
                      rrdcalc_source(rc),
                      rc->delay_up_duration,
                      rc->delay_down_duration,
                      rc->delay_max_duration,
                      rc->delay_multiplier,
                      rc->warn_repeat_every,
                      rc->crit_repeat_every
    );

    ctr->react_action = RRDCALC_REACT_NEW;
}
|
||||
|
||||
// Dictionary conflict callback: an alert with the same key is already indexed.
// The existing alert is left as it is; the constructor is told whether the
// existing alert has the FROM_TEMPLATE flag, and that there is nothing to
// react on.
// Always returns false.
static bool rrdcalc_rrdhost_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdcalc_new __maybe_unused, void *constructor_data ) {
    struct rrdcalc_constructor *ctr = constructor_data;
    RRDCALC *existing = rrdcalc;

    ctr->existing_from_template = (existing->run_flags & RRDCALC_FLAG_FROM_TEMPLATE) ? true : false;
    ctr->react_action = RRDCALC_REACT_NONE;

    return false;
}
|
||||
|
||||
// Dictionary react callback: links a newly inserted alert to its chart.
//  - when the constructor has a chart, the alert is linked to it
//  - otherwise, when it comes from a template, the instances of the context
//    of the template are walked, looking for a chart that matches
// Nothing is done when the alert was not just inserted.
static void rrdcalc_rrdhost_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) {
    struct rrdcalc_constructor *ctr = constructor_data;
    RRDCALC *rc = rrdcalc;

    if(ctr->react_action != RRDCALC_REACT_NEW)
        return;

    if(ctr->rrdset) {
        rrdcalc_link_to_rrdset(ctr->rrdset, rc);
        return;
    }

    if (ctr->from_rrdcalctemplate) {
        RRDHOST *host = ctr->rrdhost;
        rrdcontext_foreach_instance_with_rrdset_in_context(host, string2str(ctr->from_rrdcalctemplate->context), rrdcalc_check_and_link_rrdset_callback, rc);
    }
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC rrdhost index management - destructor
|
||||
|
||||
// Releases everything an alert owns: its parsed expressions, its strings
// and its patterns. The RRDCALC structure itself is not freed.
// Passing NULL is allowed and does nothing.
static void rrdcalc_free_internals(RRDCALC *rc) {
    if(unlikely(!rc)) return;

    expression_free(rc->calculation);
    expression_free(rc->warning);
    expression_free(rc->critical);

    string_freez(rc->key);
    string_freez(rc->name);
    string_freez(rc->chart);
    string_freez(rc->dimensions);
    string_freez(rc->foreach_dimension);
    string_freez(rc->exec);
    string_freez(rc->recipient);
    string_freez(rc->source);
    string_freez(rc->units);
    string_freez(rc->info);
    string_freez(rc->original_info);

    // summary and original_summary are duplicated when the alert is created
    // or linked, exactly like info and original_info, so they are released too
    string_freez(rc->summary);
    string_freez(rc->original_summary);

    string_freez(rc->classification);
    string_freez(rc->component);
    string_freez(rc->type);
    string_freez(rc->host_labels);
    string_freez(rc->module_match);
    string_freez(rc->plugin_match);
    string_freez(rc->chart_labels);

    simple_pattern_free(rc->foreach_dimension_pattern);
    simple_pattern_free(rc->host_labels_pattern);
    simple_pattern_free(rc->module_pattern);
    simple_pattern_free(rc->plugin_pattern);
    simple_pattern_free(rc->chart_labels_pattern);
}
|
||||
|
||||
// Dictionary delete callback: an alert is being removed from the alerts
// index of a host. It is unlinked from its chart, if it is still linked,
// and then everything it owns is released.
static void rrdcalc_rrdhost_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdhost __maybe_unused) {
    RRDCALC *rc = rrdcalc;

    if(unlikely(rc->rrdset != NULL))
        rrdcalc_unlink_from_rrdset(rc, false);

    // any destruction actions that require other locks
    // have to be placed in rrdcalc_del(), because the object is actually locked for deletion

    rrdcalc_free_internals(rc);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC rrdhost index management - index API
|
||||
|
||||
// Create the per-host rrdcalc dictionary and register its callbacks.
// Does nothing when the host already has an index.
void rrdcalc_rrdhost_index_init(RRDHOST *host) {
    if(host->rrdcalc_root_index)
        return;

    host->rrdcalc_root_index = dictionary_create_advanced(
        DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
        &dictionary_stats_category_rrdhealth,
        sizeof(RRDCALC));

    // only the delete callback receives the host as its user data
    dictionary_register_insert_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_insert_callback, NULL);
    dictionary_register_conflict_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_conflict_callback, NULL);
    dictionary_register_react_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_react_callback, NULL);
    dictionary_register_delete_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_delete_callback, host);
}
|
||||
|
||||
// Destroy the host's rrdcalc dictionary and clear the pointer,
// so that rrdcalc_rrdhost_index_init() can create it again.
void rrdcalc_rrdhost_index_destroy(RRDHOST *host) {
    dictionary_destroy(host->rrdcalc_root_index);
    host->rrdcalc_root_index = NULL;
}
|
||||
|
||||
// Instantiate an alert for chart `st` out of template `rt` and add it to
// the host's rrdcalc index.
//
// overwrite_alert_name  - when not NULL, used instead of the template name
//                         (both for the key and passed to the constructor)
// overwrite_dimensions  - passed through to the constructor
//
// An error is logged when the dictionary reports that nothing new was
// created and the existing entry did not come from a template.
void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st, const char *overwrite_alert_name, const char *overwrite_dimensions) {
    const char *alert_name = overwrite_alert_name ? overwrite_alert_name : string2str(rt->name);

    char key[RRDCALC_MAX_KEY_SIZE + 1];
    size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_id(st), alert_name);

    // constructor data, filled in further by the dictionary callbacks
    struct rrdcalc_constructor ctr = {
        .rrdhost = host,
        .from_config = NULL,
        .from_rrdcalctemplate = rt,
        .rrdset = st,
        .overwrite_alert_name = overwrite_alert_name,
        .overwrite_dimensions = overwrite_dimensions,
        .react_action = RRDCALC_REACT_NONE,
        .existing_from_template = false,
    };

    dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)key_len, NULL, sizeof(RRDCALC), &ctr);

    if(ctr.react_action == RRDCALC_REACT_NEW || ctr.existing_from_template != false)
        return;

    netdata_log_error("RRDCALC: from template '%s' on chart '%s' with key '%s', failed to be added to host '%s'. It is manually configured.",
                      string2str(rt->name), rrdset_id(st), key, rrdhost_hostname(host));
}
|
||||
|
||||
// Add an alert loaded from the health configuration to the host's index.
//
// Returns 1 when the alert was added, 0 otherwise.
//
// Ownership of `rc`:
//  - validation failure (no chart, no frequency, nothing to evaluate):
//    rc is left untouched and 0 is returned;
//  - added: the dictionary keeps a copy, so only the container is freed here;
//  - already exists: rc is freed completely (internals and container).
int rrdcalc_add_from_config(RRDHOST *host, RRDCALC *rc) {
    if(!rc->chart) {
        netdata_log_error("Health configuration for alarm '%s' does not have a chart", rrdcalc_name(rc));
        return 0;
    }

    if(!rc->update_every) {
        netdata_log_error("Health configuration for alarm '%s.%s' has no frequency (parameter 'every'). Ignoring it.", rrdcalc_chart_name(rc), rrdcalc_name(rc));
        return 0;
    }

    if(!RRDCALC_HAS_DB_LOOKUP(rc) && !rc->calculation && !rc->warning && !rc->critical) {
        netdata_log_error("Health configuration for alarm '%s.%s' is useless (no db lookup, no calculation, no warning and no critical expressions)", rrdcalc_chart_name(rc), rrdcalc_name(rc));
        return 0;
    }

    char key[RRDCALC_MAX_KEY_SIZE + 1];
    size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, string2str(rc->chart), string2str(rc->name));

    struct rrdcalc_constructor ctr = {
        .rrdhost = host,
        .from_config = rc,
        .from_rrdcalctemplate = NULL,
        .rrdset = NULL,
        .react_action = RRDCALC_REACT_NONE,
    };

    RRDCALC *stored = dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)key_len, rc, sizeof(RRDCALC), &ctr);

    if(ctr.react_action != RRDCALC_REACT_NEW) {
        netdata_log_error(
            "RRDCALC: from config '%s' on chart '%s' failed to be added to host '%s'. It already exists.",
            string2str(rc->name),
            string2str(rc->chart),
            rrdhost_hostname(host));

        // nothing was copied: release the internals and the container
        rrdcalc_free_unused_rrdcalc_loaded_from_config(rc);
        return 0;
    }

    // rc has been copied into the dictionary; only the container is ours to free
    freez(rc);

    // the alert came from configuration, so try to link it to the existing charts
    RRDSET *st;
    rrdset_foreach_read(st, host) {
        if(unlikely(rrdcalc_check_and_link_rrdset_callback(st, stored) == -1))
            break;
    }
    rrdset_foreach_done(st);

    return 1;
}
|
||||
|
||||
// Detach the alert from its chart (when linked) and remove it from the
// host's rrdcalc index, using the alert's own key.
// having_ll_wrlock is forwarded as-is to rrdcalc_unlink_from_rrdset().
static void rrdcalc_unlink_and_delete(RRDHOST *host, RRDCALC *rc, bool having_ll_wrlock) {
    if(rc->rrdset)
        rrdcalc_unlink_from_rrdset(rc, having_ll_wrlock);

    const char *key = string2str(rc->key);
    ssize_t key_len = (ssize_t)string_strlen(rc->key);

    dictionary_del_advanced(host->rrdcalc_root_index, key, key_len);
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC cleanup API functions
|
||||
|
||||
// Walk all the alerts of the host and delete the ones that carry a
// host labels filter which the host's labels do not satisfy.
// Alerts without a host labels filter are left alone.
void rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(RRDHOST *host) {
    RRDCALC *rc;
    foreach_rrdcalc_in_rrdhost_reentrant(host, rc) {
        if(!rc->host_labels)
            continue;

        if(rrdlabels_match_simple_pattern_parsed(host->rrdlabels, rc->host_labels_pattern, '=', NULL))
            continue;

        nd_log(NDLS_DAEMON, NDLP_DEBUG,
               "Health configuration for alarm '%s' cannot be applied, "
               "because the host %s does not have the label(s) '%s'",
               rrdcalc_name(rc), rrdhost_hostname(host), rrdcalc_host_labels(rc));

        rrdcalc_unlink_and_delete(host, rc, false);
    }
    foreach_rrdcalc_in_rrdhost_done(rc);
}
|
||||
|
||||
void rrdcalc_delete_alerts_not_matching_host_labels_from_all_hosts() {
|
||||
RRDHOST *host;
|
||||
dfe_start_reentrant(rrdhost_root_index, host) {
|
||||
if (unlikely(!host->health.health_enabled))
|
||||
continue;
|
||||
|
||||
if (host->rrdlabels)
|
||||
rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(host);
|
||||
}
|
||||
dfe_done(host);
|
||||
}
|
||||
|
||||
// Detach every alert linked to the chart, holding the chart's alerts
// write lock for the whole operation.
// Alerts created from templates are also deleted from the host index;
// alerts configured for a specific chart are only unlinked.
void rrdcalc_unlink_all_rrdset_alerts(RRDSET *st) {
    RRDCALC *previous = NULL;

    rw_spinlock_write_lock(&st->alerts.spinlock);

    // always take the head of the list: every iteration is expected to remove it
    for(RRDCALC *rc = st->alerts.base; rc ; rc = st->alerts.base) {
        if(rc == previous) {
            // the head did not change, so the last unlink did not work
            netdata_log_error("RRDCALC: malformed list of alerts linked to chart - cannot cleanup - giving up.");
            break;
        }
        previous = rc;

        if(!(rc->run_flags & RRDCALC_FLAG_FROM_TEMPLATE)) {
            // configured for this specific chart: keep it in the index
            rrdcalc_unlink_from_rrdset(rc, true);
        }
        else {
            // it came from a template, so it can simply be deleted
            rrdcalc_unlink_and_delete(st->rrdhost, rc, true);
        }
    }

    rw_spinlock_write_unlock(&st->alerts.spinlock);
}
|
||||
|
||||
// Remove all the alerts of the host, by flushing its rrdcalc dictionary.
void rrdcalc_delete_all(RRDHOST *host) {
    dictionary_flush(host->rrdcalc_root_index);
}
|
||||
|
||||
// Fully dispose an RRDCALC that did not end up in the host index:
// unlink it from its chart (when linked), release its internals
// and finally free the container itself.
void rrdcalc_free_unused_rrdcalc_loaded_from_config(RRDCALC *rc) {
    if(rc->rrdset)
        rrdcalc_unlink_from_rrdset(rc, false);

    rrdcalc_free_internals(rc);

    freez(rc);
}
|
|
@ -1,271 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "rrd.h"
|
||||
|
||||
#ifndef NETDATA_RRDCALC_H
|
||||
#define NETDATA_RRDCALC_H 1
|
||||
|
||||
// calculated variables (defined in health configuration)
|
||||
// These aggregate time-series data at fixed intervals
|
||||
// (defined in their update_every member below)
|
||||
// They increase the overhead of netdata.
|
||||
//
|
||||
// These calculations are stored under RRDHOST.
|
||||
// Then are also linked to RRDSET (of course only when a
|
||||
// matching chart is found).
|
||||
|
||||
// Bit flags stored in RRDCALC.run_flags.
// Bit 2 is not in use (it used to be RRDCALC_FLAG_DB_STALE).
typedef enum {
    RRDCALC_FLAG_DB_ERROR       = (1 << 0),
    RRDCALC_FLAG_DB_NAN         = (1 << 1),
    // RRDCALC_FLAG_DB_STALE    = (1 << 2),
    RRDCALC_FLAG_CALC_ERROR     = (1 << 3),
    RRDCALC_FLAG_WARN_ERROR     = (1 << 4),
    RRDCALC_FLAG_CRIT_ERROR     = (1 << 5),
    RRDCALC_FLAG_RUNNABLE       = (1 << 6),
    RRDCALC_FLAG_DISABLED       = (1 << 7),
    RRDCALC_FLAG_SILENCED       = (1 << 8),
    RRDCALC_FLAG_RUN_ONCE       = (1 << 9),
    RRDCALC_FLAG_FROM_TEMPLATE  = (1 << 10), // the rrdcalc has been created from a template
} RRDCALC_FLAGS;
|
||||
|
||||
void rrdcalc_flags_to_json_array(BUFFER *wb, const char *key, RRDCALC_FLAGS flags);
|
||||
|
||||
typedef enum {
|
||||
// This list uses several other options from RRDR_OPTIONS for db lookups.
|
||||
// To add an item here, you need to reserve a bit in RRDR_OPTIONS.
|
||||
RRDCALC_OPTION_NO_CLEAR_NOTIFICATION = RRDR_OPTION_HEALTH_RSRVD1,
|
||||
} RRDCALC_OPTIONS;
|
||||
|
||||
#define RRDCALC_ALL_OPTIONS_EXCLUDING_THE_RRDR_ONES (RRDCALC_OPTION_NO_CLEAR_NOTIFICATION)
|
||||
|
||||
struct rrdcalc {
|
||||
STRING *key; // the unique key in the host's rrdcalc_root_index
|
||||
|
||||
uint32_t id; // the unique id of this alarm
|
||||
uint32_t next_event_id; // the next event id that will be used for this alarm
|
||||
|
||||
uuid_t config_hash_id; // a predictable hash_id based on specific alert configuration
|
||||
|
||||
STRING *name; // the name of this alarm
|
||||
STRING *chart; // the chart id this should be linked to
|
||||
|
||||
STRING *exec; // the command to execute when this alarm switches state
|
||||
STRING *recipient; // the recipient of the alarm (the first parameter to exec)
|
||||
|
||||
STRING *classification; // the class that this alarm belongs
|
||||
STRING *component; // the component that this alarm refers to
|
||||
STRING *type; // type of the alarm
|
||||
|
||||
STRING *plugin_match; // the plugin name that should be linked to
|
||||
SIMPLE_PATTERN *plugin_pattern;
|
||||
|
||||
STRING *module_match; // the module name that should be linked to
|
||||
SIMPLE_PATTERN *module_pattern;
|
||||
|
||||
STRING *source; // the source of this alarm
|
||||
STRING *units; // the units of the alarm
|
||||
STRING *summary; // a short alert summary
|
||||
STRING *original_summary; // the original summary field before any variable replacement
|
||||
STRING *original_info; // the original info field before any variable replacement
|
||||
STRING *info; // a description of the alarm
|
||||
|
||||
int update_every; // update frequency for the alarm
|
||||
|
||||
// the red and green threshold of this alarm (to be set to the chart)
|
||||
NETDATA_DOUBLE green;
|
||||
NETDATA_DOUBLE red;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// database lookup settings
|
||||
|
||||
STRING *dimensions; // the chart dimensions
|
||||
STRING *foreach_dimension; // the group of dimensions that the `foreach` will be applied.
|
||||
SIMPLE_PATTERN *foreach_dimension_pattern; // used if and only if there is a simple pattern for the chart.
|
||||
RRDR_TIME_GROUPING group; // grouping method: average, max, etc.
|
||||
int before; // ending point in time-series
|
||||
int after; // starting point in time-series
|
||||
RRDCALC_OPTIONS options; // configuration options
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// expressions related to the alarm
|
||||
|
||||
EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm
|
||||
EVAL_EXPRESSION *warning; // expression to check the warning condition
|
||||
EVAL_EXPRESSION *critical; // expression to check the critical condition
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// notification delay settings
|
||||
|
||||
int delay_up_duration; // duration to delay notifications when alarm raises
|
||||
int delay_down_duration; // duration to delay notifications when alarm lowers
|
||||
int delay_max_duration; // the absolute max delay to apply to this alarm
|
||||
float delay_multiplier; // multiplier for all delays when alarms switch status
|
||||
// while now < delay_up_to
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// notification repeat settings
|
||||
|
||||
uint32_t warn_repeat_every; // interval between repeating warning notifications
|
||||
uint32_t crit_repeat_every; // interval between repeating critical notifications
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Labels settings
|
||||
STRING *host_labels; // the label read from an alarm file
|
||||
SIMPLE_PATTERN *host_labels_pattern; // the simple pattern of labels
|
||||
|
||||
STRING *chart_labels; // the chart label read from an alarm file
|
||||
SIMPLE_PATTERN *chart_labels_pattern; // the simple pattern of chart labels
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// runtime information
|
||||
|
||||
RRDCALC_STATUS old_status; // the old status of the alarm
|
||||
RRDCALC_STATUS status; // the current status of the alarm
|
||||
|
||||
NETDATA_DOUBLE value; // the current value of the alarm
|
||||
NETDATA_DOUBLE old_value; // the previous value of the alarm
|
||||
NETDATA_DOUBLE last_status_change_value; // the value at the last status change
|
||||
|
||||
RRDCALC_FLAGS run_flags; // check RRDCALC_FLAG_*
|
||||
|
||||
time_t last_updated; // the last update timestamp of the alarm
|
||||
time_t next_update; // the next update timestamp of the alarm
|
||||
time_t last_status_change; // the timestamp of the last time this alarm changed status
|
||||
time_t last_repeat; // the last time the alarm got repeated
|
||||
uint32_t times_repeat; // number of times the alarm got repeated
|
||||
|
||||
time_t db_after; // the first timestamp evaluated by the db lookup
|
||||
time_t db_before; // the last timestamp evaluated by the db lookup
|
||||
|
||||
time_t delay_up_to_timestamp; // the timestamp up to which we should delay notifications
|
||||
int delay_up_current; // the current up notification delay duration
|
||||
int delay_down_current; // the current down notification delay duration
|
||||
int delay_last; // the last delay we used
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// variables this alarm exposes to the rest of the alarms
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_local;
|
||||
const RRDVAR_ACQUIRED *rrdvar_family;
|
||||
const RRDVAR_ACQUIRED *rrdvar_host_chart_id;
|
||||
const RRDVAR_ACQUIRED *rrdvar_host_chart_name;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// the chart this alarm it is linked to
|
||||
|
||||
size_t labels_version;
|
||||
struct rrdset *rrdset;
|
||||
|
||||
struct rrdcalc *next;
|
||||
struct rrdcalc *prev;
|
||||
};
|
||||
|
||||
#define rrdcalc_name(rc) string2str((rc)->name)
|
||||
#define rrdcalc_chart_name(rc) string2str((rc)->chart)
|
||||
#define rrdcalc_exec(rc) string2str((rc)->exec)
|
||||
#define rrdcalc_recipient(rc) string2str((rc)->recipient)
|
||||
#define rrdcalc_classification(rc) string2str((rc)->classification)
|
||||
#define rrdcalc_component(rc) string2str((rc)->component)
|
||||
#define rrdcalc_type(rc) string2str((rc)->type)
|
||||
#define rrdcalc_plugin_match(rc) string2str((rc)->plugin_match)
|
||||
#define rrdcalc_module_match(rc) string2str((rc)->module_match)
|
||||
#define rrdcalc_source(rc) string2str((rc)->source)
|
||||
#define rrdcalc_units(rc) string2str((rc)->units)
|
||||
#define rrdcalc_original_summary(rc) string2str((rc)->original_summary)
|
||||
#define rrdcalc_summary(rc) string2str((rc)->summary)
|
||||
#define rrdcalc_original_info(rc) string2str((rc)->original_info)
|
||||
#define rrdcalc_info(rc) string2str((rc)->info)
|
||||
#define rrdcalc_dimensions(rc) string2str((rc)->dimensions)
|
||||
#define rrdcalc_foreachdim(rc) string2str((rc)->foreach_dimension)
|
||||
#define rrdcalc_host_labels(rc) string2str((rc)->host_labels)
|
||||
#define rrdcalc_chart_labels(rc) string2str((rc)->chart_labels)
|
||||
|
||||
#define foreach_rrdcalc_in_rrdhost_read(host, rc) \
|
||||
dfe_start_read((host)->rrdcalc_root_index, rc) \
|
||||
|
||||
#define foreach_rrdcalc_in_rrdhost_reentrant(host, rc) \
|
||||
dfe_start_reentrant((host)->rrdcalc_root_index, rc)
|
||||
|
||||
#define foreach_rrdcalc_in_rrdhost_done(rc) \
|
||||
dfe_done(rc)
|
||||
|
||||
struct alert_config {
|
||||
STRING *alarm;
|
||||
STRING *template_key;
|
||||
STRING *os;
|
||||
STRING *host;
|
||||
STRING *on;
|
||||
STRING *plugin;
|
||||
STRING *module;
|
||||
STRING *charts;
|
||||
STRING *lookup;
|
||||
STRING *calc;
|
||||
STRING *warn;
|
||||
STRING *crit;
|
||||
STRING *every;
|
||||
STRING *green;
|
||||
STRING *red;
|
||||
STRING *exec;
|
||||
STRING *to;
|
||||
STRING *units;
|
||||
STRING *summary;
|
||||
STRING *info;
|
||||
STRING *classification;
|
||||
STRING *component;
|
||||
STRING *type;
|
||||
STRING *delay;
|
||||
STRING *options;
|
||||
STRING *repeat;
|
||||
STRING *host_labels;
|
||||
STRING *chart_labels;
|
||||
STRING *source;
|
||||
|
||||
STRING *p_db_lookup_dimensions;
|
||||
STRING *p_db_lookup_method;
|
||||
|
||||
uint32_t p_db_lookup_options;
|
||||
int32_t p_db_lookup_after;
|
||||
int32_t p_db_lookup_before;
|
||||
int32_t p_update_every;
|
||||
};
|
||||
|
||||
#define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->after)
|
||||
|
||||
void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc);
|
||||
|
||||
void rrdcalc_link_matching_alerts_to_rrdset(RRDSET *st);
|
||||
|
||||
const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name);
|
||||
void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca);
|
||||
RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca);
|
||||
|
||||
const char *rrdcalc_status2string(RRDCALC_STATUS status);
|
||||
|
||||
void rrdcalc_free_unused_rrdcalc_loaded_from_config(RRDCALC *rc);
|
||||
|
||||
uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id);
|
||||
void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st, const char *overwrite_alert_name, const char *overwrite_dimensions);
|
||||
int rrdcalc_add_from_config(RRDHOST *host, RRDCALC *rc);
|
||||
|
||||
void rrdcalc_delete_alerts_not_matching_host_labels_from_all_hosts();
|
||||
void rrdcalc_delete_alerts_not_matching_host_labels_from_this_host(RRDHOST *host);
|
||||
|
||||
// Returns 1 when the alert has a repeat interval set for warning or
// critical notifications, 0 otherwise.
static inline int rrdcalc_isrepeating(RRDCALC *rc) {
    if (unlikely(rc->warn_repeat_every > 0 || rc->crit_repeat_every > 0))
        return 1;
    else
        return 0;
}
|
||||
|
||||
void rrdcalc_unlink_all_rrdset_alerts(RRDSET *st);
|
||||
void rrdcalc_delete_all(RRDHOST *host);
|
||||
|
||||
void rrdcalc_rrdhost_index_init(RRDHOST *host);
|
||||
void rrdcalc_rrdhost_index_destroy(RRDHOST *host);
|
||||
|
||||
#define RRDCALC_VAR_MAX 100
|
||||
#define RRDCALC_VAR_FAMILY "${family}"
|
||||
#define RRDCALC_VAR_LABEL "${label:"
|
||||
#define RRDCALC_VAR_LABEL_LEN (sizeof(RRDCALC_VAR_LABEL)-1)
|
||||
|
||||
#endif //NETDATA_RRDCALC_H
|
|
@ -1,242 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "rrd.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALCTEMPLATE management
|
||||
/**
|
||||
* RRDCALC TEMPLATE LINK MATCHING
|
||||
*
|
||||
* @param rt is the template used to create the alarm.
|
||||
* @param st is the chart where the alarm will be attached.
|
||||
*/
|
||||
|
||||
// Build "<name>_<dim>" in a newly allocated, NUL terminated buffer.
// namelen and dimlen are the number of bytes copied from name and dim.
// The caller owns the returned buffer and has to release it with freez().
static char *rrdcalc_alert_name_with_dimension(const char *name, size_t namelen, const char *dim, size_t dimlen) {
    // name + '_' + dim + terminating NUL
    char *joined = mallocz(namelen + dimlen + 2);

    memcpy(joined, name, namelen);
    joined[namelen] = '_';
    memcpy(&joined[namelen + 1], dim, dimlen);
    joined[namelen + 1 + dimlen] = '\0';

    return joined;
}
|
||||
|
||||
// Check whether template `rt` applies to chart `st` of `host`.
//
// The chart is accepted only when all of the following hold:
//  - the template and the chart have the same context;
//  - a foreach-dimension template finds at least one dimension on the chart;
//  - the charts pattern (if any) matches the chart name or the chart id;
//  - the module and plugin patterns (if any) match the chart's module/plugin;
//  - the host and chart labels patterns (if any) match the respective labels,
//    checked only when the host / the chart has labels.
bool rrdcalctemplate_check_rrdset_conditions(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host) {
    if(rt->context != st->context)
        return false;

    if(rt->foreach_dimension_pattern && !rrdset_number_of_dimensions(st))
        return false;

    if(rt->charts_pattern) {
        // the name is tried first, the id only when the name does not match
        if(!simple_pattern_matches_string(rt->charts_pattern, st->name) &&
           !simple_pattern_matches_string(rt->charts_pattern, st->id))
            return false;
    }

    if(rt->module_pattern) {
        if(!simple_pattern_matches_string(rt->module_pattern, st->module_name))
            return false;
    }

    if(rt->plugin_pattern) {
        if(!simple_pattern_matches_string(rt->plugin_pattern, st->plugin_name))
            return false;
    }

    if(host->rrdlabels && rt->host_labels_pattern) {
        if(!rrdlabels_match_simple_pattern_parsed(host->rrdlabels, rt->host_labels_pattern, '=', NULL))
            return false;
    }

    if(st->rrdlabels && rt->chart_labels_pattern) {
        if(!rrdlabels_match_simple_pattern_parsed(st->rrdlabels, rt->chart_labels_pattern, '=', NULL))
            return false;
    }

    return true;
}
|
||||
|
||||
// For a foreach-dimension template: when the dimension id or name matches
// the template's foreach pattern, create an alert for this dimension,
// named "<template name>_<dimension name>".
void rrdcalctemplate_check_rrddim_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDDIM *rd, RRDHOST *host) {
    bool matched = simple_pattern_matches_string(rt->foreach_dimension_pattern, rd->id) ||
                   simple_pattern_matches_string(rt->foreach_dimension_pattern, rd->name);

    if(!matched)
        return;

    char *overwrite_alert_name = rrdcalc_alert_name_with_dimension(
        rrdcalctemplate_name(rt), string_strlen(rt->name),
        rrddim_name(rd), string_strlen(rd->name));

    rrdcalc_add_from_rrdcalctemplate(host, rt, st, overwrite_alert_name, rrddim_name(rd));

    // the temporary name is released right after the call
    freez(overwrite_alert_name);
}
|
||||
|
||||
// Apply template `rt` to chart `st`, when the chart satisfies the template
// conditions: one alert for the chart, or - for foreach-dimension
// templates - one alert per matching dimension.
void rrdcalctemplate_check_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host) {
    if(!rrdcalctemplate_check_rrdset_conditions(rt, st, host))
        return;

    if(rt->foreach_dimension_pattern) {
        RRDDIM *rd;
        rrddim_foreach_read(rd, st) {
            rrdcalctemplate_check_rrddim_conditions_and_link(rt, st, rd, host);
        }
        rrddim_foreach_done(rd);
    }
    else
        rrdcalc_add_from_rrdcalctemplate(host, rt, st, NULL, NULL);
}
|
||||
|
||||
// Try every template of the chart's host against the chart.
void rrdcalctemplate_link_matching_templates_to_rrdset(RRDSET *st) {
    RRDCALCTEMPLATE *rt;
    foreach_rrdcalctemplate_read(st->rrdhost, rt) {
        rrdcalctemplate_check_conditions_and_link(rt, st, st->rrdhost);
    }
    foreach_rrdcalctemplate_done(rt);
}
|
||||
|
||||
// Release everything an RRDCALCTEMPLATE points to: its parsed expressions,
// its STRING members and its compiled simple patterns.
// The RRDCALCTEMPLATE container itself is NOT freed here.
// A NULL rt is accepted and ignored, like rrdcalc_free_internals() does.
static void rrdcalctemplate_free_internals(RRDCALCTEMPLATE *rt) {
    if(unlikely(!rt)) return;

    // expressions
    expression_free(rt->calculation);
    expression_free(rt->warning);
    expression_free(rt->critical);

    // matching rules
    string_freez(rt->plugin_match);
    simple_pattern_free(rt->plugin_pattern);

    string_freez(rt->module_match);
    simple_pattern_free(rt->module_pattern);

    string_freez(rt->charts_match);
    simple_pattern_free(rt->charts_pattern);

    // strings
    string_freez(rt->name);
    string_freez(rt->exec);
    string_freez(rt->recipient);
    string_freez(rt->classification);
    string_freez(rt->component);
    string_freez(rt->type);
    string_freez(rt->context);
    string_freez(rt->source);
    string_freez(rt->units);

    // summary is a member of struct rrdcalctemplate just like info,
    // but it was never released here (leak).
    // NOTE(review): assumes the template owns its own reference to it,
    // the same way it owns info - confirm against the config parser.
    string_freez(rt->summary);
    string_freez(rt->info);

    string_freez(rt->dimensions);
    string_freez(rt->foreach_dimension);
    string_freez(rt->host_labels);
    string_freez(rt->chart_labels);

    // remaining compiled patterns
    simple_pattern_free(rt->foreach_dimension_pattern);
    simple_pattern_free(rt->host_labels_pattern);
    simple_pattern_free(rt->chart_labels_pattern);
}
|
||||
|
||||
// Fully dispose a template that did not end up in the host index:
// release its internals and then the container. NULL is ignored.
void rrdcalctemplate_free_unused_rrdcalctemplate_loaded_from_config(RRDCALCTEMPLATE *rt) {
    if(unlikely(!rt))
        return;

    rrdcalctemplate_free_internals(rt);
    freez(rt);
}
|
||||
// Dictionary insert callback of the host's template index.
// `added_bool` points to a bool owned by the caller of the dictionary set
// operation; it is set to true to report that the template was inserted.
// The rest of the function only emits a debug log line describing the template.
static void rrdcalctemplate_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalctemplate, void *added_bool) {
    RRDCALCTEMPLATE *rt = rrdcalctemplate;
    (void)rt; // rt is only referenced by the debug log below

    *(bool *)added_bool = true;

    netdata_log_debug(D_HEALTH, "Health configuration adding template '%s'"
                                ": context '%s'"
                                ", exec '%s'"
                                ", recipient '%s'"
                                ", green " NETDATA_DOUBLE_FORMAT_AUTO
                                ", red " NETDATA_DOUBLE_FORMAT_AUTO
                                ", lookup: group %d"
                                ", after %d"
                                ", before %d"
                                ", options %u"
                                ", dimensions '%s'"
                                ", for each dimension '%s'"
                                ", update every %d"
                                ", calculation '%s'"
                                ", warning '%s'"
                                ", critical '%s'"
                                ", source '%s'"
                                ", delay up %d"
                                ", delay down %d"
                                ", delay max %d"
                                ", delay_multiplier %f"
                                ", warn_repeat_every %u"
                                ", crit_repeat_every %u",
                      rrdcalctemplate_name(rt),
                      (rt->context) ? string2str(rt->context) : "NONE",
                      (rt->exec) ? rrdcalctemplate_exec(rt) : "DEFAULT",
                      (rt->recipient) ? rrdcalctemplate_recipient(rt) : "DEFAULT",
                      rt->green,
                      rt->red,
                      (int)rt->group,
                      rt->after,
                      rt->before,
                      rt->options,
                      (rt->dimensions) ? rrdcalctemplate_dimensions(rt) : "NONE",
                      (rt->foreach_dimension) ? rrdcalctemplate_foreachdim(rt) : "NONE",
                      rt->update_every,
                      (rt->calculation) ? rt->calculation->parsed_as : "NONE",
                      (rt->warning) ? rt->warning->parsed_as : "NONE",
                      (rt->critical) ? rt->critical->parsed_as : "NONE",
                      rrdcalctemplate_source(rt),
                      rt->delay_up_duration,
                      rt->delay_down_duration,
                      rt->delay_max_duration,
                      rt->delay_multiplier,
                      rt->warn_repeat_every,
                      rt->crit_repeat_every
    );
}
|
||||
|
||||
// Dictionary delete callback of the host's template index:
// releases everything the template owns.
static void rrdcalctemplate_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalctemplate, void *rrdhost __maybe_unused) {
    RRDCALCTEMPLATE *tmpl = rrdcalctemplate;

    rrdcalctemplate_free_internals(tmpl);
}
|
||||
|
||||
// Create the per-host template dictionary and register its callbacks.
// Does nothing when the host already has a template index.
void rrdcalctemplate_index_init(RRDHOST *host) {
    if(host->rrdcalctemplate_root_index)
        return;

    host->rrdcalctemplate_root_index = dictionary_create_advanced(
        DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
        &dictionary_stats_category_rrdhealth,
        sizeof(RRDCALCTEMPLATE));

    dictionary_register_insert_callback(host->rrdcalctemplate_root_index, rrdcalctemplate_insert_callback, NULL);
    dictionary_register_delete_callback(host->rrdcalctemplate_root_index, rrdcalctemplate_delete_callback, host);
}
|
||||
|
||||
// Destroy the host's template dictionary and clear the pointer,
// so that rrdcalctemplate_index_init() can create it again.
void rrdcalctemplate_index_destroy(RRDHOST *host) {
    dictionary_destroy(host->rrdcalctemplate_root_index);
    host->rrdcalctemplate_root_index = NULL;
}
|
||||
|
||||
// Remove all the templates of the host, by flushing its template dictionary.
inline void rrdcalctemplate_delete_all(RRDHOST *host) {
    dictionary_flush(host->rrdcalctemplate_root_index);
}
|
||||
|
||||
#define RRDCALCTEMPLATE_MAX_KEY_SIZE 1024
|
||||
// Add a template loaded from the health configuration to the host's
// template index, keyed by the template name.
//
// Ownership of `rt`:
//  - validation failure (no context, no frequency, nothing to evaluate):
//    rt is left untouched;
//  - added: the dictionary keeps a copy, so only the container is freed here;
//  - already exists: rt is freed completely (internals and container).
void rrdcalctemplate_add_from_config(RRDHOST *host, RRDCALCTEMPLATE *rt) {
    if(unlikely(!rt->context)) {
        netdata_log_error("Health configuration for template '%s' does not have a context", rrdcalctemplate_name(rt));
        return;
    }

    if(unlikely(!rt->update_every)) {
        netdata_log_error("Health configuration for template '%s' has no frequency (parameter 'every'). Ignoring it.", rrdcalctemplate_name(rt));
        return;
    }

    if(unlikely(!RRDCALCTEMPLATE_HAS_DB_LOOKUP(rt) && !rt->calculation && !rt->warning && !rt->critical)) {
        netdata_log_error("Health configuration for template '%s' is useless (no calculation, no warning and no critical evaluation)", rrdcalctemplate_name(rt));
        return;
    }

    char key[RRDCALCTEMPLATE_MAX_KEY_SIZE + 1];
    size_t key_len = snprintfz(key, RRDCALCTEMPLATE_MAX_KEY_SIZE, "%s", rrdcalctemplate_name(rt));

    // set to true by rrdcalctemplate_insert_callback() when the item is new
    bool inserted = false;
    dictionary_set_advanced(host->rrdcalctemplate_root_index, key, (ssize_t)key_len, rt, sizeof(*rt), &inserted);

    if(!inserted) {
        netdata_log_info("Health configuration template '%s' already exists for host '%s'.", rrdcalctemplate_name(rt), rrdhost_hostname(host));
        rrdcalctemplate_free_unused_rrdcalctemplate_loaded_from_config(rt);
        return;
    }

    // rt has been copied into the dictionary; only the container is ours to free
    freez(rt);
}
|
|
@ -1,130 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_RRDCALCTEMPLATE_H
|
||||
#define NETDATA_RRDCALCTEMPLATE_H 1
|
||||
|
||||
#include "rrd.h"
|
||||
|
||||
// RRDCALCTEMPLATE
|
||||
// these are to be applied to charts found dynamically
|
||||
// based on their context.
|
||||
struct rrdcalctemplate {
|
||||
uuid_t config_hash_id;
|
||||
|
||||
STRING *name;
|
||||
|
||||
STRING *exec;
|
||||
STRING *recipient;
|
||||
|
||||
STRING *classification;
|
||||
STRING *component;
|
||||
STRING *type;
|
||||
|
||||
STRING *context;
|
||||
|
||||
STRING *plugin_match;
|
||||
SIMPLE_PATTERN *plugin_pattern;
|
||||
|
||||
STRING *module_match;
|
||||
SIMPLE_PATTERN *module_pattern;
|
||||
|
||||
STRING *charts_match;
|
||||
SIMPLE_PATTERN *charts_pattern;
|
||||
|
||||
STRING *source; // the source of this alarm
|
||||
STRING *units; // the units of the alarm
|
||||
STRING *summary; // a short summary of the alarm
|
||||
STRING *info; // a description of the alarm
|
||||
|
||||
int update_every; // update frequency for the alarm
|
||||
|
||||
// the red and green threshold of this alarm (to be set to the chart)
|
||||
NETDATA_DOUBLE green;
|
||||
NETDATA_DOUBLE red;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// database lookup settings
|
||||
|
||||
STRING *dimensions; // the chart dimensions
|
||||
STRING *foreach_dimension; // the group of dimensions that the lookup will be applied.
|
||||
SIMPLE_PATTERN *foreach_dimension_pattern; // used if and only if there is a simple pattern for the chart.
|
||||
RRDR_TIME_GROUPING group; // grouping method: average, max, etc.
|
||||
int before; // ending point in time-series
|
||||
int after; // starting point in time-series
|
||||
RRDCALC_OPTIONS options; // configuration options
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// notification delay settings
|
||||
|
||||
int delay_up_duration; // duration to delay notifications when alarm raises
|
||||
int delay_down_duration; // duration to delay notifications when alarm lowers
|
||||
int delay_max_duration; // the absolute max delay to apply to this alarm
|
||||
float delay_multiplier; // multiplier for all delays when alarms switch status
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// notification repeat settings
|
||||
|
||||
uint32_t warn_repeat_every; // interval between repeating warning notifications
|
||||
uint32_t crit_repeat_every; // interval between repeating critical notifications
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// Labels settings
|
||||
STRING *host_labels; // the label read from an alarm file
|
||||
SIMPLE_PATTERN *host_labels_pattern; // the simple pattern of labels
|
||||
|
||||
STRING *chart_labels; // the chart label read from an alarm file
|
||||
SIMPLE_PATTERN *chart_labels_pattern; // the simple pattern of chart labels
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// expressions related to the alarm
|
||||
|
||||
EVAL_EXPRESSION *calculation;
|
||||
EVAL_EXPRESSION *warning;
|
||||
EVAL_EXPRESSION *critical;
|
||||
|
||||
struct rrdcalctemplate *next;
|
||||
struct rrdcalctemplate *prev;
|
||||
};
|
||||
|
||||
// Iterate over the alert templates of a host: expands to dfe_start_read()
// on (host)->rrdcalctemplate_root_index, with `rt` as the loop variable.
#define foreach_rrdcalctemplate_read(host, rt) \
    dfe_start_read((host)->rrdcalctemplate_root_index, rt)

// Closes a loop opened with foreach_rrdcalctemplate_read().
#define foreach_rrdcalctemplate_done(rt) \
    dfe_done(rt)

// Accessors: each one passes the corresponding template member through
// string2str() so it can be used as a plain C string.
#define rrdcalctemplate_name(rt)            string2str((rt)->name)
#define rrdcalctemplate_exec(rt)            string2str((rt)->exec)
#define rrdcalctemplate_recipient(rt)       string2str((rt)->recipient)
#define rrdcalctemplate_classification(rt)  string2str((rt)->classification)
#define rrdcalctemplate_component(rt)       string2str((rt)->component)
#define rrdcalctemplate_type(rt)            string2str((rt)->type)
#define rrdcalctemplate_plugin_match(rt)    string2str((rt)->plugin_match)
#define rrdcalctemplate_module_match(rt)    string2str((rt)->module_match)
#define rrdcalctemplate_charts_match(rt)    string2str((rt)->charts_match)
#define rrdcalctemplate_units(rt)           string2str((rt)->units)
#define rrdcalctemplate_summary(rt)         string2str((rt)->summary)
#define rrdcalctemplate_info(rt)            string2str((rt)->info)
#define rrdcalctemplate_source(rt)          string2str((rt)->source)
#define rrdcalctemplate_dimensions(rt)      string2str((rt)->dimensions)
#define rrdcalctemplate_foreachdim(rt)      string2str((rt)->foreach_dimension)
#define rrdcalctemplate_host_labels(rt)     string2str((rt)->host_labels)
#define rrdcalctemplate_chart_labels(rt)    string2str((rt)->chart_labels)

// Evaluates to the template's `after` member, so it is true whenever
// `after` (the starting point in the time-series) is non-zero.
#define RRDCALCTEMPLATE_HAS_DB_LOOKUP(rt) ((rt)->after)
|
||||
|
||||
void rrdcalctemplate_link_matching_templates_to_rrdset(RRDSET *st);
|
||||
|
||||
void rrdcalctemplate_free_unused_rrdcalctemplate_loaded_from_config(RRDCALCTEMPLATE *rt);
|
||||
void rrdcalctemplate_delete_all(RRDHOST *host);
|
||||
void rrdcalctemplate_add_from_config(RRDHOST *host, RRDCALCTEMPLATE *rt);
|
||||
|
||||
void rrdcalctemplate_check_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host);
|
||||
|
||||
bool rrdcalctemplate_check_rrdset_conditions(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host);
|
||||
void rrdcalctemplate_check_rrddim_conditions_and_link(RRDCALCTEMPLATE *rt, RRDSET *st, RRDDIM *rd, RRDHOST *host);
|
||||
|
||||
|
||||
void rrdcalctemplate_index_init(RRDHOST *host);
|
||||
void rrdcalctemplate_index_destroy(RRDHOST *host);
|
||||
|
||||
#endif //NETDATA_RRDCALCTEMPLATE_H
|
|
@ -203,8 +203,6 @@ static void rrddim_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, v
|
|||
metaqueue_delete_dimension_uuid(&rd->metric_uuid);
|
||||
}
|
||||
|
||||
rrddimvar_delete_all(rd);
|
||||
|
||||
for(size_t tier = 0; tier < storage_tiers ;tier++) {
|
||||
if(!rd->tiers[tier].smh) continue;
|
||||
|
||||
|
@ -356,8 +354,6 @@ inline int rrddim_reset_name(RRDSET *st __maybe_unused, RRDDIM *rd, const char *
|
|||
rd->name = rrd_string_strdupz(name);
|
||||
string_freez(old);
|
||||
|
||||
rrddimvar_rename_all(rd);
|
||||
|
||||
rrddim_metadata_updated(rd);
|
||||
|
||||
return 1;
|
||||
|
|
|
@ -1,273 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "rrd.h"
|
||||
|
||||
typedef struct rrddimvar {
|
||||
struct rrddim *rrddim;
|
||||
|
||||
STRING *prefix;
|
||||
STRING *suffix;
|
||||
void *value;
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_local_dim_id;
|
||||
const RRDVAR_ACQUIRED *rrdvar_local_dim_name;
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_family_id;
|
||||
const RRDVAR_ACQUIRED *rrdvar_family_name;
|
||||
const RRDVAR_ACQUIRED *rrdvar_family_context_dim_id;
|
||||
const RRDVAR_ACQUIRED *rrdvar_family_context_dim_name;
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_host_chart_id_dim_id;
|
||||
const RRDVAR_ACQUIRED *rrdvar_host_chart_id_dim_name;
|
||||
const RRDVAR_ACQUIRED *rrdvar_host_chart_name_dim_id;
|
||||
const RRDVAR_ACQUIRED *rrdvar_host_chart_name_dim_name;
|
||||
|
||||
RRDVAR_FLAGS flags:24;
|
||||
RRDVAR_TYPE type:8;
|
||||
} RRDDIMVAR;
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDDIMVAR management
|
||||
// DIMENSION VARIABLES
|
||||
|
||||
#define RRDDIMVAR_ID_MAX 1024
|
||||
|
||||
static inline void rrddimvar_free_variables_unsafe(RRDDIMVAR *rs) {
|
||||
RRDDIM *rd = rs->rrddim;
|
||||
RRDSET *st = rd->rrdset;
|
||||
RRDHOST *host = st->rrdhost;
|
||||
|
||||
// CHART VARIABLES FOR THIS DIMENSION
|
||||
|
||||
if(st->rrdvars) {
|
||||
rrdvar_release_and_del(st->rrdvars, rs->rrdvar_local_dim_id);
|
||||
rs->rrdvar_local_dim_id = NULL;
|
||||
|
||||
rrdvar_release_and_del(st->rrdvars, rs->rrdvar_local_dim_name);
|
||||
rs->rrdvar_local_dim_name = NULL;
|
||||
}
|
||||
|
||||
// FAMILY VARIABLES FOR THIS DIMENSION
|
||||
|
||||
if(st->rrdfamily) {
|
||||
rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_id);
|
||||
rs->rrdvar_family_id = NULL;
|
||||
|
||||
rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_name);
|
||||
rs->rrdvar_family_name = NULL;
|
||||
|
||||
rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_context_dim_id);
|
||||
rs->rrdvar_family_context_dim_id = NULL;
|
||||
|
||||
rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_context_dim_name);
|
||||
rs->rrdvar_family_context_dim_name = NULL;
|
||||
}
|
||||
|
||||
// HOST VARIABLES FOR THIS DIMENSION
|
||||
|
||||
if(host->rrdvars && host->health.health_enabled) {
|
||||
rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_id_dim_id);
|
||||
rs->rrdvar_host_chart_id_dim_id = NULL;
|
||||
|
||||
rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_id_dim_name);
|
||||
rs->rrdvar_host_chart_id_dim_name = NULL;
|
||||
|
||||
rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_name_dim_id);
|
||||
rs->rrdvar_host_chart_name_dim_id = NULL;
|
||||
|
||||
rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_name_dim_name);
|
||||
rs->rrdvar_host_chart_name_dim_name = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// (Re)publish a dimension variable under all of its names.
//
// Anything published earlier is removed first, then the names are rebuilt from
// the current dimension id / name, chart id / name and chart context, and the
// variable is added again to the chart, family and host dictionaries.
static inline void rrddimvar_update_variables_unsafe(RRDDIMVAR *rs) {
    // start clean: drop the variables created by a previous call
    rrddimvar_free_variables_unsafe(rs);

    RRDDIM *rd = rs->rrddim;
    RRDSET *st = rd->rrdset;
    RRDHOST *host = st->rrdhost;

    char name[RRDDIMVAR_ID_MAX + 1];

    // ------------------------------------------------------------------------
    // build the names

    // <prefix><dimension id><suffix>
    snprintfz(name, RRDDIMVAR_ID_MAX, "%s%s%s", string2str(rs->prefix), rrddim_id(rd), string2str(rs->suffix));
    STRING *dim_id_key = string_strdupz(name);

    // <prefix><dimension name><suffix>
    snprintfz(name, RRDDIMVAR_ID_MAX, "%s%s%s", string2str(rs->prefix), rrddim_name(rd), string2str(rs->suffix));
    STRING *dim_name_key = string_strdupz(name);

    // <chart id>.<...>
    snprintfz(name, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_id(st), string2str(dim_id_key));
    STRING *chart_id_dim_id_key = string_strdupz(name);

    snprintfz(name, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_id(st), string2str(dim_name_key));
    STRING *chart_id_dim_name_key = string_strdupz(name);

    // <chart context>.<...>
    snprintfz(name, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_context(st), string2str(dim_id_key));
    STRING *context_dim_id_key = string_strdupz(name);

    snprintfz(name, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_context(st), string2str(dim_name_key));
    STRING *context_dim_name_key = string_strdupz(name);

    // <chart name>.<...>
    snprintfz(name, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_name(st), string2str(dim_id_key));
    STRING *chart_name_dim_id_key = string_strdupz(name);

    snprintfz(name, RRDDIMVAR_ID_MAX, "%s.%s", rrdset_name(st), string2str(dim_name_key));
    STRING *chart_name_dim_name_key = string_strdupz(name);

    // ------------------------------------------------------------------------
    // chart scope - the dimension is available as:
    //   $id
    //   $name

    if(st->rrdvars) {
        rs->rrdvar_local_dim_id =
            rrdvar_add_and_acquire("local", st->rrdvars, dim_id_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
        rs->rrdvar_local_dim_name =
            rrdvar_add_and_acquire("local", st->rrdvars, dim_name_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
    }

    // ------------------------------------------------------------------------
    // family scope - the dimension is available as:
    //   $id                  (only the first, when multiple overlap)
    //   $name                (only the first, when multiple overlap)
    //   $chart-context.id
    //   $chart-context.name

    if(st->rrdfamily) {
        rs->rrdvar_family_id =
            rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), dim_id_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
        rs->rrdvar_family_name =
            rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), dim_name_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
        rs->rrdvar_family_context_dim_id =
            rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), context_dim_id_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
        rs->rrdvar_family_context_dim_name =
            rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), context_dim_name_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
    }

    // ------------------------------------------------------------------------
    // host scope - the dimension is available as:
    //   $chart-id.id
    //   $chart-id.name
    //   $chart-name.id
    //   $chart-name.name

    if(host->rrdvars && host->health.health_enabled) {
        rs->rrdvar_host_chart_id_dim_id =
            rrdvar_add_and_acquire("host", host->rrdvars, chart_id_dim_id_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
        rs->rrdvar_host_chart_id_dim_name =
            rrdvar_add_and_acquire("host", host->rrdvars, chart_id_dim_name_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
        rs->rrdvar_host_chart_name_dim_id =
            rrdvar_add_and_acquire("host", host->rrdvars, chart_name_dim_id_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
        rs->rrdvar_host_chart_name_dim_name =
            rrdvar_add_and_acquire("host", host->rrdvars, chart_name_dim_name_key, rs->type, RRDVAR_FLAG_NONE, rs->value);
    }

    // ------------------------------------------------------------------------
    // the temporary name strings are no longer needed

    string_freez(dim_id_key);
    string_freez(dim_name_key);
    string_freez(chart_id_dim_id_key);
    string_freez(chart_id_dim_name_key);
    string_freez(context_dim_id_key);
    string_freez(context_dim_name_key);
    string_freez(chart_name_dim_id_key);
    string_freez(chart_name_dim_name_key);
}
|
||||
|
||||
struct rrddimvar_constructor {
|
||||
RRDDIM *rrddim;
|
||||
const char *prefix;
|
||||
const char *suffix;
|
||||
void *value;
|
||||
RRDVAR_FLAGS flags :16;
|
||||
RRDVAR_TYPE type:8;
|
||||
};
|
||||
|
||||
// Dictionary insert callback: initialize a new RRDDIMVAR from the constructor
// data and publish its variables.
static void rrddimvar_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddimvar, void *constructor_data) {
    struct rrddimvar_constructor *ctr = constructor_data;
    RRDDIMVAR *rs = rrddimvar;

    // a missing prefix / suffix is treated as empty (the constructor is updated too)
    if(!ctr->prefix)
        ctr->prefix = "";

    if(!ctr->suffix)
        ctr->suffix = "";

    rs->prefix = string_strdupz(ctr->prefix);
    rs->suffix = string_strdupz(ctr->suffix);

    rs->type   = ctr->type;
    rs->value  = ctr->value;
    rs->flags  = ctr->flags;
    rs->rrddim = ctr->rrddim;

    rrddimvar_update_variables_unsafe(rs);
}
|
||||
|
||||
// Dictionary conflict callback: the key already exists, so the existing item
// is kept and its variables are simply re-published. Always returns true.
static bool rrddimvar_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddimvar, void *new_rrddimvar __maybe_unused, void *constructor_data __maybe_unused) {
    RRDDIMVAR *existing = rrddimvar;

    rrddimvar_update_variables_unsafe(existing);
    return true;
}
|
||||
|
||||
// Dictionary delete callback: un-publish the variables of the item and free
// the prefix / suffix strings it owns.
static void rrddimvar_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddimvar, void *rrdset __maybe_unused) {
    RRDDIMVAR *doomed = rrddimvar;

    rrddimvar_free_variables_unsafe(doomed);

    string_freez(doomed->prefix);
    string_freez(doomed->suffix);
}
|
||||
|
||||
// Create the RRDDIMVAR index of a chart and hook up its callbacks.
// Does nothing when the chart already has an index.
void rrddimvar_index_init(RRDSET *st) {
    if(st->rrddimvar_root_index)
        return;

    st->rrddimvar_root_index = dictionary_create_advanced(
        DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
        &dictionary_stats_category_rrdhealth,
        sizeof(RRDDIMVAR));

    // the chart is passed as callback data only to the delete callback
    dictionary_register_insert_callback(st->rrddimvar_root_index, rrddimvar_insert_callback, NULL);
    dictionary_register_conflict_callback(st->rrddimvar_root_index, rrddimvar_conflict_callback, NULL);
    dictionary_register_delete_callback(st->rrddimvar_root_index, rrddimvar_delete_callback, st);
}
|
||||
|
||||
// Destroy the RRDDIMVAR index of a chart and clear the pointer to it,
// so that rrddimvar_index_init() can create a fresh one later.
void rrddimvar_index_destroy(RRDSET *st)
{
    dictionary_destroy(st->rrddimvar_root_index);
    st->rrddimvar_root_index = NULL;
}
|
||||
|
||||
// Add a variable for dimension `rd` to the RRDDIMVAR index of its chart.
//
// The index key is "<prefix>_<dimension id>_<suffix>"; a NULL prefix or suffix
// is treated as an empty string. The item itself is filled in by the insert
// callback, which receives the constructor built here.
void rrddimvar_add_and_leave_released(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_FLAGS flags) {
    const char *pfx = prefix ? prefix : "";
    const char *sfx = suffix ? suffix : "";

    char key[RRDDIMVAR_ID_MAX + 1];
    size_t key_len = snprintfz(key, RRDDIMVAR_ID_MAX, "%s_%s_%s", pfx, rrddim_id(rd), sfx);

    struct rrddimvar_constructor ctr = {
        .rrddim = rd,
        .prefix = pfx,
        .suffix = sfx,
        .value  = value,
        .flags  = flags,
        .type   = type,
    };

    dictionary_set_advanced(rd->rrdset->rrddimvar_root_index, key, (ssize_t)key_len, NULL, sizeof(RRDDIMVAR), &ctr);
}
|
||||
|
||||
// Re-publish all the variables of dimension `rd`, so that their names follow
// the current id / name of the dimension and its chart.
//
// The index is per chart, so it is walked in full and only the entries that
// belong to `rd` are updated.
void rrddimvar_rename_all(RRDDIM *rd) {
    RRDSET *st = rd->rrdset;

    netdata_log_debug(D_VARIABLES, "RRDDIMVAR rename for chart id '%s' name '%s', dimension id '%s', name '%s'", rrdset_id(st), rrdset_name(st), rrddim_id(rd), rrddim_name(rd));

    RRDDIMVAR *dv;
    dfe_start_write(st->rrddimvar_root_index, dv) {
        if(unlikely(dv->rrddim == rd)) {
            rrddimvar_update_variables_unsafe(dv);
        }
    }
    dfe_done(dv);
}
|
||||
|
||||
// Remove from the chart's RRDDIMVAR index every entry that belongs to
// dimension `rd`.
//
// The index is per chart, so it is walked in full; matching entries are
// deleted by the key the traversal reports for them.
void rrddimvar_delete_all(RRDDIM *rd) {
    RRDSET *st = rd->rrdset;

    netdata_log_debug(D_VARIABLES, "RRDDIMVAR delete for chart id '%s' name '%s', dimension id '%s', name '%s'", rrdset_id(st), rrdset_name(st), rrddim_id(rd), rrddim_name(rd));

    RRDDIMVAR *dv;
    dfe_start_write(st->rrddimvar_root_index, dv) {
        if(unlikely(dv->rrddim == rd)) {
            dictionary_del(st->rrddimvar_root_index, dv_dfe.name);
        }
    }
    dfe_done(dv);
}
|
|
@ -1,21 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_RRDDIMVAR_H
#define NETDATA_RRDDIMVAR_H 1

#include "rrd.h"

// Variables linked to individual dimensions.
//
// Each variable points to a value that is already calculated / processed by
// the normal data collection process, so there is no speed penalty for using
// these variables.

// index lifecycle (one index per chart)
void rrddimvar_index_init(RRDSET *st);
void rrddimvar_index_destroy(RRDSET *st);

// operations on the variables of a single dimension
void rrddimvar_add_and_leave_released(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_FLAGS flags);
void rrddimvar_rename_all(RRDDIM *rd);
void rrddimvar_delete_all(RRDDIM *rd);

#endif //NETDATA_RRDDIMVAR_H
|
|
@ -1,69 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#define NETDATA_RRD_INTERNALS
|
||||
#include "rrd.h"
|
||||
|
||||
typedef struct rrdfamily {
|
||||
STRING *family;
|
||||
DICTIONARY *rrdvars;
|
||||
} RRDFAMILY;
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDFAMILY index
|
||||
|
||||
// Data passed to rrdfamily_insert_callback() when a family is first added.
struct rrdfamily_constructor {
    const char *family;     // copied by the callback with string_strdupz()
};
|
||||
|
||||
// Dictionary insert callback: set up a new RRDFAMILY with a copy of the
// family string and a fresh variables dictionary.
static void rrdfamily_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdfamily, void *constructor_data) {
    const struct rrdfamily_constructor *ctr = constructor_data;
    RRDFAMILY *fam = rrdfamily;

    fam->family  = string_strdupz(ctr->family);
    fam->rrdvars = rrdvariables_create();
}
|
||||
|
||||
// Dictionary delete callback: release what the RRDFAMILY owns (its family
// string and its variables dictionary) and clear both pointers.
static void rrdfamily_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdfamily, void *rrdhost __maybe_unused) {
    RRDFAMILY *fam = rrdfamily;

    string_freez(fam->family);
    rrdvariables_destroy(fam->rrdvars);

    fam->rrdvars = NULL;
    fam->family = NULL;
}
|
||||
|
||||
// Create the family index of a host and hook up its callbacks.
// Does nothing when the host already has one.
void rrdfamily_index_init(RRDHOST *host) {
    if(host->rrdfamily_root_index)
        return;

    host->rrdfamily_root_index = dictionary_create_advanced(
        DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
        &dictionary_stats_category_rrdhealth,
        sizeof(RRDFAMILY));

    dictionary_register_insert_callback(host->rrdfamily_root_index, rrdfamily_insert_callback, NULL);
    dictionary_register_delete_callback(host->rrdfamily_root_index, rrdfamily_delete_callback, host);
}
|
||||
|
||||
// Destroy the family index of a host and clear the pointer to it,
// so that rrdfamily_index_init() can create a fresh one later.
void rrdfamily_index_destroy(RRDHOST *host)
{
    dictionary_destroy(host->rrdfamily_root_index);
    host->rrdfamily_root_index = NULL;
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDFAMILY management
|
||||
|
||||
// Add family `id` to the host's family index and return it acquired.
// `id` is used both as the dictionary key and as the family string given to
// the insert callback. Pair every call with rrdfamily_release().
const RRDFAMILY_ACQUIRED *rrdfamily_add_and_acquire(RRDHOST *host, const char *id) {
    struct rrdfamily_constructor ctr = { .family = id };

    const DICTIONARY_ITEM *acquired =
        dictionary_set_and_acquire_item_advanced(host->rrdfamily_root_index, id, -1, NULL, sizeof(RRDFAMILY), &ctr);

    return (const RRDFAMILY_ACQUIRED *)acquired;
}
|
||||
|
||||
// Release a family obtained with rrdfamily_add_and_acquire().
// A NULL handle is silently ignored.
void rrdfamily_release(RRDHOST *host, const RRDFAMILY_ACQUIRED *rfa) {
    if(unlikely(!rfa))
        return;

    const DICTIONARY_ITEM *acquired = (const DICTIONARY_ITEM *)rfa;
    dictionary_acquired_item_release(host->rrdfamily_root_index, acquired);
}
|
||||
|
||||
DICTIONARY *rrdfamily_rrdvars_dict(const RRDFAMILY_ACQUIRED *rfa) {
|
||||
if(unlikely(!rfa)) return NULL;
|
||||
RRDFAMILY *rf = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rfa);
|
||||
return(rf->rrdvars);
|
||||
}
|
|
@ -402,6 +402,19 @@ int rrd_function_run(RRDHOST *host, BUFFER *result_wb, int timeout_s, HTTP_ACCES
|
|||
char sanitized_source[(source ? strlen(source) : 0) + 1];
|
||||
rrd_functions_sanitize(sanitized_source, source ? source : "", sizeof(sanitized_source));
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// check for the host
|
||||
if(!host) {
|
||||
code = HTTP_RESP_INTERNAL_SERVER_ERROR;
|
||||
|
||||
rrd_call_function_error(result_wb, "no host given for running the function", code);
|
||||
|
||||
if(result_cb)
|
||||
result_cb(result_wb, code, result_cb_data);
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// find the function
|
||||
|
||||
|
|
|
@ -230,8 +230,8 @@ static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone, co
|
|||
|
||||
void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode,
|
||||
const char *registry_hostname, const char *os, const char *tags,
|
||||
const char *tzone, const char *abbrev_tzone, int32_t utc_offset, const char *program_name,
|
||||
const char *program_version)
|
||||
const char *tzone, const char *abbrev_tzone, int32_t utc_offset, const char *prog_name,
|
||||
const char *prog_version)
|
||||
{
|
||||
|
||||
host->rrd_update_every = update_every;
|
||||
|
@ -241,8 +241,8 @@ void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory
|
|||
rrdhost_init_timezone(host, tzone, abbrev_tzone, utc_offset);
|
||||
rrdhost_init_tags(host, tags);
|
||||
|
||||
host->program_name = string_strdupz((program_name && *program_name) ? program_name : "unknown");
|
||||
host->program_version = string_strdupz((program_version && *program_version) ? program_version : "unknown");
|
||||
host->program_name = string_strdupz((prog_name && *prog_name) ? prog_name : "unknown");
|
||||
host->program_version = string_strdupz((prog_version && *prog_version) ? prog_version : "unknown");
|
||||
host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname) ? registry_hostname : rrdhost_hostname(host));
|
||||
}
|
||||
|
||||
|
@ -288,8 +288,8 @@ static RRDHOST *rrdhost_create(
|
|||
const char *abbrev_timezone,
|
||||
int32_t utc_offset,
|
||||
const char *tags,
|
||||
const char *program_name,
|
||||
const char *program_version,
|
||||
const char *prog_name,
|
||||
const char *prog_version,
|
||||
int update_every,
|
||||
long entries,
|
||||
RRD_MEMORY_MODE memory_mode,
|
||||
|
@ -326,7 +326,9 @@ int is_legacy = 1;
|
|||
strncpyz(host->machine_guid, guid, GUID_LEN + 1);
|
||||
|
||||
set_host_properties(host, (update_every > 0)?update_every:1, memory_mode, registry_hostname, os,
|
||||
tags, timezone, abbrev_timezone, utc_offset, program_name, program_version);
|
||||
tags, timezone, abbrev_timezone, utc_offset,
|
||||
prog_name,
|
||||
prog_version);
|
||||
|
||||
rrdhost_init_hostname(host, hostname, false);
|
||||
|
||||
|
@ -407,8 +409,6 @@ int is_legacy = 1;
|
|||
else
|
||||
error_report("Host machine GUID %s is not valid", host->machine_guid);
|
||||
|
||||
rrdfamily_index_init(host);
|
||||
rrdcalctemplate_index_init(host);
|
||||
rrdcalc_rrdhost_index_init(host);
|
||||
|
||||
if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
|
||||
|
@ -583,28 +583,28 @@ int is_legacy = 1;
|
|||
}
|
||||
|
||||
static void rrdhost_update(RRDHOST *host
|
||||
, const char *hostname
|
||||
, const char *registry_hostname
|
||||
, const char *guid
|
||||
, const char *os
|
||||
, const char *timezone
|
||||
, const char *abbrev_timezone
|
||||
, int32_t utc_offset
|
||||
, const char *tags
|
||||
, const char *program_name
|
||||
, const char *program_version
|
||||
, int update_every
|
||||
, long history
|
||||
, RRD_MEMORY_MODE mode
|
||||
, unsigned int health_enabled
|
||||
, unsigned int rrdpush_enabled
|
||||
, char *rrdpush_destination
|
||||
, char *rrdpush_api_key
|
||||
, char *rrdpush_send_charts_matching
|
||||
, bool rrdpush_enable_replication
|
||||
, time_t rrdpush_seconds_to_replicate
|
||||
, time_t rrdpush_replication_step
|
||||
, struct rrdhost_system_info *system_info
|
||||
, const char *hostname
|
||||
, const char *registry_hostname
|
||||
, const char *guid
|
||||
, const char *os
|
||||
, const char *timezone
|
||||
, const char *abbrev_timezone
|
||||
, int32_t utc_offset
|
||||
, const char *tags
|
||||
, const char *prog_name
|
||||
, const char *prog_version
|
||||
, int update_every
|
||||
, long history
|
||||
, RRD_MEMORY_MODE mode
|
||||
, unsigned int health_enabled
|
||||
, unsigned int rrdpush_enabled
|
||||
, char *rrdpush_destination
|
||||
, char *rrdpush_api_key
|
||||
, char *rrdpush_send_charts_matching
|
||||
, bool rrdpush_enable_replication
|
||||
, time_t rrdpush_seconds_to_replicate
|
||||
, time_t rrdpush_replication_step
|
||||
, struct rrdhost_system_info *system_info
|
||||
)
|
||||
{
|
||||
UNUSED(guid);
|
||||
|
@ -636,23 +636,25 @@ static void rrdhost_update(RRDHOST *host
|
|||
rrdhost_index_add_hostname(host);
|
||||
}
|
||||
|
||||
if(strcmp(rrdhost_program_name(host), program_name) != 0) {
|
||||
if(strcmp(rrdhost_program_name(host), prog_name) != 0) {
|
||||
nd_log(NDLS_DAEMON, NDLP_NOTICE,
|
||||
"Host '%s' switched program name from '%s' to '%s'",
|
||||
rrdhost_hostname(host), rrdhost_program_name(host), program_name);
|
||||
rrdhost_hostname(host), rrdhost_program_name(host),
|
||||
prog_name);
|
||||
|
||||
STRING *t = host->program_name;
|
||||
host->program_name = string_strdupz(program_name);
|
||||
host->program_name = string_strdupz(prog_name);
|
||||
string_freez(t);
|
||||
}
|
||||
|
||||
if(strcmp(rrdhost_program_version(host), program_version) != 0) {
|
||||
if(strcmp(rrdhost_program_version(host), prog_version) != 0) {
|
||||
nd_log(NDLS_DAEMON, NDLP_NOTICE,
|
||||
"Host '%s' switched program version from '%s' to '%s'",
|
||||
rrdhost_hostname(host), rrdhost_program_version(host), program_version);
|
||||
rrdhost_hostname(host), rrdhost_program_version(host),
|
||||
prog_version);
|
||||
|
||||
STRING *t = host->program_version;
|
||||
host->program_version = string_strdupz(program_version);
|
||||
host->program_version = string_strdupz(prog_version);
|
||||
string_freez(t);
|
||||
}
|
||||
|
||||
|
@ -703,8 +705,6 @@ static void rrdhost_update(RRDHOST *host
|
|||
rrdpush_api_key,
|
||||
rrdpush_send_charts_matching);
|
||||
|
||||
rrdfamily_index_init(host);
|
||||
rrdcalctemplate_index_init(host);
|
||||
rrdcalc_rrdhost_index_init(host);
|
||||
|
||||
if(rrdpush_enable_replication)
|
||||
|
@ -727,29 +727,29 @@ static void rrdhost_update(RRDHOST *host
|
|||
}
|
||||
|
||||
RRDHOST *rrdhost_find_or_create(
|
||||
const char *hostname
|
||||
, const char *registry_hostname
|
||||
, const char *guid
|
||||
, const char *os
|
||||
, const char *timezone
|
||||
, const char *abbrev_timezone
|
||||
, int32_t utc_offset
|
||||
, const char *tags
|
||||
, const char *program_name
|
||||
, const char *program_version
|
||||
, int update_every
|
||||
, long history
|
||||
, RRD_MEMORY_MODE mode
|
||||
, unsigned int health_enabled
|
||||
, unsigned int rrdpush_enabled
|
||||
, char *rrdpush_destination
|
||||
, char *rrdpush_api_key
|
||||
, char *rrdpush_send_charts_matching
|
||||
, bool rrdpush_enable_replication
|
||||
, time_t rrdpush_seconds_to_replicate
|
||||
, time_t rrdpush_replication_step
|
||||
, struct rrdhost_system_info *system_info
|
||||
, bool archived
|
||||
const char *hostname
|
||||
, const char *registry_hostname
|
||||
, const char *guid
|
||||
, const char *os
|
||||
, const char *timezone
|
||||
, const char *abbrev_timezone
|
||||
, int32_t utc_offset
|
||||
, const char *tags
|
||||
, const char *prog_name
|
||||
, const char *prog_version
|
||||
, int update_every
|
||||
, long history
|
||||
, RRD_MEMORY_MODE mode
|
||||
, unsigned int health_enabled
|
||||
, unsigned int rrdpush_enabled
|
||||
, char *rrdpush_destination
|
||||
, char *rrdpush_api_key
|
||||
, char *rrdpush_send_charts_matching
|
||||
, bool rrdpush_enable_replication
|
||||
, time_t rrdpush_seconds_to_replicate
|
||||
, time_t rrdpush_replication_step
|
||||
, struct rrdhost_system_info *system_info
|
||||
, bool archived
|
||||
) {
|
||||
RRDHOST *host = rrdhost_find_by_guid(guid);
|
||||
if (unlikely(host && host->rrd_memory_mode != mode && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) {
|
||||
|
@ -780,9 +780,9 @@ RRDHOST *rrdhost_find_or_create(
|
|||
, abbrev_timezone
|
||||
, utc_offset
|
||||
, tags
|
||||
, program_name
|
||||
, program_version
|
||||
, update_every
|
||||
,
|
||||
prog_name,
|
||||
prog_version, update_every
|
||||
, history
|
||||
, mode
|
||||
, health_enabled
|
||||
|
@ -809,9 +809,9 @@ RRDHOST *rrdhost_find_or_create(
|
|||
, abbrev_timezone
|
||||
, utc_offset
|
||||
, tags
|
||||
, program_name
|
||||
, program_version
|
||||
, update_every
|
||||
,
|
||||
prog_name,
|
||||
prog_version, update_every
|
||||
, history
|
||||
, mode
|
||||
, health_enabled
|
||||
|
@ -1046,7 +1046,6 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unitt
|
|||
dbengine_enabled = true;
|
||||
}
|
||||
else {
|
||||
health_init();
|
||||
rrdpush_init();
|
||||
|
||||
if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE || rrdpush_receiver_needs_dbengine()) {
|
||||
|
@ -1094,7 +1093,7 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unitt
|
|||
, default_rrd_update_every
|
||||
, default_rrd_history_entries
|
||||
, default_rrd_memory_mode
|
||||
, default_health_enabled
|
||||
, health_plugin_enabled()
|
||||
, default_rrdpush_enabled
|
||||
, default_rrdpush_destination
|
||||
, default_rrdpush_api_key
|
||||
|
@ -1112,6 +1111,10 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unitt
|
|||
|
||||
dyncfg_host_init(localhost);
|
||||
|
||||
if(!unittest) {
|
||||
health_plugin_init();
|
||||
}
|
||||
|
||||
// we register this only on localhost
|
||||
// for the other nodes, the origin server should register it
|
||||
rrd_function_add_inline(localhost, NULL, "streaming", 10,
|
||||
|
@ -1270,7 +1273,6 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) {
|
|||
// delete all the RRDSETs of the host
|
||||
rrdset_index_destroy(host);
|
||||
rrdcalc_rrdhost_index_destroy(host);
|
||||
rrdcalctemplate_index_destroy(host);
|
||||
|
||||
// cleanup ML resources
|
||||
ml_host_delete(host);
|
||||
|
@ -1321,11 +1323,10 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) {
|
|||
simple_pattern_free(host->rrdpush_send_charts_matching);
|
||||
freez(host->node_id);
|
||||
|
||||
rrdfamily_index_destroy(host);
|
||||
rrd_functions_host_destroy(host);
|
||||
rrdvariables_destroy(host->rrdvars);
|
||||
if (host == localhost)
|
||||
rrdvariables_destroy(health_rrdvars);
|
||||
health_plugin_destroy();
|
||||
|
||||
rrdhost_destroy_rrdcontexts(host);
|
||||
|
||||
|
|
|
@ -671,8 +671,11 @@ void rrdlabels_destroy(RRDLABELS *labels)
|
|||
freez(labels);
|
||||
}
|
||||
|
||||
//
|
||||
// Check in labels to see if we have the key specified in label
|
||||
static RRDLABEL *rrdlabels_find_label_with_key_unsafe(RRDLABELS *labels, RRDLABEL *label)
|
||||
// same_value indicates if the value should also be matched
|
||||
//
|
||||
static RRDLABEL *rrdlabels_find_label_with_key_unsafe(RRDLABELS *labels, RRDLABEL *label, bool same_value)
|
||||
{
|
||||
if (unlikely(!labels))
|
||||
return NULL;
|
||||
|
@ -683,7 +686,7 @@ static RRDLABEL *rrdlabels_find_label_with_key_unsafe(RRDLABELS *labels, RRDLABE
|
|||
RRDLABEL *found = NULL;
|
||||
while ((PValue = JudyLFirstThenNext(labels->JudyL, &Index, &first_then_next))) {
|
||||
RRDLABEL *lb = (RRDLABEL *)Index;
|
||||
if (lb->index.key == label->index.key && lb != label) {
|
||||
if (lb->index.key == label->index.key && ((lb == label) == same_value)) {
|
||||
found = (RRDLABEL *)Index;
|
||||
break;
|
||||
}
|
||||
|
@ -718,7 +721,7 @@ static void labels_add_already_sanitized(RRDLABELS *labels, const char *key, con
|
|||
new_ls |= RRDLABEL_FLAG_NEW;
|
||||
*((RRDLABEL_SRC *)PValue) = new_ls;
|
||||
|
||||
RRDLABEL *old_label_with_same_key = rrdlabels_find_label_with_key_unsafe(labels, new_label);
|
||||
RRDLABEL *old_label_with_same_key = rrdlabels_find_label_with_key_unsafe(labels, new_label, false);
|
||||
if (old_label_with_same_key) {
|
||||
(void) JudyLDel(&labels->JudyL, (Word_t) old_label_with_same_key, PJE0);
|
||||
delete_label(old_label_with_same_key);
|
||||
|
@ -1027,6 +1030,39 @@ void rrdlabels_migrate_to_these(RRDLABELS *dst, RRDLABELS *src) {
|
|||
spinlock_unlock(&dst->spinlock);
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// Return the common labels count in labels1, labels2
|
||||
//
|
||||
size_t rrdlabels_common_count(RRDLABELS *labels1, RRDLABELS *labels2)
|
||||
{
|
||||
if (!labels1 || !labels2)
|
||||
return 0;
|
||||
|
||||
if (labels1 == labels2)
|
||||
return rrdlabels_entries(labels1);
|
||||
|
||||
RRDLABEL *label;
|
||||
RRDLABEL_SRC ls;
|
||||
|
||||
spinlock_lock(&labels1->spinlock);
|
||||
spinlock_lock(&labels2->spinlock);
|
||||
|
||||
size_t count = 0;
|
||||
lfe_start_nolock(labels2, label, ls)
|
||||
{
|
||||
RRDLABEL *old_label_with_key = rrdlabels_find_label_with_key_unsafe(labels1, label, true);
|
||||
if (old_label_with_key)
|
||||
count++;
|
||||
}
|
||||
lfe_done_nolock();
|
||||
|
||||
spinlock_unlock(&labels2->spinlock);
|
||||
spinlock_unlock(&labels1->spinlock);
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
void rrdlabels_copy(RRDLABELS *dst, RRDLABELS *src)
|
||||
{
|
||||
if (!dst || !src || (dst == src))
|
||||
|
@ -1042,7 +1078,7 @@ void rrdlabels_copy(RRDLABELS *dst, RRDLABELS *src)
|
|||
bool update_statistics = false;
|
||||
lfe_start_nolock(src, label, ls)
|
||||
{
|
||||
RRDLABEL *old_label_with_key = rrdlabels_find_label_with_key_unsafe(dst, label);
|
||||
RRDLABEL *old_label_with_key = rrdlabels_find_label_with_key_unsafe(dst, label, false);
|
||||
Pvoid_t *PValue = JudyLIns(&dst->JudyL, (Word_t)label, PJE0);
|
||||
if(unlikely(!PValue || PValue == PJERR))
|
||||
fatal("RRDLABELS: corrupted labels array");
|
||||
|
|
|
@ -50,6 +50,7 @@ void rrdlabels_to_buffer_json_members(RRDLABELS *labels, BUFFER *wb);
|
|||
|
||||
void rrdlabels_migrate_to_these(RRDLABELS *dst, RRDLABELS *src);
|
||||
void rrdlabels_copy(RRDLABELS *dst, RRDLABELS *src);
|
||||
size_t rrdlabels_common_count(RRDLABELS *labels1, RRDLABELS *labels2);
|
||||
|
||||
int rrdlabels_unittest(void);
|
||||
|
||||
|
|
|
@ -282,15 +282,7 @@ static void rrdset_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v
|
|||
|
||||
rrddim_index_init(st);
|
||||
|
||||
// chart variables - we need this for data collection to work (collector given chart variables) - not only health
|
||||
rrdsetvar_index_init(st);
|
||||
|
||||
if (host->health.health_enabled) {
|
||||
st->rrdfamily = rrdfamily_add_and_acquire(host, rrdset_family(st));
|
||||
st->rrdvars = rrdvariables_create();
|
||||
rrddimvar_index_init(st);
|
||||
}
|
||||
|
||||
st->rrdvars = rrdvariables_create();
|
||||
st->rrdlabels = rrdlabels_create();
|
||||
rrdset_update_permanent_labels(st);
|
||||
|
||||
|
@ -346,36 +338,24 @@ static void rrdset_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, v
|
|||
// release the collector info
|
||||
dictionary_destroy(st->functions_view);
|
||||
|
||||
rrdcalc_unlink_all_rrdset_alerts(st);
|
||||
rrdcalc_unlink_and_delete_all_rrdset_alerts(st);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// the order of destruction is important here
|
||||
|
||||
// 1. delete RRDDIMVAR index - this will speed up the destruction of RRDDIMs
|
||||
// because each dimension loops to find its own variables in this index.
|
||||
// There are no references to the items on this index from the dimensions.
|
||||
// To find their own, they have to walk-through the dictionary.
|
||||
rrddimvar_index_destroy(st); // destroy the rrddimvar index
|
||||
|
||||
// 2. delete RRDSETVAR index
|
||||
rrdsetvar_index_destroy(st); // destroy the rrdsetvar index
|
||||
|
||||
// 3. delete RRDVAR index after the above, to avoid triggering its garbage collector (they have references on this)
|
||||
// 1. delete RRDVAR index after the above, to avoid triggering its garbage collector (they have references on this)
|
||||
rrdvariables_destroy(st->rrdvars); // free all variables and destroy the rrdvar dictionary
|
||||
|
||||
// 4. delete RRDFAMILY - this has to be last, because RRDDIMVAR and RRDSETVAR need the reference counter
|
||||
rrdfamily_release(host, st->rrdfamily); // release the acquired rrdfamily -- has to be after all variables
|
||||
|
||||
// 5. delete RRDDIMs, now their variables are not existing, so this is fast
|
||||
// 2. delete RRDDIMs, now their variables are not existing, so this is fast
|
||||
rrddim_index_destroy(st); // free all the dimensions and destroy the dimensions index
|
||||
|
||||
// 6. this has to be after the dimensions are freed, but before labels are freed (contexts need the labels)
|
||||
// 3. this has to be after the dimensions are freed, but before labels are freed (contexts need the labels)
|
||||
rrdcontext_removed_rrdset(st); // let contexts know
|
||||
|
||||
// 7. destroy the chart labels
|
||||
// 4. destroy the chart labels
|
||||
rrdlabels_destroy(st->rrdlabels); // destroy the labels, after letting the contexts know
|
||||
|
||||
// 8. destroy the ml handle
|
||||
// 5. destroy the ml handle
|
||||
ml_chart_delete(st);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -461,8 +441,6 @@ static bool rrdset_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused,
|
|||
if(old_family != st->family)
|
||||
ctr->react_action |= RRDSET_REACT_UPDATED;
|
||||
string_freez(old_family);
|
||||
|
||||
// TODO - we should rename RRDFAMILY variables
|
||||
}
|
||||
|
||||
if(ctr->context && *ctr->context) {
|
||||
|
@ -643,16 +621,10 @@ int rrdset_reset_name(RRDSET *st, const char *name) {
|
|||
rrdset_index_del_name(host, st);
|
||||
string_freez(st->name);
|
||||
st->name = name_string;
|
||||
rrdsetvar_rename_all(st);
|
||||
}
|
||||
else
|
||||
st->name = name_string;
|
||||
|
||||
RRDDIM *rd;
|
||||
rrddim_foreach_read(rd, st)
|
||||
rrddimvar_rename_all(rd);
|
||||
rrddim_foreach_done(rd);
|
||||
|
||||
rrdset_index_add_name(host, st);
|
||||
|
||||
rrdset_flag_clear(st, RRDSET_FLAG_EXPORTING_SEND);
|
||||
|
|
|
@ -1,299 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "rrd.h"
|
||||
|
||||
typedef struct rrdsetvar {
|
||||
STRING *name; // variable name
|
||||
void *value; // we need this to maintain the allocation for custom chart variables
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_local;
|
||||
const RRDVAR_ACQUIRED *rrdvar_family_chart_id;
|
||||
const RRDVAR_ACQUIRED *rrdvar_family_chart_name;
|
||||
const RRDVAR_ACQUIRED *rrdvar_host_chart_id;
|
||||
const RRDVAR_ACQUIRED *rrdvar_host_chart_name;
|
||||
|
||||
RRDVAR_FLAGS flags:24;
|
||||
RRDVAR_TYPE type:8;
|
||||
} RRDSETVAR;
|
||||
|
||||
// should only be called while the rrdsetvar dict is write locked
|
||||
// otherwise, 2+ threads may be setting the same variables at the same time
|
||||
static inline void rrdsetvar_free_rrdvars_unsafe(RRDSET *st, RRDSETVAR *rs) {
|
||||
RRDHOST *host = st->rrdhost;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// CHART
|
||||
|
||||
if(st->rrdvars) {
|
||||
rrdvar_release_and_del(st->rrdvars, rs->rrdvar_local);
|
||||
rs->rrdvar_local = NULL;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// FAMILY
|
||||
|
||||
if(st->rrdfamily) {
|
||||
rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_chart_id);
|
||||
rs->rrdvar_family_chart_id = NULL;
|
||||
|
||||
rrdvar_release_and_del(rrdfamily_rrdvars_dict(st->rrdfamily), rs->rrdvar_family_chart_name);
|
||||
rs->rrdvar_family_chart_name = NULL;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// HOST
|
||||
|
||||
if(host->rrdvars && host->health.health_enabled) {
|
||||
rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_id);
|
||||
rs->rrdvar_host_chart_id = NULL;
|
||||
|
||||
rrdvar_release_and_del(host->rrdvars, rs->rrdvar_host_chart_name);
|
||||
rs->rrdvar_host_chart_name = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// should only be called while the rrdsetvar dict is write locked
|
||||
// otherwise, 2+ threads may be setting the same variables at the same time
|
||||
static inline void rrdsetvar_update_rrdvars_unsafe(RRDSET *st, RRDSETVAR *rs) {
|
||||
RRDHOST *host = st->rrdhost;
|
||||
|
||||
RRDVAR_FLAGS options = rs->flags;
|
||||
options &= ~RRDVAR_OPTIONS_REMOVED_WHEN_PROPAGATING_TO_RRDVAR;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// free the old ones (if any)
|
||||
|
||||
rrdsetvar_free_rrdvars_unsafe(st, rs);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// KEYS
|
||||
|
||||
char buffer[RRDVAR_MAX_LENGTH + 1];
|
||||
snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_id(st), string2str(rs->name));
|
||||
STRING *key_chart_id = string_strdupz(buffer);
|
||||
|
||||
snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_name(st), string2str(rs->name));
|
||||
STRING *key_chart_name = string_strdupz(buffer);
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// CHART
|
||||
|
||||
if(st->rrdvars) {
|
||||
rs->rrdvar_local = rrdvar_add_and_acquire("local", st->rrdvars, rs->name, rs->type, options, rs->value);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// FAMILY
|
||||
|
||||
if(st->rrdfamily) {
|
||||
rs->rrdvar_family_chart_id = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_chart_id, rs->type, options, rs->value);
|
||||
rs->rrdvar_family_chart_name = rrdvar_add_and_acquire("family", rrdfamily_rrdvars_dict(st->rrdfamily), key_chart_name, rs->type, options, rs->value);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// HOST
|
||||
|
||||
if(host->rrdvars && host->health.health_enabled) {
|
||||
rs->rrdvar_host_chart_id = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_id, rs->type, options, rs->value);
|
||||
rs->rrdvar_host_chart_name = rrdvar_add_and_acquire("host", host->rrdvars, key_chart_name, rs->type, options, rs->value);
|
||||
}
|
||||
|
||||
// free the keys
|
||||
string_freez(key_chart_id);
|
||||
string_freez(key_chart_name);
|
||||
}
|
||||
|
||||
// Free the value of the variable, but only when it is flagged as allocated.
// The pointer and the flag are cleared before the memory is released.
static void rrdsetvar_free_value_unsafe(RRDSETVAR *rs) {
    if(!(rs->flags & RRDVAR_FLAG_ALLOCATED))
        return;

    void *allocated = rs->value;
    rs->value = NULL;
    rs->flags &= ~RRDVAR_FLAG_ALLOCATED;
    freez(allocated);
}
|
||||
|
||||
// Replace the value of the variable.
// With a non-NULL new_value the variable references it; otherwise a
// NETDATA_DOUBLE initialized to NAN is allocated and the variable is flagged
// as owning the allocation.
static void rrdsetvar_set_value_unsafe(RRDSETVAR *rs, void *new_value) {
    rrdsetvar_free_value_unsafe(rs);

    if(new_value) {
        rs->value = new_value;
        return;
    }

    NETDATA_DOUBLE *placeholder = mallocz(sizeof(NETDATA_DOUBLE));
    *placeholder = NAN;

    rs->value = placeholder;
    rs->flags |= RRDVAR_FLAG_ALLOCATED;
}
|
||||
|
||||
struct rrdsetvar_constructor {
|
||||
RRDSET *rrdset;
|
||||
const char *name;
|
||||
void *value;
|
||||
RRDVAR_FLAGS flags :16;
|
||||
RRDVAR_TYPE type:8;
|
||||
};
|
||||
|
||||
// Dictionary insert callback: initialize a new RRDSETVAR from the constructor
// data and publish its RRDVARs.
static void rrdsetvar_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdsetvar, void *constructor_data) {
    struct rrdsetvar_constructor *ctr = constructor_data;
    RRDSETVAR *rs = rrdsetvar;

    ctr->flags &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS;

    rs->name = string_strdupz(ctr->name);
    rs->type = ctr->type;
    rs->flags = ctr->flags;
    rrdsetvar_set_value_unsafe(rs, ctr->value);

    // publish the rrdvariables now, while the dictionary is write locked
    rrdsetvar_update_rrdvars_unsafe(ctr->rrdset, rs);
}
|
||||
|
||||
// Dictionary conflict callback: the variable already exists.
// Returns false when the existing variable already matches the request,
// otherwise resets its flags, type and value, republishes its RRDVARs and
// returns true.
static bool rrdsetvar_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdsetvar, void *new_rrdsetvar __maybe_unused, void *constructor_data) {
    struct rrdsetvar_constructor *ctr = constructor_data;
    RRDSETVAR *rs = rrdsetvar;

    ctr->flags &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS;

    RRDVAR_FLAGS options = rs->flags;
    options &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS;

    // no value was given and the variable already owns an allocated one
    bool keeps_own_allocation = ctr->value == NULL && rs->value != NULL && (rs->flags & RRDVAR_FLAG_ALLOCATED);
    bool same_value = rs->value == ctr->value;
    bool same_settings = ctr->flags == options && rs->type == ctr->type;

    if((keeps_own_allocation || same_value) && same_settings) {
        // nothing to reset - everything is the same, or as it should be
        return false;
    }

    internal_error(true, "RRDSETVAR: resetting variable '%s' of chart '%s' of host '%s', options from 0x%x to 0x%x, type from %d to %d",
                   string2str(rs->name), rrdset_id(ctr->rrdset), rrdhost_hostname(ctr->rrdset->rrdhost),
                   options, ctr->flags, rs->type, ctr->type);

    // the flags are about to change, so release the old value first
    rrdsetvar_free_value_unsafe(rs);
    rs->flags = ctr->flags;
    rs->type = ctr->type;
    rrdsetvar_set_value_unsafe(rs, ctr->value);

    // republish the rrdvariables now, while the dictionary is write locked
    rrdsetvar_update_rrdvars_unsafe(ctr->rrdset, rs);

    return true;
}
|
||||
|
||||
// Dictionary delete callback: unpublish the RRDVARs of the variable, free its
// value (when allocated) and release its name.
static void rrdsetvar_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdsetvar, void *rrdset __maybe_unused) {
    RRDSETVAR *rs = rrdsetvar;
    RRDSET *st = rrdset;

    rrdsetvar_free_rrdvars_unsafe(st, rs);
    rrdsetvar_free_value_unsafe(rs);

    string_freez(rs->name);
    rs->name = NULL;
}
|
||||
|
||||
// Create the rrdsetvar dictionary of the chart and register its callbacks.
// Does nothing when the chart already has one.
void rrdsetvar_index_init(RRDSET *st) {
    if(st->rrdsetvar_root_index)
        return;

    st->rrdsetvar_root_index = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
                                                          &dictionary_stats_category_rrdhealth, sizeof(RRDSETVAR));

    dictionary_register_insert_callback(st->rrdsetvar_root_index, rrdsetvar_insert_callback, NULL);
    dictionary_register_conflict_callback(st->rrdsetvar_root_index, rrdsetvar_conflict_callback, NULL);

    // the delete callback receives the chart, to find the dictionaries the RRDVARs live in
    dictionary_register_delete_callback(st->rrdsetvar_root_index, rrdsetvar_delete_callback, st);
}
|
||||
|
||||
// Destroy the rrdsetvar dictionary of the chart and clear the pointer to it.
void rrdsetvar_index_destroy(RRDSET *st) {
    DICTIONARY *index = st->rrdsetvar_root_index;
    dictionary_destroy(index);
    st->rrdsetvar_root_index = NULL;
}
|
||||
|
||||
// Add (or update, through the conflict callback) a chart variable and return
// it acquired. The returned item has to be released by the caller.
const RRDSETVAR_ACQUIRED *rrdsetvar_add_and_acquire(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags) {
    struct rrdsetvar_constructor ctr = {
        .rrdset = st,
        .name = name,
        .value = value,
        .flags = flags,
        .type = type,
    };

    return (const RRDSETVAR_ACQUIRED *)dictionary_set_and_acquire_item_advanced(
        st->rrdsetvar_root_index, name, -1, NULL, sizeof(RRDSETVAR), &ctr);
}
|
||||
|
||||
// Add (or update) a chart variable and release it immediately, for callers
// that do not need to keep a reference to it.
void rrdsetvar_add_and_leave_released(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags) {
    const RRDSETVAR_ACQUIRED *acquired = rrdsetvar_add_and_acquire(st, name, type, value, flags);
    dictionary_acquired_item_release(st->rrdsetvar_root_index, (const DICTIONARY_ITEM *)acquired);
}
|
||||
|
||||
// Republish the RRDVARs of every variable of the chart (their keys include
// the chart id and the chart name), then link the matching alerts to the chart.
void rrdsetvar_rename_all(RRDSET *st) {
    netdata_log_debug(D_VARIABLES, "RRDSETVAR rename for chart id '%s' name '%s'", rrdset_id(st), rrdset_name(st));

    RRDSETVAR *rs;
    dfe_start_write(st->rrdsetvar_root_index, rs) {
        // the update requires the rrdsetvar dictionary to be write locked
        rrdsetvar_update_rrdvars_unsafe(st, rs);
    }
    dfe_done(rs);

    rrdcalc_link_matching_alerts_to_rrdset(st);
}
|
||||
|
||||
// Delete, by name, every variable in the rrdsetvar dictionary of the chart.
void rrdsetvar_release_and_delete_all(RRDSET *st) {
    RRDSETVAR *rs;
    dfe_start_write(st->rrdsetvar_root_index, rs) {
        const char *variable_name = string2str(rs->name);
        ssize_t variable_name_len = (ssize_t)string_strlen(rs->name);
        dictionary_del_advanced(st->rrdsetvar_root_index, variable_name, variable_name_len);
    }
    dfe_done(rs);
}
|
||||
|
||||
// Release a previously acquired chart variable of the given dictionary.
void rrdsetvar_release(DICTIONARY *dict, const RRDSETVAR_ACQUIRED *rsa) {
    const DICTIONARY_ITEM *item = (const DICTIONARY_ITEM *)rsa;
    dictionary_acquired_item_release(dict, item);
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------------
|
||||
// custom chart variables
|
||||
|
||||
// Add a custom chart variable (calculated, with its value allocated by the
// variable itself) under the sanitized form of the given name, and return it
// acquired.
const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name) {
    STRING *sanitized = rrdvar_name_to_string(name);

    const RRDSETVAR_ACQUIRED *acquired =
        rrdsetvar_add_and_acquire(st, string2str(sanitized), RRDVAR_TYPE_CALCULATED, NULL, RRDVAR_FLAG_CUSTOM_CHART_VAR);

    string_freez(sanitized);
    return acquired;
}
|
||||
|
||||
// Set the value of a custom chart variable.
// The request is logged and ignored unless the variable is calculated, custom
// and owns its value. When the value actually changes, the chart is flagged
// to have its variables sent upstream.
void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value) {
    if(!rsa) return;

    RRDSETVAR *rs = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rsa);

    bool is_settable = rs->type == RRDVAR_TYPE_CALCULATED
                       && (rs->flags & RRDVAR_FLAG_CUSTOM_CHART_VAR)
                       && (rs->flags & RRDVAR_FLAG_ALLOCATED);

    if(is_settable) {
        NETDATA_DOUBLE *stored = rs->value;
        if(*stored != value) {
            *stored = value;
            rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES);
        }
        return;
    }

    netdata_log_error("RRDSETVAR: requested to set variable '%s' of chart '%s' on host '%s' to value " NETDATA_DOUBLE_FORMAT
                      " but the variable is not a custom chart one (it has options 0x%x, value pointer %p). Ignoring request.",
                      string2str(rs->name),
                      rrdset_id(st),
                      rrdhost_hostname(st->rrdhost),
                      value,
                      (uint32_t)rs->flags, rs->value);
}
|
||||
|
||||
// Append a "VARIABLE CHART name = value" line to the buffer for every custom
// (calculated) variable of the chart, after clearing the chart flag that
// requests its variables to be sent upstream.
void rrdsetvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb) {
    rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES);

    RRDSETVAR *rs;
    dfe_start_read(st->rrdsetvar_root_index, rs) {
        if(unlikely(rs->type == RRDVAR_TYPE_CALCULATED && rs->flags & RRDVAR_FLAG_CUSTOM_CHART_VAR)) {
            NETDATA_DOUBLE *current = (NETDATA_DOUBLE *) rs->value;
            buffer_sprintf(wb, "VARIABLE CHART %s = " NETDATA_DOUBLE_FORMAT "\n", string2str(rs->name), *current);
        }
    }
    dfe_done(rs);
}
|
|
@ -1,30 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_RRDSETVAR_H
|
||||
#define NETDATA_RRDSETVAR_H 1
|
||||
|
||||
#include "rrd.h"
|
||||
|
||||
// variables linked to charts
|
||||
// We link variables to point to the values that are already
|
||||
// calculated / processed by the normal data collection process
|
||||
// This means, there will be no speed penalty for using
|
||||
// these variables
|
||||
|
||||
void rrdsetvar_index_init(RRDSET *st);
|
||||
void rrdsetvar_index_destroy(RRDSET *st);
|
||||
void rrdsetvar_release_and_delete_all(RRDSET *st);
|
||||
|
||||
#define rrdsetvar_custom_chart_variable_release(st, rsa) rrdsetvar_release((st)->rrdsetvar_root_index, rsa)
|
||||
void rrdsetvar_release(DICTIONARY *dict, const RRDSETVAR_ACQUIRED *rsa);
|
||||
|
||||
const RRDSETVAR_ACQUIRED *rrdsetvar_custom_chart_variable_add_and_acquire(RRDSET *st, const char *name);
|
||||
void rrdsetvar_custom_chart_variable_set(RRDSET *st, const RRDSETVAR_ACQUIRED *rsa, NETDATA_DOUBLE value);
|
||||
|
||||
void rrdsetvar_rename_all(RRDSET *st);
|
||||
const RRDSETVAR_ACQUIRED *rrdsetvar_add_and_acquire(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags);
|
||||
void rrdsetvar_add_and_leave_released(RRDSET *st, const char *name, RRDVAR_TYPE type, void *value, RRDVAR_FLAGS flags);
|
||||
|
||||
void rrdsetvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb);
|
||||
|
||||
#endif //NETDATA_RRDSETVAR_H
|
|
@ -1,392 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "rrd.h"
|
||||
|
||||
// the variables as stored in the variables indexes
|
||||
// there are 3 indexes:
|
||||
// 1. at each chart (RRDSET.rrdvar_root_index)
|
||||
// 2. at each context (RRDFAMILY.rrdvar_root_index)
|
||||
// 3. at each host (RRDHOST.rrdvar_root_index)
|
||||
typedef struct rrdvar {
|
||||
STRING *name;
|
||||
void *value;
|
||||
RRDVAR_FLAGS flags:24;
|
||||
RRDVAR_TYPE type:8;
|
||||
} RRDVAR;
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDVAR management
|
||||
|
||||
// Sanitize a variable name in place: every character that is not
// alphanumeric, '.' or '_' is replaced with '_'.
//
// Returns the number of characters replaced (0 for a NULL or empty string).
int rrdvar_fix_name(char *variable) {
    if(!variable)
        return 0;

    int fixed = 0;

    for(char *s = variable; *s; s++) {
        // <ctype.h> functions are only defined for EOF and values that fit
        // in an unsigned char; plain char may be negative for bytes >= 0x80
        unsigned char c = (unsigned char)*s;

        if(!isalnum(c) && c != '.' && c != '_') {
            *s = '_';
            fixed++;
        }
    }

    return fixed;
}
|
||||
|
||||
// Return a STRING holding the sanitized (see rrdvar_fix_name()) copy of name.
// The caller releases it with string_freez().
inline STRING *rrdvar_name_to_string(const char *name) {
    char *sanitized = strdupz(name);
    rrdvar_fix_name(sanitized);

    STRING *result = string_strdupz(sanitized);
    freez(sanitized);

    return result;
}
|
||||
|
||||
struct rrdvar_constructor {
|
||||
STRING *name;
|
||||
void *value;
|
||||
RRDVAR_FLAGS options:16;
|
||||
RRDVAR_TYPE type:8;
|
||||
|
||||
enum {
|
||||
RRDVAR_REACT_NONE = 0,
|
||||
RRDVAR_REACT_NEW = (1 << 0),
|
||||
} react_action;
|
||||
};
|
||||
|
||||
// Dictionary insert callback: initialize a new RRDVAR from the constructor
// data. Without a value, a NETDATA_DOUBLE set to NAN is allocated and the
// variable is flagged as owning it.
static void rrdvar_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdvar, void *constructor_data) {
    struct rrdvar_constructor *ctr = constructor_data;
    RRDVAR *rv = rrdvar;

    ctr->options &= ~RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS;

    rv->name = string_dup(ctr->name);
    rv->type = ctr->type;
    rv->flags = ctr->options;

    if(ctr->value)
        rv->value = ctr->value;
    else {
        NETDATA_DOUBLE *placeholder = mallocz(sizeof(NETDATA_DOUBLE));
        *placeholder = NAN;
        rv->value = placeholder;
        rv->flags |= RRDVAR_FLAG_ALLOCATED;
    }

    ctr->react_action = RRDVAR_REACT_NEW;
}
|
||||
|
||||
// Dictionary delete callback: free the value when the variable owns it and
// release the name of the variable.
static void rrdvar_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdvar, void *nothing __maybe_unused) {
    RRDVAR *rv = rrdvar;

    if(rv->flags & RRDVAR_FLAG_ALLOCATED) {
        freez(rv->value);
    }

    string_freez(rv->name);
    rv->name = NULL;
}
|
||||
|
||||
DICTIONARY *rrdvariables_create(void) {
|
||||
DICTIONARY *dict = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
&dictionary_stats_category_rrdhealth, sizeof(RRDVAR));
|
||||
|
||||
dictionary_register_insert_callback(dict, rrdvar_insert_callback, NULL);
|
||||
dictionary_register_delete_callback(dict, rrdvar_delete_callback, NULL);
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
DICTIONARY *health_rrdvariables_create(void) {
|
||||
DICTIONARY *dict = dictionary_create_advanced(DICT_OPTION_NONE, &dictionary_stats_category_rrdhealth, 0);
|
||||
|
||||
dictionary_register_insert_callback(dict, rrdvar_insert_callback, NULL);
|
||||
dictionary_register_delete_callback(dict, rrdvar_delete_callback, NULL);
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
// Destroy a variables dictionary.
void rrdvariables_destroy(DICTIONARY *dict)
{
    dictionary_destroy(dict);
}
|
||||
|
||||
// Look up a variable by name and return it acquired, as returned by
// dictionary_get_and_acquire_item_advanced().
static inline const RRDVAR_ACQUIRED *rrdvar_get_and_acquire(DICTIONARY *dict, STRING *name) {
    const char *key = string2str(name);
    ssize_t key_len = (ssize_t)string_strlen(name);

    return (const RRDVAR_ACQUIRED *)dictionary_get_and_acquire_item_advanced(dict, key, key_len);
}
|
||||
|
||||
// Delete an acquired variable from the dictionary (by its name) and then
// release the acquired reference. Does nothing when either argument is NULL.
inline void rrdvar_release_and_del(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva) {
    if(unlikely(!dict || !rva)) return;

    const DICTIONARY_ITEM *item = (const DICTIONARY_ITEM *)rva;
    RRDVAR *rv = dictionary_acquired_item_value(item);

    dictionary_del_advanced(dict, string2str(rv->name), (ssize_t)string_strlen(rv->name));
    dictionary_acquired_item_release(dict, item);
}
|
||||
|
||||
// Add a variable to the dictionary and return it acquired.
// Returns NULL when the dictionary or the name is NULL. The scope is unused.
inline const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(const char *scope __maybe_unused, DICTIONARY *dict, STRING *name, RRDVAR_TYPE type, RRDVAR_FLAGS options, void *value) {
    if(unlikely(!dict || !name)) return NULL;

    struct rrdvar_constructor ctr = {
        .name = name,
        .value = value,
        .options = options,
        .type = type,
        .react_action = RRDVAR_REACT_NONE,
    };

    const char *key = string2str(name);
    ssize_t key_len = (ssize_t)string_strlen(name);

    return (const RRDVAR_ACQUIRED *)dictionary_set_and_acquire_item_advanced(dict, key, key_len, NULL, sizeof(RRDVAR), &ctr);
}
|
||||
|
||||
// Add a variable to the dictionary, without acquiring it.
// Does nothing when the dictionary or the name is NULL. The scope is unused.
inline void rrdvar_add(const char *scope __maybe_unused, DICTIONARY *dict, STRING *name, RRDVAR_TYPE type, RRDVAR_FLAGS options, void *value) {
    if(unlikely(!dict || !name)) return;

    struct rrdvar_constructor ctr = {
        .name = name,
        .value = value,
        .options = options,
        .type = type,
        .react_action = RRDVAR_REACT_NONE,
    };

    const char *key = string2str(name);
    ssize_t key_len = (ssize_t)string_strlen(name);

    dictionary_set_advanced(dict, key, key_len, NULL, sizeof(RRDVAR), &ctr);
}
|
||||
|
||||
// Flush the variables dictionary.
void rrdvar_delete_all(DICTIONARY *dict)
{
    dictionary_flush(dict);
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// CUSTOM HOST VARIABLES
|
||||
|
||||
// Run the callback for the items of the dictionary, through
// dictionary_walkthrough_read(). Returns 0 without calling anything when
// there is no dictionary (health is not enabled).
inline int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data) {
    if(unlikely(!dict))
        return 0;

    return dictionary_walkthrough_read(dict, callback, data);
}
|
||||
|
||||
// Add a custom host variable (calculated, with its value allocated by the
// variable itself) under the sanitized form of the given name, and return it
// acquired. Returns NULL when the host has no variables dictionary (health is
// not enabled).
const RRDVAR_ACQUIRED *rrdvar_custom_host_variable_add_and_acquire(RRDHOST *host, const char *name) {
    DICTIONARY *host_variables = host->rrdvars;
    if(unlikely(!host_variables))
        return NULL;

    STRING *sanitized = rrdvar_name_to_string(name);

    const RRDVAR_ACQUIRED *acquired =
        rrdvar_add_and_acquire("host", host_variables, sanitized, RRDVAR_TYPE_CALCULATED, RRDVAR_FLAG_CUSTOM_HOST_VAR, NULL);

    string_freez(sanitized);
    return acquired;
}
|
||||
|
||||
// Set the value of a custom host variable.
//
// The request is logged and ignored unless the variable is calculated AND
// carries both RRDVAR_FLAG_CUSTOM_HOST_VAR and RRDVAR_FLAG_ALLOCATED. Custom
// host variables are added without a value, so the insert callback allocates
// one and sets RRDVAR_FLAG_ALLOCATED. Requiring both flags (as the custom
// chart variable setter does) prevents writing through the value pointer of
// a variable that merely has one of them.
//
// When the value actually changes, the variable is sent upstream.
void rrdvar_custom_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value) {
    if(unlikely(!host->rrdvars || !rva)) return; // when health is not enabled

    const RRDVAR_FLAGS required = RRDVAR_FLAG_CUSTOM_HOST_VAR | RRDVAR_FLAG_ALLOCATED;

    if(rrdvar_type(rva) != RRDVAR_TYPE_CALCULATED || (rrdvar_flags(rva) & required) != required) {
        netdata_log_error("requested to set variable '%s' to value " NETDATA_DOUBLE_FORMAT " but the variable is not a custom one.", rrdvar_name(rva), value);
        return;
    }

    RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);
    NETDATA_DOUBLE *v = rv->value;

    if(*v != value) {
        *v = value;

        // if the host is streaming, send this variable upstream immediately
        rrdpush_sender_send_this_host_variable_now(host, rva);
    }
}
|
||||
|
||||
// Release an acquired variable. Does nothing when either argument is NULL
// (health is not enabled).
void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva) {
    if(unlikely(!dict || !rva))
        return;

    const DICTIONARY_ITEM *item = (const DICTIONARY_ITEM *)rva;
    dictionary_acquired_item_release(dict, item);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDVAR lookup
|
||||
|
||||
NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva) {
|
||||
if(unlikely(!rva)) return NAN;
|
||||
|
||||
RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);
|
||||
|
||||
switch(rv->type) {
|
||||
case RRDVAR_TYPE_CALCULATED: {
|
||||
NETDATA_DOUBLE *n = (NETDATA_DOUBLE *)rv->value;
|
||||
return *n;
|
||||
}
|
||||
|
||||
case RRDVAR_TYPE_TIME_T: {
|
||||
time_t *n = (time_t *)rv->value;
|
||||
return (NETDATA_DOUBLE)*n;
|
||||
}
|
||||
|
||||
case RRDVAR_TYPE_COLLECTED: {
|
||||
collected_number *n = (collected_number *)rv->value;
|
||||
return (NETDATA_DOUBLE)*n;
|
||||
}
|
||||
|
||||
case RRDVAR_TYPE_TOTAL: {
|
||||
total_number *n = (total_number *)rv->value;
|
||||
return (NETDATA_DOUBLE)*n;
|
||||
}
|
||||
|
||||
case RRDVAR_TYPE_INT: {
|
||||
int *n = (int *)rv->value;
|
||||
return *n;
|
||||
}
|
||||
|
||||
default:
|
||||
netdata_log_error("I don't know how to convert RRDVAR type %u to NETDATA_DOUBLE", rv->type);
|
||||
return NAN;
|
||||
}
|
||||
}
|
||||
|
||||
// Check whether the dictionary has a variable named
// "<chart name>.<dimension name>". Returns 1 when it does, 0 otherwise
// (including when any of the arguments is NULL).
int health_variable_check(DICTIONARY *dict, RRDSET *st, RRDDIM *rd) {
    if (!dict || !st || !rd) return 0;

    char key[RRDVAR_MAX_LENGTH + 1];
    snprintfz(key, RRDVAR_MAX_LENGTH, "%s.%s", string2str(st->name), string2str(rd->name));
    STRING *key_string = string_strdupz(key);

    int found = 0;

    const RRDVAR_ACQUIRED *rva = rrdvar_get_and_acquire(dict, key_string);
    if(rva) {
        dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva);
        found = 1;
    }

    string_freez(key_string);
    return found;
}
|
||||
|
||||
// Make sure the chart has its family, variables dictionary and dimension
// variables index, then register the standard chart variables and, for each
// dimension, its value, raw value and last collection time variables.
void rrdvar_store_for_chart(RRDHOST *host, RRDSET *st) {
    if (!st) return;

    if(!st->rrdfamily) {
        st->rrdfamily = rrdfamily_add_and_acquire(host, rrdset_family(st));
    }

    if(!st->rrdvars) {
        st->rrdvars = rrdvariables_create();
    }

    rrddimvar_index_init(st);

    // chart variables, referencing members of the chart
    rrdsetvar_add_and_leave_released(st, "last_collected_t", RRDVAR_TYPE_TIME_T, &st->last_collected_time.tv_sec, RRDVAR_FLAG_NONE);
    rrdsetvar_add_and_leave_released(st, "green", RRDVAR_TYPE_CALCULATED, &st->green, RRDVAR_FLAG_NONE);
    rrdsetvar_add_and_leave_released(st, "red", RRDVAR_TYPE_CALCULATED, &st->red, RRDVAR_FLAG_NONE);
    rrdsetvar_add_and_leave_released(st, "update_every", RRDVAR_TYPE_INT, &st->update_every, RRDVAR_FLAG_NONE);

    // dimension variables, referencing members of each dimension
    RRDDIM *rd;
    rrddim_foreach_read(rd, st) {
        rrddimvar_add_and_leave_released(rd, RRDVAR_TYPE_CALCULATED, NULL, NULL, &rd->collector.last_stored_value, RRDVAR_FLAG_NONE);
        rrddimvar_add_and_leave_released(rd, RRDVAR_TYPE_COLLECTED, NULL, "_raw", &rd->collector.last_collected_value, RRDVAR_FLAG_NONE);
        rrddimvar_add_and_leave_released(rd, RRDVAR_TYPE_TIME_T, NULL, "_last_collected_t", &rd->collector.last_collected_time.tv_sec, RRDVAR_FLAG_NONE);
    }
    rrddim_foreach_done(rd);
}
|
||||
|
||||
// Resolve a variable for an alert: the dictionary of the chart is searched
// first, then the one of its family and finally the one of the host.
// On success the value is stored in *result and 1 is returned; 0 is returned
// when the alert has no chart or the variable is not found anywhere.
int health_variable_lookup(STRING *variable, RRDCALC *rc, NETDATA_DOUBLE *result) {
    RRDSET *st = rc->rrdset;
    if(!st) return 0;

    RRDHOST *host = st->rrdhost;
    const RRDVAR_ACQUIRED *rva;

    // 1. the chart
    if((rva = rrdvar_get_and_acquire(st->rrdvars, variable)) != NULL) {
        *result = rrdvar2number(rva);
        dictionary_acquired_item_release(st->rrdvars, (const DICTIONARY_ITEM *)rva);
        return 1;
    }

    // 2. the family
    if((rva = rrdvar_get_and_acquire(rrdfamily_rrdvars_dict(st->rrdfamily), variable)) != NULL) {
        *result = rrdvar2number(rva);
        dictionary_acquired_item_release(rrdfamily_rrdvars_dict(st->rrdfamily), (const DICTIONARY_ITEM *)rva);
        return 1;
    }

    // 3. the host
    if((rva = rrdvar_get_and_acquire(host->rrdvars, variable)) != NULL) {
        *result = rrdvar2number(rva);
        dictionary_acquired_item_release(host->rrdvars, (const DICTIONARY_ITEM *)rva);
        return 1;
    }

    return 0;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDVAR to JSON
|
||||
|
||||
struct variable2json_helper {
|
||||
BUFFER *buf;
|
||||
RRDVAR_FLAGS options;
|
||||
};
|
||||
|
||||
// Walkthrough callback: add one variable to the JSON buffer as a member named
// after it - as a double, or as a NULL string when the value is NAN or
// infinite. Variables not matching the helper's flags are skipped.
// Always returns 0.
static int single_variable2json_callback(const DICTIONARY_ITEM *item __maybe_unused, void *entry __maybe_unused, void *helper_data) {
    struct variable2json_helper *ctx = (struct variable2json_helper *)helper_data;
    const RRDVAR_ACQUIRED *rva = (const RRDVAR_ACQUIRED *)item;
    NETDATA_DOUBLE number = rrdvar2number(rva);

    if(ctx->options != RRDVAR_FLAG_NONE && !(rrdvar_flags(rva) & ctx->options))
        return 0;

    if(unlikely(isnan(number) || isinf(number)))
        buffer_json_member_add_string(ctx->buf, rrdvar_name(rva), NULL);
    else
        buffer_json_member_add_double(ctx->buf, rrdvar_name(rva), number);

    return 0;
}
|
||||
|
||||
// Add the custom chart variables of the chart to the JSON buffer.
void health_api_v1_chart_custom_variables2json(RRDSET *st, BUFFER *buf) {
    struct variable2json_helper ctx = {
        .buf = buf,
        .options = RRDVAR_FLAG_CUSTOM_CHART_VAR,
    };

    rrdvar_walkthrough_read(st->rrdvars, single_variable2json_callback, &ctx);
}
|
||||
|
||||
// Generate a JSON document with the identity of the chart (id, name, context,
// family, host) and all the variables found in the chart, family and host
// dictionaries.
void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf) {
    RRDHOST *rrdhost = st->rrdhost;

    // RRDVAR_FLAG_NONE: do not filter, output every variable
    struct variable2json_helper ctx = {.buf = buf, .options = RRDVAR_FLAG_NONE};

    buffer_json_initialize(buf, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);

    buffer_json_member_add_string(buf, "chart", rrdset_id(st));
    buffer_json_member_add_string(buf, "chart_name", rrdset_name(st));
    buffer_json_member_add_string(buf, "chart_context", rrdset_context(st));

    buffer_json_member_add_object(buf, "chart_variables");
    rrdvar_walkthrough_read(st->rrdvars, single_variable2json_callback, &ctx);
    buffer_json_object_close(buf);

    buffer_json_member_add_string(buf, "family", rrdset_family(st));

    buffer_json_member_add_object(buf, "family_variables");
    rrdvar_walkthrough_read(rrdfamily_rrdvars_dict(st->rrdfamily), single_variable2json_callback, &ctx);
    buffer_json_object_close(buf);

    buffer_json_member_add_string(buf, "host", rrdhost_hostname(rrdhost));

    buffer_json_member_add_object(buf, "host_variables");
    rrdvar_walkthrough_read(rrdhost->rrdvars, single_variable2json_callback, &ctx);
    buffer_json_object_close(buf);

    buffer_json_finalize(buf);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDVAR private members examination
|
||||
|
||||
const char *rrdvar_name(const RRDVAR_ACQUIRED *rva) {
|
||||
return dictionary_acquired_item_name((const DICTIONARY_ITEM *)rva);
|
||||
}
|
||||
|
||||
RRDVAR_FLAGS rrdvar_flags(const RRDVAR_ACQUIRED *rva) {
|
||||
RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);
|
||||
return rv->flags;
|
||||
}
|
||||
RRDVAR_TYPE rrdvar_type(const RRDVAR_ACQUIRED *rva) {
|
||||
RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);
|
||||
return rv->type;
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_RRDVAR_H
|
||||
#define NETDATA_RRDVAR_H 1
|
||||
|
||||
#include "libnetdata/libnetdata.h"
|
||||
|
||||
typedef enum rrdvar_type {
|
||||
RRDVAR_TYPE_CALCULATED = 1,
|
||||
RRDVAR_TYPE_TIME_T = 2,
|
||||
RRDVAR_TYPE_COLLECTED = 3,
|
||||
RRDVAR_TYPE_TOTAL = 4,
|
||||
RRDVAR_TYPE_INT = 5
|
||||
|
||||
// this is 8 bit
|
||||
// to increase it you have to change the bitfield in
|
||||
// rrdvar, rrdsetvar, rrddimvar
|
||||
} RRDVAR_TYPE;
|
||||
|
||||
typedef enum rrdvar_options {
|
||||
RRDVAR_FLAG_NONE = 0,
|
||||
RRDVAR_FLAG_ALLOCATED = (1 << 0), // the value ptr is allocated (not a reference)
|
||||
RRDVAR_FLAG_CUSTOM_HOST_VAR = (1 << 1), // this is a custom host variable, not associated with a dimension
|
||||
RRDVAR_FLAG_CUSTOM_CHART_VAR = (1 << 2), // this is a custom chart variable, not associated with a dimension
|
||||
RRDVAR_FLAG_RRDCALC_LOCAL_VAR = (1 << 3), // this is an alarm variable, attached to a chart
|
||||
RRDVAR_FLAG_RRDCALC_FAMILY_VAR = (1 << 4), // this is an alarm variable, attached to a family
|
||||
RRDVAR_FLAG_RRDCALC_HOST_CHARTID_VAR = (1 << 5), // this is an alarm variable, attached to the host, using the chart id
|
||||
RRDVAR_FLAG_RRDCALC_HOST_CHARTNAME_VAR = (1 << 6), // this is an alarm variable, attached to the host, using the chart name
|
||||
RRDVAR_FLAG_CONFIG_VAR = (1 << 7), // this is an alarm variable, read from alarm config
|
||||
|
||||
// this is 24 bit
|
||||
// to increase it you have to change the bitfield in
|
||||
// rrdvar, rrdsetvar, rrddimvar
|
||||
} RRDVAR_FLAGS;
|
||||
|
||||
#define RRDVAR_OPTIONS_REMOVED_ON_NEW_OBJECTS \
|
||||
(RRDVAR_FLAG_ALLOCATED)
|
||||
|
||||
#define RRDVAR_OPTIONS_REMOVED_WHEN_PROPAGATING_TO_RRDVAR \
|
||||
(RRDVAR_FLAG_ALLOCATED)
|
||||
|
||||
#define RRDVAR_MAX_LENGTH 1024
|
||||
|
||||
int rrdvar_fix_name(char *variable);
|
||||
|
||||
#include "rrd.h"
|
||||
|
||||
STRING *rrdvar_name_to_string(const char *name);
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_custom_host_variable_add_and_acquire(RRDHOST *host, const char *name);
|
||||
void rrdvar_add(const char *scope __maybe_unused, DICTIONARY *dict, STRING *name, RRDVAR_TYPE type, RRDVAR_FLAGS options, void *value);
|
||||
void rrdvar_custom_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value);
|
||||
|
||||
int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data);
|
||||
|
||||
#define rrdvar_custom_host_variable_release(host, rva) rrdvar_release((host)->rrdvars, rva)
|
||||
void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva);
|
||||
|
||||
NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva);
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(const char *scope, DICTIONARY *dict, STRING *name, RRDVAR_TYPE type, RRDVAR_FLAGS options, void *value);
|
||||
void rrdvar_release_and_del(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva);
|
||||
|
||||
DICTIONARY *rrdvariables_create(void);
|
||||
DICTIONARY *health_rrdvariables_create(void);
|
||||
void rrdvariables_destroy(DICTIONARY *dict);
|
||||
|
||||
void rrdvar_store_for_chart(RRDHOST *host, RRDSET *st);
|
||||
int health_variable_check(DICTIONARY *dict, RRDSET *st, RRDDIM *rd);
|
||||
|
||||
void rrdvar_delete_all(DICTIONARY *dict);
|
||||
|
||||
const char *rrdvar_name(const RRDVAR_ACQUIRED *rva);
|
||||
RRDVAR_FLAGS rrdvar_flags(const RRDVAR_ACQUIRED *rva);
|
||||
RRDVAR_TYPE rrdvar_type(const RRDVAR_ACQUIRED *rva);
|
||||
|
||||
#endif //NETDATA_RRDVAR_H
|
|
@ -821,7 +821,7 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN
|
|||
#endif
|
||||
|
||||
#ifdef ENABLE_ACLK
|
||||
static bool have_recent_alarm(RRDHOST *host, int64_t alarm_id, int64_t mark)
|
||||
static bool have_recent_alarm_unsafe(RRDHOST *host, int64_t alarm_id, int64_t mark)
|
||||
{
|
||||
ALARM_ENTRY *ae = host->health_log.alarms;
|
||||
|
||||
|
@ -882,7 +882,7 @@ void aclk_push_alert_snapshot_event(char *node_id __maybe_unused)
|
|||
if (unlikely(ae->new_status == RRDCALC_STATUS_UNINITIALIZED))
|
||||
continue;
|
||||
|
||||
if (have_recent_alarm(host, ae->alarm_id, ae->unique_id))
|
||||
if (have_recent_alarm_unsafe(host, ae->alarm_id, ae->unique_id))
|
||||
continue;
|
||||
|
||||
if (is_event_from_alert_variable_config(ae->unique_id, &host->host_uuid))
|
||||
|
@ -911,7 +911,7 @@ void aclk_push_alert_snapshot_event(char *node_id __maybe_unused)
|
|||
if (likely(ae->updated_by_id) || unlikely(ae->new_status == RRDCALC_STATUS_UNINITIALIZED))
|
||||
continue;
|
||||
|
||||
if (have_recent_alarm(host, ae->alarm_id, ae->unique_id))
|
||||
if (have_recent_alarm_unsafe(host, ae->alarm_id, ae->unique_id))
|
||||
continue;
|
||||
|
||||
if (is_event_from_alert_variable_config(ae->unique_id, &host->host_uuid))
|
||||
|
@ -1090,7 +1090,7 @@ void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused)
|
|||
}
|
||||
|
||||
active_alerts[cnt].name = (char *)rrdcalc_name(rc);
|
||||
len += string_strlen(rc->name);
|
||||
len += string_strlen(rc->config.name);
|
||||
active_alerts[cnt].chart = (char *)rrdcalc_chart_name(rc);
|
||||
len += string_strlen(rc->chart);
|
||||
active_alerts[cnt].status = rc->status;
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "sqlite_health.h"
|
||||
#include "sqlite_functions.h"
|
||||
#include "sqlite_db_migration.h"
|
||||
#include "../../health/health_internals.h"
|
||||
|
||||
#define MAX_HEALTH_SQL_SIZE 2048
|
||||
#define SQLITE3_BIND_STRING_OR_NULL(res, key, param) \
|
||||
|
@ -101,7 +102,8 @@ failed:
|
|||
"config_hash_id, name, chart, exec, recipient, units, chart_context, last_transition_id, chart_name) " \
|
||||
"VALUES (@host_id,@alarm_id, @config_hash_id,@name,@chart,@exec,@recipient,@units,@chart_context," \
|
||||
"@last_transition_id,@chart_name) ON CONFLICT (host_id, alarm_id) DO UPDATE " \
|
||||
"SET last_transition_id = excluded.last_transition_id, chart_name = excluded.chart_name RETURNING health_log_id"
|
||||
"SET last_transition_id = excluded.last_transition_id, chart_name = excluded.chart_name, " \
|
||||
"config_hash_id=excluded.config_hash_id RETURNING health_log_id"
|
||||
|
||||
#define SQL_INSERT_HEALTH_LOG_DETAIL \
|
||||
"INSERT INTO health_log_detail (health_log_id, unique_id, alarm_id, alarm_event_id, " \
|
||||
|
@ -958,7 +960,7 @@ void sql_health_alarm_log_load(RRDHOST *host)
|
|||
"@p_db_lookup_dimensions,@p_db_lookup_method,@p_db_lookup_options,@p_db_lookup_after," \
|
||||
"@p_db_lookup_before,@p_update_every,@source,@chart_labels,@summary)"
|
||||
|
||||
int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg)
|
||||
int sql_alert_store_config(RRD_ALERT_PROTOTYPE *ap __maybe_unused)
|
||||
{
|
||||
static __thread sqlite3_stmt *res = NULL;
|
||||
int rc, param = 0;
|
||||
|
@ -977,133 +979,170 @@ int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg)
|
|||
return 1;
|
||||
}
|
||||
}
|
||||
BUFFER *buf = buffer_create(128, NULL);
|
||||
|
||||
rc = sqlite3_bind_blob(res, ++param, hash_id, sizeof(*hash_id), SQLITE_STATIC);
|
||||
rc = sqlite3_bind_blob(res, ++param, &ap->config.hash_id, sizeof(ap->config.hash_id), SQLITE_STATIC);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->alarm, ++param);
|
||||
if (ap->match.is_template)
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, NULL, ++param);
|
||||
else
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.name, ++param);
|
||||
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->template_key, ++param);
|
||||
if (ap->match.is_template)
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.name, ++param);
|
||||
else
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, NULL, ++param);
|
||||
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->on, ++param);
|
||||
if (ap->match.is_template)
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.on.context, ++param);
|
||||
else
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.on.chart, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->classification, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.classification, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->component, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.component, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->type, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.type, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->os, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.os, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->host, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.host, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->lookup, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.lookup, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->every, ++param);
|
||||
rc = sqlite3_bind_int(res, ++param, ap->config.update_every);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->units, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.units, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->calc, ++param);
|
||||
if (ap->config.calculation)
|
||||
rc = sqlite3_bind_text(res, ++param, expression_source(ap->config.calculation), -1, SQLITE_STATIC);
|
||||
else
|
||||
rc = sqlite3_bind_null(res, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->plugin, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.plugin, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->module, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.module, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->charts, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.charts, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->green, ++param);
|
||||
rc = sqlite3_bind_double(res, ++param, ap->config.green);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->red, ++param);
|
||||
rc = sqlite3_bind_double(res, ++param, ap->config.red);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->warn, ++param);
|
||||
if (ap->config.warning)
|
||||
rc = sqlite3_bind_text(res, ++param, expression_source(ap->config.warning), -1, SQLITE_STATIC);
|
||||
else
|
||||
rc = sqlite3_bind_null(res, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->crit, ++param);
|
||||
if (ap->config.critical)
|
||||
rc = sqlite3_bind_text(res, ++param, expression_source(ap->config.critical), -1, SQLITE_STATIC);
|
||||
else
|
||||
rc = sqlite3_bind_null(res, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->exec, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.exec, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->to, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.recipient, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->info, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.info, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->delay, ++param);
|
||||
if (ap->config.delay_up_duration)
|
||||
buffer_sprintf(buf, "up %ds ", ap->config.delay_up_duration);
|
||||
|
||||
if (ap->config.delay_down_duration)
|
||||
buffer_sprintf(buf, "down %ds ", ap->config.delay_down_duration);
|
||||
|
||||
if (ap->config.delay_multiplier)
|
||||
buffer_sprintf(buf, "multiplier %.1f ", ap->config.delay_multiplier);
|
||||
|
||||
if (ap->config.delay_max_duration)
|
||||
buffer_sprintf(buf, "max %ds", ap->config.delay_max_duration);
|
||||
|
||||
// delay
|
||||
rc = sqlite3_bind_text(res, ++param, buffer_tostring(buf), -1, SQLITE_STATIC);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->options, ++param);
|
||||
if (ap->config.alert_action_options & ALERT_ACTION_OPTION_NO_CLEAR_NOTIFICATION)
|
||||
rc = sqlite3_bind_text(res, ++param, "no-clear-notification", -1, SQLITE_STATIC);
|
||||
else
|
||||
rc = sqlite3_bind_null(res, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->repeat, ++param);
|
||||
rc = sqlite3_bind_int(res, ++param, ap->config.update_every);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->host_labels, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.host_labels, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
if (cfg->p_db_lookup_after) {
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->p_db_lookup_dimensions, ++param);
|
||||
if (ap->config.after) {
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.dimensions, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->p_db_lookup_method, ++param);
|
||||
rc = sqlite3_bind_text(res, ++param, time_grouping_id2txt(ap->config.group), -1, SQLITE_STATIC);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = sqlite3_bind_int(res, ++param, (int) cfg->p_db_lookup_options);
|
||||
rc = sqlite3_bind_int(res, ++param, (int) ap->config.options);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = sqlite3_bind_int(res, ++param, (int) cfg->p_db_lookup_after);
|
||||
rc = sqlite3_bind_int64(res, ++param, (int) ap->config.after);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = sqlite3_bind_int(res, ++param, (int) cfg->p_db_lookup_before);
|
||||
rc = sqlite3_bind_int64(res, ++param, (int) ap->config.before);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
} else {
|
||||
|
@ -1128,19 +1167,19 @@ int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg)
|
|||
goto bind_fail;
|
||||
}
|
||||
|
||||
rc = sqlite3_bind_int(res, ++param, cfg->p_update_every);
|
||||
rc = sqlite3_bind_int(res, ++param, ap->config.update_every);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->source, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.source, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->chart_labels, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.chart_labels, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->summary, ++param);
|
||||
rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.summary, ++param);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
goto bind_fail;
|
||||
|
||||
|
@ -1152,9 +1191,11 @@ int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg)
|
|||
if (unlikely(rc != SQLITE_OK))
|
||||
error_report("Failed to reset statement in alert hash_id store function, rc = %d", rc);
|
||||
|
||||
buffer_free(buf);
|
||||
return 0;
|
||||
|
||||
bind_fail:
|
||||
buffer_free(buf);
|
||||
error_report("Failed to bind parameter %d to store alert hash_id, rc = %d", param, rc);
|
||||
rc = sqlite3_reset(res);
|
||||
if (unlikely(rc != SQLITE_OK))
|
||||
|
@ -1162,75 +1203,6 @@ bind_fail:
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
alert hashes are used for cloud communication.
|
||||
if cloud is disabled or openssl is not available (which will prevent cloud connectivity)
|
||||
skip hash calculations
|
||||
*/
|
||||
#if defined ENABLE_HTTPS
|
||||
#define DIGEST_ALERT_CONFIG_VAL(v) ((v) ? EVP_DigestUpdate(evpctx, (string2str(v)), string_strlen((v))) : EVP_DigestUpdate(evpctx, "", 1))
|
||||
#endif
|
||||
int alert_hash_and_store_config(
|
||||
uuid_t hash_id,
|
||||
struct alert_config *cfg,
|
||||
int store_hash)
|
||||
{
|
||||
#if defined ENABLE_HTTPS
|
||||
EVP_MD_CTX *evpctx;
|
||||
unsigned char hash_value[EVP_MAX_MD_SIZE];
|
||||
unsigned int hash_len;
|
||||
evpctx = EVP_MD_CTX_create();
|
||||
EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL);
|
||||
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->alarm);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->template_key);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->os);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->host);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->on);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->plugin);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->module);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->charts);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->lookup);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->calc);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->every);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->green);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->red);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->warn);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->crit);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->exec);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->to);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->units);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->info);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->classification);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->component);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->type);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->delay);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->options);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->repeat);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->host_labels);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->chart_labels);
|
||||
DIGEST_ALERT_CONFIG_VAL(cfg->summary);
|
||||
|
||||
EVP_DigestFinal_ex(evpctx, hash_value, &hash_len);
|
||||
EVP_MD_CTX_destroy(evpctx);
|
||||
fatal_assert(hash_len > sizeof(uuid_t));
|
||||
|
||||
char uuid_str[UUID_STR_LEN];
|
||||
uuid_unparse_lower(*((uuid_t *)&hash_value), uuid_str);
|
||||
uuid_copy(hash_id, *((uuid_t *)&hash_value));
|
||||
|
||||
/* store everything, so it can be recreated when not in memory or just a subset ? */
|
||||
if (store_hash)
|
||||
(void)sql_store_alert_config_hash( (uuid_t *)&hash_value, cfg);
|
||||
#else
|
||||
UNUSED(hash_id);
|
||||
UNUSED(cfg);
|
||||
UNUSED(store_hash);
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#define SQL_SELECT_HEALTH_LAST_EXECUTED_EVENT \
|
||||
"SELECT hld.new_status FROM health_log hl, health_log_detail hld " \
|
||||
"WHERE hl.host_id = @host_id AND hl.alarm_id = @alarm_id AND hld.unique_id != @unique_id AND hld.flags & @flags " \
|
||||
|
@ -1634,10 +1606,9 @@ static uint32_t get_next_alarm_event_id(uint64_t health_log_id, uint32_t alarm_i
|
|||
}
|
||||
|
||||
#define SQL_GET_ALARM_ID \
|
||||
"SELECT alarm_id, health_log_id FROM health_log WHERE host_id = @host_id AND chart = @chart " \
|
||||
"AND name = @name AND config_hash_id = @config_hash_id"
|
||||
"SELECT alarm_id, health_log_id FROM health_log WHERE host_id = @host_id AND chart = @chart AND name = @name"
|
||||
|
||||
uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id)
|
||||
uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id)
|
||||
{
|
||||
int rc = 0;
|
||||
sqlite3_stmt *res = NULL;
|
||||
|
@ -1671,13 +1642,6 @@ uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *
|
|||
return alarm_id;
|
||||
}
|
||||
|
||||
rc = sqlite3_bind_blob(res, 4, config_hash_id, sizeof(*config_hash_id), SQLITE_STATIC);
|
||||
if (unlikely(rc != SQLITE_OK)) {
|
||||
error_report("Failed to bind config_hash_id parameter for SQL_GET_ALARM_ID.");
|
||||
sqlite3_finalize(res);
|
||||
return alarm_id;
|
||||
}
|
||||
|
||||
while (sqlite3_step_monitored(res) == SQLITE_ROW) {
|
||||
alarm_id = (uint32_t) sqlite3_column_int64(res, 0);
|
||||
health_log_id = (uint64_t) sqlite3_column_int64(res, 1);
|
||||
|
@ -1693,111 +1657,6 @@ uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *
|
|||
return alarm_id;
|
||||
}
|
||||
|
||||
#define SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH \
|
||||
"UPDATE health_log SET config_hash_id = @config_hash_id WHERE host_id = @host_id AND alarm_id = @alarm_id " \
|
||||
"AND health_log_id = @health_log_id"
|
||||
|
||||
// Set the config_hash_id of one health_log row.
//
// The row is selected by host (host->host_uuid), alarm_id and health_log_id,
// as defined by SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH.
//
// host:           the host owning the alarm; its host_uuid is bound as @host_id
// alarm_id:       bound as @alarm_id
// health_log_id:  bound as @health_log_id
// config_hash_id: the hash to store, bound as @config_hash_id
//
// Failures are reported with error_report(); nothing is returned to the caller.
// The statement is prepared and finalized on every call.
void sql_update_alarm_with_config_hash(RRDHOST *host, uint32_t alarm_id, uint64_t health_log_id, uuid_t *config_hash_id)
{
    int rc = 0;
    sqlite3_stmt *res = NULL;

    rc = sqlite3_prepare_v2(db_meta, SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH, -1, &res, 0);
    if (rc != SQLITE_OK) {
        error_report("Failed to prepare statement when trying to update an alarm id with a config hash.");
        return;
    }

    rc = sqlite3_bind_blob(res, 1, config_hash_id, sizeof(*config_hash_id), SQLITE_STATIC);
    if (unlikely(rc != SQLITE_OK)) {
        error_report("Failed to bind config_hash_id parameter for SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH.");
        goto done;
    }

    rc = sqlite3_bind_blob(res, 2, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC);
    if (unlikely(rc != SQLITE_OK)) {
        error_report("Failed to bind host_id parameter for SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH.");
        goto done;
    }

    rc = sqlite3_bind_int64(res, 3, (sqlite3_int64) alarm_id);
    if (unlikely(rc != SQLITE_OK)) {
        error_report("Failed to bind alarm_id parameter for SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH.");
        goto done;
    }

    rc = sqlite3_bind_int64(res, 4, (sqlite3_int64) health_log_id);
    if (unlikely(rc != SQLITE_OK)) {
        error_report("Failed to bind health_log_id parameter for SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH.");
        goto done;
    }

    rc = execute_insert(res);
    if (unlikely(rc != SQLITE_DONE))
        error_report("Failed to execute SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH, rc = %d", rc);

done:
    // the statement is not cached, so it is always finalized here
    rc = sqlite3_finalize(res);
    if (unlikely(rc != SQLITE_OK))
        error_report("Failed to finalize statement to update health log table with config hash ids, rc = %d", rc);
}
|
||||
|
||||
#define SQL_GET_ALARM_ID_CHECK_ZERO_HASH \
|
||||
"SELECT alarm_id, health_log_id FROM health_log WHERE host_id = @host_id AND chart = @chart " \
|
||||
"AND name = @name AND (config_hash_id IS NULL OR config_hash_id = ZEROBLOB(16))"
|
||||
|
||||
// Find the alarm id of an alert of this host that has no configuration hash yet.
//
// The lookup (SQL_GET_ALARM_ID_CHECK_ZERO_HASH) matches health_log rows by host, chart and name,
// whose config_hash_id is NULL or 16 zero bytes. If several rows match, the last one returned wins.
//
// When a row is found:
//   - the row is updated to carry config_hash_id (sql_update_alarm_with_config_hash());
//   - *next_event_id is set from get_next_alarm_event_id().
// When no row is found, or on any prepare/bind failure, *next_event_id is left untouched.
//
// host:           the host owning the alarm; its host_uuid is bound as @host_id
// chart:          bound as @chart (NULL is bound as SQL NULL)
// name:           bound as @name (NULL is bound as SQL NULL)
// next_event_id:  output, written only when an alarm id is found
// config_hash_id: the hash to stamp on the matching row
//
// Returns the alarm id found, or 0 when there is none or an error occurred.
uint32_t sql_get_alarm_id_check_zero_hash(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id)
{
    int rc = 0;
    sqlite3_stmt *res = NULL;
    uint32_t alarm_id = 0;
    uint64_t health_log_id = 0;

    rc = sqlite3_prepare_v2(db_meta, SQL_GET_ALARM_ID_CHECK_ZERO_HASH, -1, &res, 0);
    if (rc != SQLITE_OK) {
        error_report("Failed to prepare statement when trying to get an alarm id with zero hash");
        return alarm_id;
    }

    rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC);
    if (unlikely(rc != SQLITE_OK)) {
        error_report("Failed to bind host_id parameter for SQL_GET_ALARM_ID_CHECK_ZERO_HASH.");
        sqlite3_finalize(res);
        return alarm_id;
    }

    rc = SQLITE3_BIND_STRING_OR_NULL(res, chart, 2);
    if (unlikely(rc != SQLITE_OK)) {
        error_report("Failed to bind chart parameter for SQL_GET_ALARM_ID_CHECK_ZERO_HASH.");
        sqlite3_finalize(res);
        return alarm_id;
    }

    rc = SQLITE3_BIND_STRING_OR_NULL(res, name, 3);
    if (unlikely(rc != SQLITE_OK)) {
        error_report("Failed to bind name parameter for SQL_GET_ALARM_ID_CHECK_ZERO_HASH.");
        sqlite3_finalize(res);
        return alarm_id;
    }

    // every row overwrites the previous one, so the last matching row is the one used
    while (sqlite3_step_monitored(res) == SQLITE_ROW) {
        alarm_id = (uint32_t) sqlite3_column_int64(res, 0);
        health_log_id = (uint64_t) sqlite3_column_int64(res, 1);
    }

    rc = sqlite3_finalize(res);
    if (unlikely(rc != SQLITE_OK))
        error_report("Failed to finalize the statement while getting an alarm id.");

    if (alarm_id) {
        // adopt the existing entry: give it the hash, then continue its event numbering
        sql_update_alarm_with_config_hash(host, alarm_id, health_log_id, config_hash_id);
        *next_event_id = get_next_alarm_event_id(health_log_id, alarm_id);
    }

    return alarm_id;
}
|
||||
|
||||
#define SQL_GET_ALARM_ID_FROM_TRANSITION_ID \
|
||||
"SELECT hld.alarm_id, hl.host_id, hl.chart_context FROM health_log_detail hld, health_log hl " \
|
||||
"WHERE hld.transition_id = @transition_id " \
|
||||
|
|
|
@ -7,16 +7,16 @@
|
|||
|
||||
struct sql_alert_transition_data;
|
||||
struct sql_alert_config_data;
|
||||
struct rrd_alert_prototype;
|
||||
void sql_health_alarm_log_load(RRDHOST *host);
|
||||
void sql_health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae);
|
||||
void sql_health_alarm_log_cleanup(RRDHOST *host, bool claimed);
|
||||
int alert_hash_and_store_config(uuid_t hash_id, struct alert_config *cfg, int store_hash);
|
||||
int sql_alert_store_config(struct rrd_alert_prototype *ap);
|
||||
void sql_aclk_alert_clean_dead_entries(RRDHOST *host);
|
||||
int sql_health_get_last_executed_event(RRDHOST *host, ALARM_ENTRY *ae, RRDCALC_STATUS *last_executed_status);
|
||||
void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, time_t after, const char *chart);
|
||||
int health_migrate_old_health_log_table(char *table);
|
||||
uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id);
|
||||
uint32_t sql_get_alarm_id_check_zero_hash(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id);
|
||||
uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id);
|
||||
void sql_alert_transitions(
|
||||
DICTIONARY *nodes,
|
||||
time_t after,
|
||||
|
|
|
@ -374,59 +374,55 @@ static int print_host_variables_callback(const DICTIONARY_ITEM *item __maybe_unu
|
|||
|
||||
struct host_variables_callback_options *opts = data;
|
||||
|
||||
if (rrdvar_flags(rv) & (RRDVAR_FLAG_CUSTOM_HOST_VAR | RRDVAR_FLAG_CUSTOM_CHART_VAR)) {
|
||||
if (!opts->host_header_printed) {
|
||||
opts->host_header_printed = 1;
|
||||
if (!opts->host_header_printed) {
|
||||
opts->host_header_printed = 1;
|
||||
|
||||
if (opts->output_options & PROMETHEUS_OUTPUT_HELP) {
|
||||
buffer_sprintf(opts->wb, "\n# COMMENT global host and chart variables\n");
|
||||
}
|
||||
if (opts->output_options & PROMETHEUS_OUTPUT_HELP) {
|
||||
buffer_sprintf(opts->wb, "\n# COMMENT global host and chart variables\n");
|
||||
}
|
||||
|
||||
NETDATA_DOUBLE value = rrdvar2number(rv);
|
||||
if (isnan(value) || isinf(value)) {
|
||||
if (opts->output_options & PROMETHEUS_OUTPUT_HELP)
|
||||
buffer_sprintf(
|
||||
opts->wb, "# COMMENT variable \"%s\" is %s. Skipped.\n", rrdvar_name(rv), (isnan(value)) ? "NAN" : "INF");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *label_pre = "";
|
||||
char *label_post = "";
|
||||
if (opts->labels && *opts->labels) {
|
||||
label_pre = "{";
|
||||
label_post = "}";
|
||||
}
|
||||
|
||||
prometheus_name_copy(opts->name, rrdvar_name(rv), sizeof(opts->name));
|
||||
|
||||
if (opts->output_options & PROMETHEUS_OUTPUT_TIMESTAMPS)
|
||||
buffer_sprintf(
|
||||
opts->wb,
|
||||
"%s_%s%s%s%s " NETDATA_DOUBLE_FORMAT " %llu\n",
|
||||
opts->prefix,
|
||||
opts->name,
|
||||
label_pre,
|
||||
opts->labels,
|
||||
label_post,
|
||||
value,
|
||||
opts->now * 1000ULL);
|
||||
else
|
||||
buffer_sprintf(
|
||||
opts->wb,
|
||||
"%s_%s%s%s%s " NETDATA_DOUBLE_FORMAT "\n",
|
||||
opts->prefix,
|
||||
opts->name,
|
||||
label_pre,
|
||||
opts->labels,
|
||||
label_post,
|
||||
value);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
NETDATA_DOUBLE value = rrdvar2number(rv);
|
||||
if (isnan(value) || isinf(value)) {
|
||||
if (opts->output_options & PROMETHEUS_OUTPUT_HELP)
|
||||
buffer_sprintf(
|
||||
opts->wb, "# COMMENT variable \"%s\" is %s. Skipped.\n", rrdvar_name(rv), (isnan(value)) ? "NAN" : "INF");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *label_pre = "";
|
||||
char *label_post = "";
|
||||
if (opts->labels && *opts->labels) {
|
||||
label_pre = "{";
|
||||
label_post = "}";
|
||||
}
|
||||
|
||||
prometheus_name_copy(opts->name, rrdvar_name(rv), sizeof(opts->name));
|
||||
|
||||
if (opts->output_options & PROMETHEUS_OUTPUT_TIMESTAMPS)
|
||||
buffer_sprintf(
|
||||
opts->wb,
|
||||
"%s_%s%s%s%s " NETDATA_DOUBLE_FORMAT " %llu\n",
|
||||
opts->prefix,
|
||||
opts->name,
|
||||
label_pre,
|
||||
opts->labels,
|
||||
label_post,
|
||||
value,
|
||||
opts->now * 1000ULL);
|
||||
else
|
||||
buffer_sprintf(
|
||||
opts->wb,
|
||||
"%s_%s%s%s%s " NETDATA_DOUBLE_FORMAT "\n",
|
||||
opts->prefix,
|
||||
opts->name,
|
||||
label_pre,
|
||||
opts->labels,
|
||||
label_post,
|
||||
value);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct gen_parameters {
|
||||
|
@ -534,7 +530,6 @@ static void prometheus_print_os_info(
|
|||
FILE *fp;
|
||||
char filename[FILENAME_MAX + 1];
|
||||
char buf[BUFSIZ + 1];
|
||||
int first_line = 1;
|
||||
|
||||
snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/etc/os-release");
|
||||
fp = fopen(filename, "r");
|
||||
|
@ -596,7 +591,6 @@ static void prometheus_print_os_info(
|
|||
*val = '\0';
|
||||
val++;
|
||||
buffer_sprintf(wb, ",%s=\"%s\"", key, val);
|
||||
first_line = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -330,24 +330,22 @@ static int format_variable_prometheus_remote_write_callback(const DICTIONARY_ITE
|
|||
|
||||
struct prometheus_remote_write_variables_callback_options *opts = data;
|
||||
|
||||
if (rrdvar_flags(rv) & (RRDVAR_FLAG_CUSTOM_HOST_VAR | RRDVAR_FLAG_CUSTOM_CHART_VAR)) {
|
||||
RRDHOST *host = opts->host;
|
||||
struct instance *instance = opts->instance;
|
||||
struct simple_connector_data *simple_connector_data =
|
||||
(struct simple_connector_data *)instance->connector_specific_data;
|
||||
struct prometheus_remote_write_specific_data *connector_specific_data =
|
||||
(struct prometheus_remote_write_specific_data *)simple_connector_data->connector_specific_data;
|
||||
RRDHOST *host = opts->host;
|
||||
struct instance *instance = opts->instance;
|
||||
struct simple_connector_data *simple_connector_data =
|
||||
(struct simple_connector_data *)instance->connector_specific_data;
|
||||
struct prometheus_remote_write_specific_data *connector_specific_data =
|
||||
(struct prometheus_remote_write_specific_data *)simple_connector_data->connector_specific_data;
|
||||
|
||||
char name[PROMETHEUS_LABELS_MAX + 1];
|
||||
char *suffix = "";
|
||||
char name[PROMETHEUS_LABELS_MAX + 1];
|
||||
char *suffix = "";
|
||||
|
||||
prometheus_name_copy(context, rrdvar_name(rv), PROMETHEUS_ELEMENT_MAX);
|
||||
snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s%s", instance->config.prefix, context, suffix);
|
||||
prometheus_name_copy(context, rrdvar_name(rv), PROMETHEUS_ELEMENT_MAX);
|
||||
snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s%s", instance->config.prefix, context, suffix);
|
||||
|
||||
NETDATA_DOUBLE value = rrdvar2number(rv);
|
||||
add_variable(connector_specific_data->write_request, name,
|
||||
(host == localhost) ? instance->config.hostname : rrdhost_hostname(host), value, opts->now / USEC_PER_MS);
|
||||
}
|
||||
NETDATA_DOUBLE value = rrdvar2number(rv);
|
||||
add_variable(connector_specific_data->write_request, name,
|
||||
(host == localhost) ? instance->config.hostname : rrdhost_hostname(host), value, opts->now / USEC_PER_MS);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
1734
health/health.c
1734
health/health.c
File diff suppressed because it is too large
Load diff
|
@ -6,7 +6,7 @@
|
|||
class: Errors
|
||||
type: System
|
||||
component: RAID
|
||||
lookup: max -10s foreach *
|
||||
lookup: max -10s
|
||||
units: bool
|
||||
every: 10s
|
||||
crit: $this > 0
|
||||
|
@ -22,7 +22,7 @@ component: RAID
|
|||
class: Errors
|
||||
type: System
|
||||
component: RAID
|
||||
lookup: max -10s foreach *
|
||||
lookup: max -10s
|
||||
units: bool
|
||||
every: 10s
|
||||
crit: $this > 0
|
||||
|
|
|
@ -1,23 +1,25 @@
|
|||
# raise a warning alarm if an anomaly probability is consistently above 50%
|
||||
## raise a warning alarm if an anomaly probability is consistently above 50%
|
||||
|
||||
template: anomalies_anomaly_probabilities
|
||||
on: anomalies.probability
|
||||
class: Errors
|
||||
type: Netdata
|
||||
component: ML
|
||||
lookup: average -2m foreach *
|
||||
every: 1m
|
||||
warn: $this > 50
|
||||
info: average anomaly probability over the last 2 minutes
|
||||
## "foreach" was removed, these alarms don't work anymore
|
||||
|
||||
# template: anomalies_anomaly_probabilities
|
||||
# on: anomalies.probability
|
||||
# class: Errors
|
||||
# type: Netdata
|
||||
#component: ML
|
||||
# lookup: average -2m foreach *
|
||||
# every: 1m
|
||||
# warn: $this > 50
|
||||
# info: average anomaly probability over the last 2 minutes
|
||||
|
||||
# raise a warning alarm if an anomaly flag is consistently firing
|
||||
|
||||
template: anomalies_anomaly_flags
|
||||
on: anomalies.anomaly
|
||||
class: Errors
|
||||
type: Netdata
|
||||
component: ML
|
||||
lookup: sum -2m foreach *
|
||||
every: 1m
|
||||
warn: $this > 10
|
||||
info: number of anomalies in the last 2 minutes
|
||||
# template: anomalies_anomaly_flags
|
||||
# on: anomalies.anomaly
|
||||
# class: Errors
|
||||
# type: Netdata
|
||||
#component: ML
|
||||
# lookup: sum -2m foreach *
|
||||
# every: 1m
|
||||
# warn: $this > 10
|
||||
# info: number of anomalies in the last 2 minutes
|
||||
|
|
|
@ -23,7 +23,7 @@ component: Process
|
|||
os: linux
|
||||
module: *
|
||||
hosts: *
|
||||
lookup: max -10s unaligned foreach *
|
||||
lookup: max -10s unaligned
|
||||
units: %
|
||||
every: 10s
|
||||
warn: $this > (($status >= $WARNING) ? (85) : (95))
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
class: Errors
|
||||
type: System
|
||||
component: RAID
|
||||
lookup: max -10s foreach *
|
||||
lookup: max -10s
|
||||
units: boolean
|
||||
every: 10s
|
||||
crit: $this > 0
|
||||
|
@ -22,7 +22,7 @@ component: RAID
|
|||
class: Errors
|
||||
type: System
|
||||
component: RAID
|
||||
lookup: sum -10s foreach *
|
||||
lookup: sum -10s
|
||||
units: predictive failures
|
||||
every: 10s
|
||||
warn: $this > 0
|
||||
|
@ -36,7 +36,7 @@ component: RAID
|
|||
class: Errors
|
||||
type: System
|
||||
component: RAID
|
||||
lookup: sum -10s foreach *
|
||||
lookup: sum -10s
|
||||
units: media errors
|
||||
every: 10s
|
||||
warn: $this > 0
|
||||
|
|
|
@ -20,7 +20,8 @@ component: Redis
|
|||
type: KV Storage
|
||||
component: Redis
|
||||
every: 10s
|
||||
crit: $last_bgsave != nan AND $last_bgsave != 0
|
||||
calc: $last_bgsave != nan AND $last_bgsave != 0
|
||||
crit: $this
|
||||
units: ok/failed
|
||||
summary: Redis background save
|
||||
info: Status of the last RDB save operation (0: ok, 1: error)
|
||||
|
|
|
@ -4,8 +4,7 @@
|
|||
#define NETDATA_HEALTH_H 1
|
||||
|
||||
#include "daemon/common.h"
|
||||
|
||||
extern unsigned int default_health_enabled;
|
||||
#include "rrdcalc.h"
|
||||
|
||||
typedef enum __attribute__((packed)) {
|
||||
HEALTH_ENTRY_FLAG_PROCESSED = 0x00000001, // notifications engine has processed this
|
||||
|
@ -41,20 +40,17 @@ void health_entry_flags_to_json_array(BUFFER *wb, const char *key, HEALTH_ENTRY_
|
|||
|
||||
#define HEALTH_SILENCERS_MAX_FILE_LEN 10000
|
||||
|
||||
extern char *silencers_filename;
|
||||
extern SIMPLE_PATTERN *conf_enabled_alarms;
|
||||
extern DICTIONARY *health_rrdvars;
|
||||
void health_plugin_init(void);
|
||||
void health_plugin_destroy(void);
|
||||
|
||||
void health_init(void);
|
||||
|
||||
void health_reload(void);
|
||||
void health_plugin_reload(void);
|
||||
|
||||
void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* context, RRDCALC_STATUS status);
|
||||
void health_alarms2json(RRDHOST *host, BUFFER *wb, int all);
|
||||
void health_alert2json_conf(RRDHOST *host, BUFFER *wb, CONTEXTS_V2_OPTIONS all);
|
||||
void health_alarms_values2json(RRDHOST *host, BUFFER *wb, int all);
|
||||
|
||||
void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf);
|
||||
void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *wb);
|
||||
void health_api_v1_chart_custom_variables2json(RRDSET *st, BUFFER *buf);
|
||||
|
||||
int health_alarm_log_open(RRDHOST *host);
|
||||
|
@ -63,34 +59,18 @@ void health_alarm_log_load(RRDHOST *host);
|
|||
|
||||
ALARM_ENTRY* health_create_alarm_entry(
|
||||
RRDHOST *host,
|
||||
uint32_t alarm_id,
|
||||
uint32_t alarm_event_id,
|
||||
const uuid_t config_hash_id,
|
||||
RRDCALC *rc,
|
||||
time_t when,
|
||||
STRING *name,
|
||||
STRING *chart,
|
||||
STRING *chart_context,
|
||||
STRING *chart_id,
|
||||
STRING *classification,
|
||||
STRING *component,
|
||||
STRING *type,
|
||||
STRING *exec,
|
||||
STRING *recipient,
|
||||
time_t duration,
|
||||
NETDATA_DOUBLE old_value,
|
||||
NETDATA_DOUBLE new_value,
|
||||
RRDCALC_STATUS old_status,
|
||||
RRDCALC_STATUS new_status,
|
||||
STRING *source,
|
||||
STRING *units,
|
||||
STRING *summary,
|
||||
STRING *info,
|
||||
int delay,
|
||||
HEALTH_ENTRY_FLAGS flags);
|
||||
|
||||
void health_alarm_log_add_entry(RRDHOST *host, ALARM_ENTRY *ae);
|
||||
|
||||
void health_readdir(RRDHOST *host, const char *user_path, const char *stock_path, const char *subpath);
|
||||
char *health_user_config_dir(void);
|
||||
char *health_stock_config_dir(void);
|
||||
void health_alarm_log_free(RRDHOST *host);
|
||||
|
@ -100,7 +80,6 @@ void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae);
|
|||
void *health_cmdapi_thread(void *ptr);
|
||||
|
||||
char *health_edit_command_from_source(const char *source);
|
||||
void sql_refresh_hashes(void);
|
||||
|
||||
void health_string2json(BUFFER *wb, const char *prefix, const char *label, const char *value, const char *suffix);
|
||||
|
||||
|
@ -108,4 +87,12 @@ void health_log_alert_transition_with_trace(RRDHOST *host, ALARM_ENTRY *ae, int
|
|||
#define health_log_alert(host, ae) health_log_alert_transition_with_trace(host, ae, __LINE__, __FILE__, __FUNCTION__)
|
||||
bool health_alarm_log_get_global_id_and_transition_id_for_rrdcalc(RRDCALC *rc, usec_t *global_id, uuid_t *transitions_id);
|
||||
|
||||
int alert_variable_lookup_trace(RRDHOST *host, RRDSET *st, const char *variable, BUFFER *wb);
|
||||
|
||||
#include "health_prototypes.h"
|
||||
#include "health_silencers.h"
|
||||
|
||||
typedef void (*prototype_metadata_cb_t)(void *data, STRING *type, STRING *component, STRING *classification, STRING *recipient);
|
||||
void health_prototype_metadata_foreach(void *data, prototype_metadata_cb_t cb);
|
||||
|
||||
#endif //NETDATA_HEALTH_H
|
||||
|
|
File diff suppressed because it is too large
Load diff
603
health/health_dyncfg.c
Normal file
603
health/health_dyncfg.c
Normal file
|
@ -0,0 +1,603 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "health_internals.h"
|
||||
|
||||
#define DYNCFG_HEALTH_ALERT_PROTOTYPE_PREFIX "health:alert:prototype"
|
||||
|
||||
static void health_dyncfg_register_prototype(RRD_ALERT_PROTOTYPE *ap);
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
// parse the json object of an alert definition
|
||||
|
||||
/*
 * Parse the "match" object of a rule into `match`.
 *
 * match->is_template must already be set by the caller: it selects whether
 * the "on" member is stored as the context (templates) or as the chart
 * (instances), and whether "instances" is read at all.
 *
 * Returns true when every member has been processed. The JSONC_PARSE_* macros
 * are defined elsewhere; judging by their names they report into `error` and
 * return from this function on failure - confirm against their definition.
 */
static bool parse_match(json_object *jobj, const char *path, struct rrd_alert_match *match, BUFFER *error) {
    STRING *target = NULL;
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "on", target, error, true);

    // the parsed string is handed over to the match structure
    if(!match->is_template)
        match->on.chart = target;
    else
        match->on.context = target;

    JSONC_PARSE_TXT2PATTERN_OR_ERROR_AND_RETURN(jobj, path, "os", match->os, error);
    JSONC_PARSE_TXT2PATTERN_OR_ERROR_AND_RETURN(jobj, path, "host", match->host, error);

    // "instances" is only meaningful for templates
    if(match->is_template) {
        JSONC_PARSE_TXT2PATTERN_OR_ERROR_AND_RETURN(jobj, path, "instances", match->charts, error);
    }

    JSONC_PARSE_TXT2PATTERN_OR_ERROR_AND_RETURN(jobj, path, "plugin", match->plugin, error);
    JSONC_PARSE_TXT2PATTERN_OR_ERROR_AND_RETURN(jobj, path, "module", match->module, error);
    JSONC_PARSE_TXT2PATTERN_OR_ERROR_AND_RETURN(jobj, path, "host_labels", match->host_labels, error);
    JSONC_PARSE_TXT2PATTERN_OR_ERROR_AND_RETURN(jobj, path, "instance_labels", match->chart_labels, error);

    return true;
}
|
||||
|
||||
/*
 * Parse the "database_lookup" object (after, before, grouping, options,
 * dimensions) into the corresponding fields of `config`.
 *
 * Returns true when all members have been processed; the JSONC_PARSE_*
 * macros are expected to return early on error (defined elsewhere).
 */
static bool parse_config_value_database_lookup(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error) {
    // time window of the query
    JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "after", config->after, error);
    JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "before", config->before, error);

    // how the data is grouped and which query options apply
    JSONC_PARSE_TXT2ENUM_OR_ERROR_AND_RETURN(
        jobj, path, "grouping", time_grouping_txt2id, config->group, error);
    JSONC_PARSE_ARRAY_OF_TXT2BITMAP_OR_ERROR_AND_RETURN(
        jobj, path, "options", rrdr_options_parse_one, config->options, error);

    // the dimensions the lookup is applied on
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "dimensions", config->dimensions, error, true);

    return true;
}
|
||||
/*
 * Parse the "value" object of a rule configuration: the nested
 * "database_lookup" object, the "calculation" expression, the "units"
 * string and the "update_every" integer.
 *
 * Returns true when all members have been processed.
 */
static bool parse_config_value(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error) {
    // nested object, handled by its own parser
    JSONC_PARSE_SUBOBJECT(
        jobj, path, "database_lookup", config, parse_config_value_database_lookup, error);

    JSONC_PARSE_TXT2EXPRESSION_OR_ERROR_AND_RETURN(
        jobj, path, "calculation", config->calculation, error);

    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "units", config->units, error, true);
    JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "update_every", config->update_every, error);

    return true;
}
|
||||
|
||||
/*
 * Parse the "conditions" object of a rule configuration: the "green" and
 * "red" numbers and the warning / critical condition expressions.
 *
 * Returns true when all members have been processed.
 */
static bool parse_config_conditions(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error) {
    // numeric thresholds
    JSONC_PARSE_DOUBLE_OR_ERROR_AND_RETURN(jobj, path, "green", config->green, error);
    JSONC_PARSE_DOUBLE_OR_ERROR_AND_RETURN(jobj, path, "red", config->red, error);

    // expressions deciding the alert status
    JSONC_PARSE_TXT2EXPRESSION_OR_ERROR_AND_RETURN(
        jobj, path, "warning_condition", config->warning, error);
    JSONC_PARSE_TXT2EXPRESSION_OR_ERROR_AND_RETURN(
        jobj, path, "critical_condition", config->critical, error);

    return true;
}
|
||||
|
||||
/*
 * Parse the "delay" object of the action configuration ("up", "down",
 * "max" durations and the "multiplier") into `config`.
 *
 * Returns true when all members have been processed.
 */
static bool parse_config_action_delay(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error) {
    JSONC_PARSE_INT_OR_ERROR_AND_RETURN(
        jobj, path, "up", config->delay_up_duration, error);
    JSONC_PARSE_INT_OR_ERROR_AND_RETURN(
        jobj, path, "down", config->delay_down_duration, error);
    JSONC_PARSE_INT_OR_ERROR_AND_RETURN(
        jobj, path, "max", config->delay_max_duration, error);
    JSONC_PARSE_DOUBLE_OR_ERROR_AND_RETURN(
        jobj, path, "multiplier", config->delay_multiplier, error);

    return true;
}
|
||||
/*
 * Parse the "repeat" object of the action configuration: whether a custom
 * repeat configuration is enabled, and the warning / critical repeat values.
 *
 * Returns true when all members have been processed.
 */
static bool parse_config_action_repeat(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error) {
    JSONC_PARSE_BOOL_OR_ERROR_AND_RETURN(
        jobj, path, "enabled", config->has_custom_repeat_config, error);
    JSONC_PARSE_INT_OR_ERROR_AND_RETURN(
        jobj, path, "warning", config->warn_repeat_every, error);
    JSONC_PARSE_INT_OR_ERROR_AND_RETURN(
        jobj, path, "critical", config->crit_repeat_every, error);

    return true;
}
|
||||
|
||||
/*
 * Parse the "action" object of a rule configuration: the action options
 * bitmap, the "execute" and "recipient" strings and the nested "delay" and
 * "repeat" objects.
 *
 * Returns true when all members have been processed.
 */
static bool parse_config_action(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error) {
    JSONC_PARSE_ARRAY_OF_TXT2BITMAP_OR_ERROR_AND_RETURN(
        jobj, path, "options", alert_action_options_parse_one, config->alert_action_options, error);

    // what to run and whom to notify
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "execute", config->exec, error, true);
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "recipient", config->recipient, error, true);

    // nested objects, each handled by its own parser
    JSONC_PARSE_SUBOBJECT(jobj, path, "delay", config, parse_config_action_delay, error);
    JSONC_PARSE_SUBOBJECT(jobj, path, "repeat", config, parse_config_action_repeat, error);

    return true;
}
|
||||
|
||||
/*
 * Parse the "config" object of a rule: the descriptive strings (summary,
 * info, type, component, classification) followed by the nested "value",
 * "conditions" and "action" objects.
 *
 * The source type and source of the configuration are deliberately NOT read
 * from the payload - they are given to us via the function call.
 *
 * Returns true when all members have been processed.
 */
static bool parse_config(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error) {
    // descriptive metadata
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(
        jobj, path, "summary", config->summary, error, true);
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(
        jobj, path, "info", config->info, error, true);
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(
        jobj, path, "type", config->type, error, true);
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(
        jobj, path, "component", config->component, error, true);
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(
        jobj, path, "classification", config->classification, error, true);

    // nested objects, each handled by its own parser
    JSONC_PARSE_SUBOBJECT(jobj, path, "value", config, parse_config_value, error);
    JSONC_PARSE_SUBOBJECT(jobj, path, "conditions", config, parse_config_conditions, error);
    JSONC_PARSE_SUBOBJECT(jobj, path, "action", config, parse_config_action, error);

    return true;
}
|
||||
|
||||
/*
 * Parse a whole alert prototype (name, format_version and the "rules" array)
 * from `jobj`.
 *
 * The first rule fills `base` itself; every additional rule is parsed into a
 * newly allocated RRD_ALERT_PROTOTYPE that is appended to the list hanging
 * off base->_internal.next, carrying a duplicate of the base name.
 *
 * Returns true on success. On failure returns false, with the reason appended
 * to `error` for the cases handled directly here; entries already linked to
 * `base` are left in place for the caller to release together with `base`.
 */
static bool parse_prototype(json_object *jobj, const char *path, RRD_ALERT_PROTOTYPE *base, BUFFER *error) {
    JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "name", base->config.name, error, false);

    // the version is parsed, but its value is currently not used by this function
    int64_t version = 0;
    JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, "format_version", version, error);
    (void)version;

    json_object *rules = NULL;
    if(!json_object_object_get_ex(jobj, "rules", &rules)) {
        buffer_sprintf(error, "the rules array is missing");
        return false;
    }

    // json_object_array_length() must only be called on arrays; the payload is
    // user supplied, so "rules" may be null or of any other JSON type
    if(!json_object_is_type(rules, json_type_array)) {
        buffer_sprintf(error, "the rules member is not an array");
        return false;
    }

    size_t rules_len = json_object_array_length(rules);

    RRD_ALERT_PROTOTYPE *ap = base; // fill the first entry
    for(size_t i = 0; i < rules_len; i++) {
        if(!ap) {
            ap = callocz(1, sizeof(*base));
            ap->config.name = string_dup(base->config.name);
            DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(base->_internal.next, ap, _internal.prev, _internal.next);
        }

        json_object *rule = json_object_array_get_idx(rules, i);

        JSONC_PARSE_BOOL_OR_ERROR_AND_RETURN(rule, path, "enabled", ap->match.enabled, error);

        // `type` is a temporary owned by this function (the same macro is used
        // elsewhere in this file to fill fields owned by the prototype), so it
        // has to be released on every path once it has been inspected
        STRING *type = NULL;
        JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(rule, path, "type", type, error, true);

        bool is_template = (string_strcmp(type, "template") == 0);
        bool is_instance = !is_template && (string_strcmp(type, "instance") == 0);

        if(!is_template && !is_instance) {
            buffer_sprintf(error, "type is '%s', but it can only be 'instance' or 'template'", string2str(type));
            string_freez(type);
            return false;
        }

        string_freez(type);
        ap->match.is_template = is_template;

        JSONC_PARSE_SUBOBJECT(rule, path, "match", &ap->match, parse_match, error);
        JSONC_PARSE_SUBOBJECT(rule, path, "config", &ap->config, parse_config, error);

        ap = NULL; // so that we will create another one, if available
    }

    return true;
}
|
||||
|
||||
/*
 * Parse a JSON payload into a newly allocated alert prototype (a list of one
 * entry per rule).
 *
 * payload / payload_len  the JSON text to parse
 * error                  buffer receiving the reason of a failure
 * name                   the name the prototype is expected to have; it is
 *                        used when the payload does not provide one and it
 *                        must match the parsed name otherwise
 *
 * Returns the prototype on success. On any failure the partially built
 * prototype is released with health_prototype_free() and NULL is returned.
 */
static RRD_ALERT_PROTOTYPE *health_prototype_payload_parse(const char *payload, size_t payload_len, BUFFER *error, const char *name) {
    RRD_ALERT_PROTOTYPE *base = callocz(1, sizeof(*base));
    CLEAN_JSON_OBJECT *jobj = NULL;

    struct json_tokener *tokener = json_tokener_new();
    if(!tokener) {
        buffer_sprintf(error, "failed to allocate memory for json tokener");
        goto cleanup;
    }

    // the tokener is only needed for this single parse; remember its status
    // and release it before looking at the outcome
    jobj = json_tokener_parse_ex(tokener, payload, (int)payload_len);
    enum json_tokener_error parse_status = json_tokener_get_error(tokener);
    json_tokener_free(tokener);

    if(parse_status != json_tokener_success) {
        buffer_sprintf(error, "failed to parse json payload: %s", json_tokener_error_desc(parse_status));
        goto cleanup;
    }

    if(!parse_prototype(jobj, "", base, error))
        goto cleanup;

    if(!base->config.name && name)
        base->config.name = string_strdupz(name);

    int rule_no = 1;
    RRD_ALERT_PROTOTYPE *ap = base;
    while(ap) {
        // all the rules of a prototype share the name of the first one
        if(ap->config.name != base->config.name) {
            string_freez(ap->config.name);
            ap->config.name = string_dup(base->config.name);
        }

        if(!RRDCALC_HAS_DB_LOOKUP(ap) && !ap->config.calculation) {
            buffer_sprintf(error, "the rule No %d has neither database lookup nor calculation", rule_no);
            goto cleanup;
        }

        // the prototype is enabled when at least one of its rules is
        if(ap->match.enabled)
            base->_internal.enabled = true;

        ap = ap->_internal.next;
        rule_no++;
    }

    if(string_strcmp(base->config.name, name) == 0)
        return base;

    buffer_sprintf(error,
                   "name parsed ('%s') does not match the name of the alert prototype ('%s')",
                   string2str(base->config.name), name);

cleanup:
    health_prototype_free(base);
    return NULL;
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
// generate the json object of an alert definition
|
||||
|
||||
/*
 * Append one rule of an alert prototype to the currently open JSON array of
 * `wb`, as an object with the members "enabled", "type", "match" and "config".
 *
 * wb           the JSON buffer; an array must be open on it
 * ap           the rule to serialize
 * for_hashing  when true, the members "hash", "source_type" and "source" are
 *              left out of the "config" object
 *
 * Unset match patterns are emitted as "*".
 */
static inline void health_prototype_rule_to_json_array_member(BUFFER *wb, RRD_ALERT_PROTOTYPE *ap, bool for_hashing) {
    buffer_json_add_array_item_object(wb);
    {
        buffer_json_member_add_boolean(wb, "enabled", ap->match.enabled);
        buffer_json_member_add_string(wb, "type", ap->match.is_template ? "template" : "instance");

        buffer_json_member_add_object(wb, "match");
        {
            if(ap->match.is_template)
                buffer_json_member_add_string(wb, "on", string2str(ap->match.on.context));
            else
                buffer_json_member_add_string(wb, "on", string2str(ap->match.on.chart));

            buffer_json_member_add_string_or_empty(wb, "os", ap->match.os ? string2str(ap->match.os) : "*");
            buffer_json_member_add_string_or_empty(wb, "host", ap->match.host ? string2str(ap->match.host) : "*");
            buffer_json_member_add_string_or_empty(wb, "instances", ap->match.charts ? string2str(ap->match.charts) : "*");
            // each member must test its own field: "plugin" depends on match.plugin, not on match.charts
            buffer_json_member_add_string_or_empty(wb, "plugin", ap->match.plugin ? string2str(ap->match.plugin) : "*");
            buffer_json_member_add_string_or_empty(wb, "module", ap->match.module ? string2str(ap->match.module) : "*");
            buffer_json_member_add_string_or_empty(wb, "host_labels", ap->match.host_labels ? string2str(ap->match.host_labels) : "*");
            buffer_json_member_add_string_or_empty(wb, "instance_labels", ap->match.chart_labels ? string2str(ap->match.chart_labels) : "*");
        }
        buffer_json_object_close(wb); // match

        buffer_json_member_add_object(wb, "config");
        {
            if(!for_hashing) {
                buffer_json_member_add_uuid(wb, "hash", &ap->config.hash_id);
                buffer_json_member_add_string(wb, "source_type", dyncfg_id2source_type(ap->config.source_type));
                buffer_json_member_add_string(wb, "source", string2str(ap->config.source));
            }

            buffer_json_member_add_string(wb, "summary", string2str(ap->config.summary));
            buffer_json_member_add_string(wb, "info", string2str(ap->config.info));

            buffer_json_member_add_string(wb, "type", string2str(ap->config.type));
            buffer_json_member_add_string(wb, "component", string2str(ap->config.component));
            buffer_json_member_add_string(wb, "classification", string2str(ap->config.classification));

            buffer_json_member_add_object(wb, "value");
            {
                buffer_json_member_add_object(wb, "database_lookup");
                {
                    buffer_json_member_add_int64(wb, "after", ap->config.after);
                    buffer_json_member_add_int64(wb, "before", ap->config.before);
                    buffer_json_member_add_string(wb, "grouping", time_grouping_id2txt(ap->config.group));
                    rrdr_options_to_buffer_json_array(wb, "options", ap->config.options);
                    buffer_json_member_add_string(wb, "dimensions", string2str(ap->config.dimensions));
                }
                buffer_json_object_close(wb); // database lookup

                buffer_json_member_add_string(wb, "calculation", expression_source(ap->config.calculation));
                buffer_json_member_add_string(wb, "units", string2str(ap->config.units));
                buffer_json_member_add_uint64(wb, "update_every", ap->config.update_every);
            }
            buffer_json_object_close(wb); // value

            buffer_json_member_add_object(wb, "conditions");
            {
                buffer_json_member_add_double(wb, "green", ap->config.green);
                buffer_json_member_add_double(wb, "red", ap->config.red);
                buffer_json_member_add_string(wb, "warning_condition", expression_source(ap->config.warning));
                buffer_json_member_add_string(wb, "critical_condition", expression_source(ap->config.critical));
            }
            buffer_json_object_close(wb); // conditions

            buffer_json_member_add_object(wb, "action");
            {
                alert_action_options_to_buffer_json_array(wb, "options", ap->config.alert_action_options);
                buffer_json_member_add_string(wb, "execute", string2str(ap->config.exec));
                buffer_json_member_add_string(wb, "recipient", string2str(ap->config.recipient));

                buffer_json_member_add_object(wb, "delay");
                {
                    buffer_json_member_add_int64(wb, "up", ap->config.delay_up_duration);
                    buffer_json_member_add_int64(wb, "down", ap->config.delay_down_duration);
                    buffer_json_member_add_int64(wb, "max", ap->config.delay_max_duration);
                    buffer_json_member_add_double(wb, "multiplier", ap->config.delay_multiplier);
                }
                buffer_json_object_close(wb); // delay

                buffer_json_member_add_object(wb, "repeat");
                {
                    // the repeat values are reported as 0 unless a custom repeat configuration is set
                    buffer_json_member_add_boolean(wb, "enabled", ap->config.has_custom_repeat_config);
                    buffer_json_member_add_uint64(wb, "warning", ap->config.has_custom_repeat_config ? ap->config.warn_repeat_every : 0);
                    buffer_json_member_add_uint64(wb, "critical", ap->config.has_custom_repeat_config ? ap->config.crit_repeat_every : 0);
                }
                buffer_json_object_close(wb); // repeat
            }
            buffer_json_object_close(wb); // action
        }
        buffer_json_object_close(wb); // config
    }
    buffer_json_object_close(wb); // array item
}
|
||||
|
||||
/*
 * Serialize an alert prototype, with all its rules, as a JSON document.
 *
 * `wb` is flushed first, so any previous content is discarded. The document
 * has the members "format_version" (always 1), "name" and "rules" - one array
 * item per entry of the list starting at `ap`. `for_hashing` is passed
 * through to the per-rule serializer.
 */
void health_prototype_to_json(BUFFER *wb, RRD_ALERT_PROTOTYPE *ap, bool for_hashing) {
    buffer_flush(wb);
    buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY);

    buffer_json_member_add_uint64(wb, "format_version", 1);
    buffer_json_member_add_string(wb, "name", string2str(ap->config.name));

    buffer_json_member_add_array(wb, "rules");

    RRD_ALERT_PROTOTYPE *rule = ap;
    while(rule) {
        health_prototype_rule_to_json_array_member(wb, rule, for_hashing);
        rule = rule->_internal.next;
    }

    buffer_json_array_close(wb); // rules

    buffer_json_finalize(wb);
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/*
 * Walk all hosts in rrdhost_root_index and unlink + delete every RRDCALC
 * whose config.name is the same STRING pointer as `alert_name` (names are
 * compared by pointer, not by content).
 *
 * Returns the number of RRDCALCs removed.
 */
static size_t dyncfg_health_remove_all_rrdcalc_of_prototype(STRING *alert_name) {
    size_t deleted = 0;

    RRDHOST *host;
    dfe_start_reentrant(rrdhost_root_index, host) {
        RRDCALC *rc;
        foreach_rrdcalc_in_rrdhost_read(host, rc) {
            if(rc->config.name == alert_name) {
                rrdcalc_unlink_and_delete(host, rc, false);
                deleted++;
            }
        }
        foreach_rrdcalc_in_rrdhost_done(rc);
    }
    dfe_done(host);

    return deleted;
}
|
||||
|
||||
/*
 * Re-apply a prototype: first remove every RRDCALC carrying the name of the
 * prototype from all hosts, then apply the prototype to all hosts again.
 */
static void dyncfg_health_prototype_reapply(RRD_ALERT_PROTOTYPE *ap) {
    STRING *prototype_name = ap->config.name;

    // the number of removed alerts is of no interest here
    (void)dyncfg_health_remove_all_rrdcalc_of_prototype(prototype_name);

    health_prototype_apply_to_all_hosts(ap);
}
|
||||
|
||||
/*
 * Handle a dyncfg command addressed to the alert prototype template itself,
 * i.e. an id without an alert name.
 *
 * Only DYNCFG_CMD_ADD does real work: the payload is parsed into a new
 * prototype named `add_name`, added to the prototypes dictionary, applied to
 * all hosts and registered to dyncfg. Every other command is answered with an
 * error response.
 *
 * Returns the response code, with the response body written to `result`.
 */
static int dyncfg_health_prototype_template_action(BUFFER *result, DYNCFG_CMDS cmd, const char *add_name, BUFFER *payload, const char *source __maybe_unused) {
    int code = HTTP_RESP_INTERNAL_SERVER_ERROR;

    switch(cmd) {
        case DYNCFG_CMD_ADD: {
            CLEAN_BUFFER *error = buffer_create(0, NULL);
            RRD_ALERT_PROTOTYPE *nap = health_prototype_payload_parse(buffer_tostring(payload), buffer_strlen(payload), error, add_name);
            if(!nap) {
                code = dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, buffer_tostring(error));
                break;
            }

            nap->config.source_type = DYNCFG_SOURCE_TYPE_DYNCFG;

            // this swaps ap <-> nap
            if(!health_prototype_add(nap)) {
                health_prototype_free(nap);
                return dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "required attributes are missing");
            }
            freez(nap);

            const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(health_globals.prototypes.dict, add_name);
            if(!item)
                return dyncfg_default_response(result, HTTP_RESP_INTERNAL_SERVER_ERROR, "added prototype is not found");

            RRD_ALERT_PROTOTYPE *ap = dictionary_acquired_item_value(item);

            dyncfg_health_prototype_reapply(ap);
            health_dyncfg_register_prototype(ap);

            // read the state before letting go of the item
            int accepted = ap->_internal.enabled ? DYNCFG_RESP_ACCEPTED : DYNCFG_RESP_ACCEPTED_DISABLED;
            dictionary_acquired_item_release(health_globals.prototypes.dict, item);

            code = dyncfg_default_response(result, accepted, "accepted");
            break;
        }

        case DYNCFG_CMD_SCHEMA:
            code = dyncfg_default_response(result, HTTP_RESP_NOT_IMPLEMENTED, "schema not implemented yet for prototype templates");
            break;

        // none of these can be applied on the template
        case DYNCFG_CMD_REMOVE:
        case DYNCFG_CMD_RESTART:
        case DYNCFG_CMD_DISABLE:
        case DYNCFG_CMD_ENABLE:
        case DYNCFG_CMD_UPDATE:
        case DYNCFG_CMD_TEST:
        case DYNCFG_CMD_GET:
            code = dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "action given is not supported for prototype templates");
            break;

        case DYNCFG_CMD_NONE:
            code = dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "invalid action received for prototype templates");
            break;
    }

    return code;
}
|
||||
|
||||
/*
 * Handle a dyncfg command addressed to a specific alert prototype.
 *
 * result      receives the response body
 * cmd         the dyncfg command to execute
 * payload     the JSON definition of the alert (used by DYNCFG_CMD_UPDATE)
 * alert_name  the name of the prototype, as found in the dyncfg id
 *
 * The prototype is looked up in health_globals.prototypes.dict and stays
 * acquired for the duration of the call; every path after the lookup leaves
 * through the single release at the end of the function.
 *
 * Returns the response code.
 */
static int dyncfg_health_prototype_job_action(BUFFER *result, DYNCFG_CMDS cmd, BUFFER *payload, const char *source __maybe_unused, const char *alert_name) {
    const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(health_globals.prototypes.dict, alert_name);
    if(!item)
        return dyncfg_default_response(result, HTTP_RESP_NOT_FOUND, "no alert prototype is available by the name given");

    RRD_ALERT_PROTOTYPE *ap = dictionary_acquired_item_value(item);

    // the dyncfg id of this prototype: <prefix>:<alert name>
    char alert_name_dyncfg[strlen(DYNCFG_HEALTH_ALERT_PROTOTYPE_PREFIX) + strlen(alert_name) + 10];
    snprintfz(alert_name_dyncfg, sizeof(alert_name_dyncfg), DYNCFG_HEALTH_ALERT_PROTOTYPE_PREFIX ":%s", alert_name);

    int code = HTTP_RESP_INTERNAL_SERVER_ERROR;

    switch(cmd) {
        case DYNCFG_CMD_SCHEMA:
            code = dyncfg_default_response(result, HTTP_RESP_NOT_IMPLEMENTED, "schema not implemented yet");
            break;

        case DYNCFG_CMD_GET:
            health_prototype_to_json(result, ap, false);
            code = HTTP_RESP_OK;
            break;

        case DYNCFG_CMD_DISABLE:
            if(ap->_internal.enabled) {
                ap->_internal.enabled = false;
                dyncfg_health_prototype_reapply(ap);
                dyncfg_status(localhost, alert_name_dyncfg, DYNCFG_STATUS_DISABLED);
                code = dyncfg_default_response(result, HTTP_RESP_OK, "disabled");
            }
            else
                code = dyncfg_default_response(result, HTTP_RESP_OK, "already disabled");
            break;

        case DYNCFG_CMD_ENABLE:
            if(ap->_internal.enabled)
                code = dyncfg_default_response(result, HTTP_RESP_OK, "already enabled");
            else {
                // enabling makes sense only when at least one rule is enabled
                size_t matches_enabled = 0;
                spinlock_lock(&ap->_internal.spinlock);
                for(RRD_ALERT_PROTOTYPE *t = ap; t ;t = t->_internal.next)
                    if(t->match.enabled)
                        matches_enabled++;
                spinlock_unlock(&ap->_internal.spinlock);

                if(!matches_enabled) {
                    code = dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "all rules in this alert are disabled, so enabling the alert has no effect");
                }
                else {
                    ap->_internal.enabled = true;
                    dyncfg_health_prototype_reapply(ap);
                    dyncfg_status(localhost, alert_name_dyncfg, DYNCFG_STATUS_ACCEPTED);
                    code = dyncfg_default_response(result, DYNCFG_RESP_ACCEPTED, "enabled");
                }
            }
            break;

        case DYNCFG_CMD_UPDATE: {
            CLEAN_BUFFER *error = buffer_create(0, NULL);
            RRD_ALERT_PROTOTYPE *nap = health_prototype_payload_parse(buffer_tostring(payload), buffer_strlen(payload), error, alert_name);
            if(!nap) {
                code = dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, buffer_tostring(error));
                break;
            }

            nap->config.source_type = DYNCFG_SOURCE_TYPE_DYNCFG;
            bool added = health_prototype_add(nap); // this swaps ap <-> nap

            if(!added) {
                health_prototype_free(nap);

                // do not return from here: the acquired dictionary item must
                // still be released at the end of the function
                code = dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "required attributes are missing");
                break;
            }

            freez(nap);

            dyncfg_health_prototype_reapply(ap);
            code = ap->_internal.enabled ? DYNCFG_RESP_ACCEPTED : DYNCFG_RESP_ACCEPTED_DISABLED;
            code = dyncfg_default_response(result, code, "updated");
        }
            break;

        case DYNCFG_CMD_REMOVE:
            dyncfg_health_remove_all_rrdcalc_of_prototype(ap->config.name);
            dictionary_del(health_globals.prototypes.dict, dictionary_acquired_item_name(item));
            code = dyncfg_default_response(result, HTTP_RESP_OK, "deleted");
            dyncfg_del(localhost, alert_name_dyncfg);
            break;

        case DYNCFG_CMD_TEST:
        case DYNCFG_CMD_ADD:
        case DYNCFG_CMD_RESTART:
            code = dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "action given is not supported for the prototype job");
            break;

        case DYNCFG_CMD_NONE:
            code = dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "invalid action received");
            break;
    }

    dictionary_acquired_item_release(health_globals.prototypes.dict, item);
    return code;
}
|
||||
|
||||
// dyncfg callback for health alert prototypes.
//
// The id is split into components with quoted_strings_splitter_dyncfg_id().
// The first three components must be "health", "alert" and "prototype";
// otherwise a bad-request response is written to `result` and returned.
// The optional fourth component is the alert name:
//  - missing or empty: the command targets the prototype template,
//  - otherwise:        the command targets that specific alert prototype.
//
// Returns the response code produced by the selected handler.
int dyncfg_health_cb(const char *transaction __maybe_unused, const char *id, DYNCFG_CMDS cmd, const char *add_name,
                     BUFFER *payload, usec_t *stop_monotonic_ut __maybe_unused, bool *cancelled __maybe_unused,
                     BUFFER *result, const char *source, void *data __maybe_unused) {

    // expected leading components of the id, with the error reported when they do not match
    static const struct {
        const char *expected;
        const char *error;
    } prefixes[] = {
        { "health",    "first component of id is not 'health'" },
        { "alert",     "second component of id is not 'alert'" },
        { "prototype", "third component of id is not 'prototype'" },
    };

    // the splitter is given a private copy, since `id` itself is const
    size_t id_size = strlen(id) + 1;
    char id_copy[id_size];
    memcpy(id_copy, id, id_size);

    char *parts[100] = { NULL };
    size_t parts_count = quoted_strings_splitter_dyncfg_id(id_copy, parts, 100);
    size_t next = 0;

    for(size_t p = 0; p < sizeof(prefixes) / sizeof(prefixes[0]); p++) {
        char *component = get_word(parts, parts_count, next++);
        if(!component || !*component || strcmp(component, prefixes[p].expected) != 0)
            return dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, prefixes[p].error);
    }

    char *alert_name = get_word(parts, parts_count, next++);

    // no alert name: action on the prototype template
    if(!alert_name || !*alert_name)
        return dyncfg_health_prototype_template_action(result, cmd, add_name, payload, source);

    // action on a specific alert prototype
    return dyncfg_health_prototype_job_action(result, cmd, payload, source, alert_name);
}
|
||||
|
||||
void health_dyncfg_unregister_all_prototypes(void) {
|
||||
char key[HEALTH_CONF_MAX_LINE];
|
||||
RRD_ALERT_PROTOTYPE *ap;
|
||||
|
||||
// remove dyncfg
|
||||
// it is ok if they are not added before
|
||||
|
||||
dfe_start_read(health_globals.prototypes.dict, ap) {
|
||||
snprintfz(key, sizeof(key), DYNCFG_HEALTH_ALERT_PROTOTYPE_PREFIX ":%s", string2str(ap->config.name));
|
||||
dyncfg_del(localhost, key);
|
||||
}
|
||||
dfe_done(ap);
|
||||
dyncfg_del(localhost, DYNCFG_HEALTH_ALERT_PROTOTYPE_PREFIX);
|
||||
}
|
||||
|
||||
// Registers one alert prototype with dyncfg as a job of the prototypes template.
//
// The dyncfg id is "<DYNCFG_HEALTH_ALERT_PROTOTYPE_PREFIX>:<alert name>".
// The job status follows ap->_internal.enabled, and the source type and
// source are taken from the prototype's configuration.
// DYNCFG_CMD_REMOVE is offered only for prototypes whose source type is
// dyncfg and which are not flagged as being on disk.
//
// NOTE(review): DYNCFG_CMD_TEST is advertised here, while the job action
// handler visible in this file answers TEST with "action given is not
// supported for the prototype job" - confirm this is intended.
static void health_dyncfg_register_prototype(RRD_ALERT_PROTOTYPE *ap) {
    char key[HEALTH_CONF_MAX_LINE];

    snprintfz(key, sizeof(key), DYNCFG_HEALTH_ALERT_PROTOTYPE_PREFIX ":%s", string2str(ap->config.name));
    dyncfg_add(localhost, key, "/health/alerts/prototypes",
               ap->_internal.enabled ? DYNCFG_STATUS_ACCEPTED : DYNCFG_STATUS_DISABLED, DYNCFG_TYPE_JOB,
               ap->config.source_type, string2str(ap->config.source),
               DYNCFG_CMD_SCHEMA | DYNCFG_CMD_GET | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE |
               DYNCFG_CMD_UPDATE | DYNCFG_CMD_TEST |
               (ap->config.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && !ap->_internal.is_on_disk ? DYNCFG_CMD_REMOVE : 0),
               dyncfg_health_cb, NULL);

#ifdef NETDATA_TEST_HEALTH_PROTOTYPES_JSON_AND_PARSING
    {
        // make sure we can generate valid json, parse it back and come up to the same object

        CLEAN_BUFFER *original = buffer_create(0, NULL);
        CLEAN_BUFFER *parsed = buffer_create(0, NULL);
        CLEAN_BUFFER *error = buffer_create(0, NULL);
        health_prototype_to_json(original, ap, true);
        RRD_ALERT_PROTOTYPE *t = health_prototype_payload_parse(buffer_tostring(original), buffer_strlen(original), error, string2str(ap->config.name));
        if(!t)
            fatal("hey! cannot parse: %s", buffer_tostring(error));

        health_prototype_to_json(parsed, t, true);

        if(strcmp(buffer_tostring(original), buffer_tostring(parsed)) != 0)
            fatal("hey! they are different!");

        // the round-trip copy is never added to the dictionary, so it has
        // to be released here or it leaks once per registered prototype
        health_prototype_free(t);
    }
#endif
}
|
||||
|
||||
void health_dyncfg_register_all_prototypes(void) {
|
||||
RRD_ALERT_PROTOTYPE *ap;
|
||||
|
||||
dyncfg_add(localhost,
|
||||
DYNCFG_HEALTH_ALERT_PROTOTYPE_PREFIX, "/health/alerts/prototypes",
|
||||
DYNCFG_STATUS_ACCEPTED, DYNCFG_TYPE_TEMPLATE,
|
||||
DYNCFG_SOURCE_TYPE_INTERNAL, "internal",
|
||||
DYNCFG_CMD_SCHEMA | DYNCFG_CMD_ADD | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE,
|
||||
dyncfg_health_cb, NULL);
|
||||
|
||||
dfe_start_read(health_globals.prototypes.dict, ap) {
|
||||
if(ap->config.source_type != DYNCFG_SOURCE_TYPE_DYNCFG)
|
||||
health_dyncfg_register_prototype(ap);
|
||||
}
|
||||
dfe_done(ap);
|
||||
}
|
751
health/health_event_loop.c
Normal file
751
health/health_event_loop.c
Normal file
|
@ -0,0 +1,751 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "health.h"
|
||||
#include "health_internals.h"
|
||||
|
||||
// Worker job type ids of the health thread. Each id is passed to
// worker_is_busy() in the event loop and is given a display name with
// worker_register_job_name() in health_main().
#define WORKER_HEALTH_JOB_RRD_LOCK 0
|
||||
#define WORKER_HEALTH_JOB_HOST_LOCK 1
|
||||
#define WORKER_HEALTH_JOB_DB_QUERY 2
|
||||
#define WORKER_HEALTH_JOB_CALC_EVAL 3
|
||||
#define WORKER_HEALTH_JOB_WARNING_EVAL 4
|
||||
#define WORKER_HEALTH_JOB_CRITICAL_EVAL 5
|
||||
#define WORKER_HEALTH_JOB_ALARM_LOG_ENTRY 6
|
||||
#define WORKER_HEALTH_JOB_ALARM_LOG_PROCESS 7
|
||||
#define WORKER_HEALTH_JOB_DELAYED_INIT_RRDSET 8
|
||||
#define WORKER_HEALTH_JOB_DELAYED_INIT_RRDDIM 9
|
||||
|
||||
// compile-time guard: ten job ids (0-9) are defined above, so the workers
// library must allow at least that many job types
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 10
|
||||
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 10
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// health main thread and friends
|
||||
|
||||
// Maps the numeric result of an expression to an alert status:
//  - NaN or infinite -> RRDCALC_STATUS_UNDEFINED
//  - non-zero        -> RRDCALC_STATUS_RAISED
//  - zero            -> RRDCALC_STATUS_CLEAR
static inline RRDCALC_STATUS rrdcalc_value2status(NETDATA_DOUBLE n) {
    if(isnan(n) || isinf(n))
        return RRDCALC_STATUS_UNDEFINED;

    return (n != 0) ? RRDCALC_STATUS_RAISED : RRDCALC_STATUS_CLEAR;
}
|
||||
|
||||
// Decides whether an alert can be evaluated at time `now`.
//
// Returns 1 when the alert should run, 0 otherwise. When the alert is only
// "not due yet", *next_run is moved earlier to rc->next_update (if that is
// sooner), so the main loop wakes up in time to run it.
static inline int rrdcalc_isrunnable(RRDCALC *rc, time_t now, time_t *next_run) {
    // without a chart there is nothing to evaluate
    if(unlikely(!rc->rrdset)) {
        netdata_log_debug(D_HEALTH, "Health not running alarm '%s.%s'. It is not linked to a chart.", rrdcalc_chart_name(rc), rrdcalc_name(rc));
        return 0;
    }

    // not due yet
    if(unlikely(rc->next_update > now)) {
        // run the main loop precisely when this alarm needs it
        if (unlikely(rc->next_update < *next_run))
            *next_run = rc->next_update;

        netdata_log_debug(D_HEALTH, "Health not examining alarm '%s.%s' yet (will do in %d secs).", rrdcalc_chart_name(rc), rrdcalc_name(rc), (int) (rc->next_update - now));
        return 0;
    }

    // no update frequency configured
    if(unlikely(!rc->config.update_every)) {
        netdata_log_debug(D_HEALTH, "Health not running alarm '%s.%s'. It does not have an update frequency", rrdcalc_chart_name(rc), rrdcalc_name(rc));
        return 0;
    }

    // obsolete charts are not evaluated
    if(unlikely(rrdset_flag_check(rc->rrdset, RRDSET_FLAG_OBSOLETE))) {
        netdata_log_debug(D_HEALTH, "Health not running alarm '%s.%s'. The chart has been marked as obsolete", rrdcalc_chart_name(rc), rrdcalc_name(rc));
        return 0;
    }

    // the chart must have been collected at least twice
    if(unlikely(!rc->rrdset->last_collected_time.tv_sec || rc->rrdset->counter_done < 2)) {
        netdata_log_debug(D_HEALTH, "Health not running alarm '%s.%s'. Chart is not fully collected yet.", rrdcalc_chart_name(rc), rrdcalc_name(rc));
        return 0;
    }

    int chart_every = rc->rrdset->update_every;
    time_t db_first = rrdset_first_entry_s(rc->rrdset);
    time_t db_last = rrdset_last_entry_s(rc->rrdset);

    // `now` must not be before the first entry of the chart
    // (the upper bound against the last entry is intentionally not checked here)
    if(unlikely(now + chart_every < db_first)) {
        netdata_log_debug(D_HEALTH
                          , "Health not examining alarm '%s.%s' yet (wanted time is out of bounds - we need %lu but got %lu - %lu)."
                          , rrdcalc_chart_name(rc), rrdcalc_name(rc), (unsigned long) now, (unsigned long) db_first
                          , (unsigned long) db_last);
        return 0;
    }

    // alerts with a database lookup also need the looked-up time to be
    // within the retention of the chart, with one update_every of tolerance
    if(RRDCALC_HAS_DB_LOOKUP(rc)) {
        time_t wanted = now + rc->config.before + rc->config.after;

        if(wanted + chart_every < db_first || wanted - chart_every > db_last) {
            netdata_log_debug(D_HEALTH
                              , "Health not examining alarm '%s.%s' yet (not enough data yet - we need %lu but got %lu - %lu)."
                              , rrdcalc_chart_name(rc), rrdcalc_name(rc), (unsigned long) wanted, (unsigned long) db_first
                              , (unsigned long) db_last);
            return 0;
        }
    }

    return 1;
}
|
||||
|
||||
// Blocks until `next_run` (realtime seconds) is reached or the health service
// is asked to stop, sleeping one second at a time.
// `loop` is the iteration counter, used only in the debug messages.
static void health_sleep(time_t next_run, unsigned int loop __maybe_unused) {
    time_t now = now_realtime_sec();

    // already late: go straight to the next iteration
    if(now >= next_run) {
        netdata_log_debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration now", loop);
        return;
    }

    worker_is_idle();
    netdata_log_debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration in %d secs", loop, (int) (next_run - now));

    // short sleeps, re-checking the service state after each one
    for( ; now < next_run && service_running(SERVICE_HEALTH) ; now = now_realtime_sec())
        sleep_usec(USEC_PER_SEC);
}
|
||||
|
||||
// When built with ENABLE_ACLK and cloud is enabled: if the host has an ACLK
// sync configuration whose alert_queue_removed counter is at least 1, the
// counter is increased by 6. Does nothing otherwise.
// NOTE(review): the event loop decrements this counter once per pass while it
// is above 1, so this presumably delays the queueing of removed alerts - confirm.
static void sql_health_postpone_queue_removed(RRDHOST *host __maybe_unused) {
#ifdef ENABLE_ACLK
    if (!netdata_cloud_enabled)
        return;

    struct aclk_sync_cfg_t *sync_cfg = host->aclk_config;
    if (unlikely(!sync_cfg))
        return;

    if (sync_cfg->alert_queue_removed >= 1)
        sync_cfg->alert_queue_removed += 6;
#endif
}
|
||||
|
||||
// Runs the pending health initialization of a host, if one is flagged.
//
// Clears the host's pending flag, then for every chart of the host that is
// flagged as pending: clears the chart flag and applies the alert prototypes
// to it. If at least one chart was processed, the queueing of removed alerts
// is postponed for the host.
static void health_execute_delayed_initializations(RRDHOST *host) {
    health_plugin_init();

    if (!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION))
        return;

    rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION);

    bool any_chart_initialized = false;
    RRDSET *chart;

    rrdset_foreach_reentrant(chart, host) {
        if(rrdset_flag_check(chart, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION)) {
            rrdset_flag_clear(chart, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION);

            worker_is_busy(WORKER_HEALTH_JOB_DELAYED_INIT_RRDSET);
            health_prototype_alerts_for_rrdset_incrementally(chart);
            any_chart_initialized = true;
        }
    }
    rrdset_foreach_done(chart);

    if (any_chart_initialized)
        sql_health_postpone_queue_removed(host);
}
|
||||
|
||||
// One-time health initialization of a host.
//
// Does nothing when health is disabled for the host, when the host is already
// flagged as initialized, or when the health service is not running.
// Otherwise it flags the host as initialized, copies the global health
// defaults to it, resets the health log ids, loads the alarm log from SQL and
// applies the alert prototypes to the host.
static void health_initialize_rrdhost(RRDHOST *host) {
    health_plugin_init();

    if(!host->health.health_enabled)
        return;

    if(rrdhost_flag_check(host, RRDHOST_FLAG_INITIALIZED_HEALTH))
        return;

    if(!service_running(SERVICE_HEALTH))
        return;

    rrdhost_flag_set(host, RRDHOST_FLAG_INITIALIZED_HEALTH);

    // notification defaults, inherited from the global configuration
    host->health.health_default_exec = string_dup(health_globals.config.default_exec);
    host->health.health_default_recipient = string_dup(health_globals.config.default_recipient);
    host->health.health_default_warn_repeat_every = health_globals.config.default_warn_repeat_every;
    host->health.health_default_crit_repeat_every = health_globals.config.default_crit_repeat_every;
    host->health.use_summary_for_notifications = health_globals.config.use_summary_for_notifications;

    // health log limits and id counters
    host->health_log.max = health_globals.config.health_log_entries_max;
    host->health_log.health_log_history = health_globals.config.health_log_history;
    host->health_log.next_log_id = (uint32_t)now_realtime_sec();
    host->health_log.next_alarm_id = 0;

    // the log must be fully set up before it is loaded and alerts are attached
    rw_spinlock_init(&host->health_log.spinlock);
    sql_health_alarm_log_load(host);
    health_apply_prototypes_to_host(host);
}
|
||||
|
||||
static inline int check_if_resumed_from_suspension(void) {
|
||||
static usec_t last_realtime = 0, last_monotonic = 0;
|
||||
usec_t realtime = now_realtime_usec(), monotonic = now_monotonic_usec();
|
||||
int ret = 0;
|
||||
|
||||
// detect if monotonic and realtime have twice the difference
|
||||
// in which case we assume the system was just waken from hibernation
|
||||
|
||||
if(last_realtime && last_monotonic && realtime - last_realtime > 2 * (monotonic - last_monotonic))
|
||||
ret = 1;
|
||||
|
||||
last_realtime = realtime;
|
||||
last_monotonic = monotonic;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void health_event_loop(void) {
|
||||
bool health_running_logged = false;
|
||||
|
||||
unsigned int loop = 0;
|
||||
|
||||
while(service_running(SERVICE_HEALTH)) {
|
||||
loop++;
|
||||
netdata_log_debug(D_HEALTH, "Health monitoring iteration no %u started", loop);
|
||||
|
||||
time_t now = now_realtime_sec();
|
||||
int runnable = 0, apply_hibernation_delay = 0;
|
||||
time_t next_run = now + health_globals.config.run_at_least_every_seconds;
|
||||
RRDCALC *rc;
|
||||
RRDHOST *host;
|
||||
|
||||
if (unlikely(check_if_resumed_from_suspension())) {
|
||||
apply_hibernation_delay = 1;
|
||||
|
||||
nd_log(NDLS_DAEMON, NDLP_NOTICE,
|
||||
"Postponing alarm checks for %"PRId32" seconds, "
|
||||
"because it seems that the system was just resumed from suspension.",
|
||||
(int32_t)health_globals.config.postpone_alarms_during_hibernation_for_seconds);
|
||||
}
|
||||
|
||||
if (unlikely(silencers->all_alarms && silencers->stype == STYPE_DISABLE_ALARMS)) {
|
||||
static int logged=0;
|
||||
if (!logged) {
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"Skipping health checks, because all alarms are disabled via API command.");
|
||||
logged = 1;
|
||||
}
|
||||
}
|
||||
|
||||
worker_is_busy(WORKER_HEALTH_JOB_RRD_LOCK);
|
||||
dfe_start_reentrant(rrdhost_root_index, host) {
|
||||
|
||||
if(unlikely(!service_running(SERVICE_HEALTH)))
|
||||
break;
|
||||
|
||||
if (unlikely(!host->health.health_enabled))
|
||||
continue;
|
||||
|
||||
if (unlikely(!rrdhost_flag_check(host, RRDHOST_FLAG_INITIALIZED_HEALTH)))
|
||||
health_initialize_rrdhost(host);
|
||||
|
||||
health_execute_delayed_initializations(host);
|
||||
|
||||
if (unlikely(apply_hibernation_delay)) {
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"[%s]: Postponing health checks for %"PRId32" seconds.",
|
||||
rrdhost_hostname(host),
|
||||
health_globals.config.postpone_alarms_during_hibernation_for_seconds);
|
||||
|
||||
host->health.health_delay_up_to =
|
||||
now + health_globals.config.postpone_alarms_during_hibernation_for_seconds;
|
||||
}
|
||||
|
||||
if (unlikely(host->health.health_delay_up_to)) {
|
||||
if (unlikely(now < host->health.health_delay_up_to)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"[%s]: Resuming health checks after delay.",
|
||||
rrdhost_hostname(host));
|
||||
|
||||
host->health.health_delay_up_to = 0;
|
||||
}
|
||||
|
||||
// wait until cleanup of obsolete charts on children is complete
|
||||
if (host != localhost) {
|
||||
if (unlikely(host->trigger_chart_obsoletion_check == 1)) {
|
||||
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"[%s]: Waiting for chart obsoletion check.",
|
||||
rrdhost_hostname(host));
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (!health_running_logged) {
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"[%s]: Health is running.",
|
||||
rrdhost_hostname(host));
|
||||
|
||||
health_running_logged = true;
|
||||
}
|
||||
|
||||
worker_is_busy(WORKER_HEALTH_JOB_HOST_LOCK);
|
||||
|
||||
// the first loop is to lookup values from the db
|
||||
foreach_rrdcalc_in_rrdhost_read(host, rc) {
|
||||
|
||||
if(unlikely(!service_running(SERVICE_HEALTH)))
|
||||
break;
|
||||
|
||||
rrdcalc_update_info_using_rrdset_labels(rc);
|
||||
|
||||
if (health_silencers_update_disabled_silenced(host, rc))
|
||||
continue;
|
||||
|
||||
// create an alert removed event if the chart is obsolete and
|
||||
// has stopped being collected for 60 seconds
|
||||
if (unlikely(rc->rrdset && rc->status != RRDCALC_STATUS_REMOVED &&
|
||||
rrdset_flag_check(rc->rrdset, RRDSET_FLAG_OBSOLETE) &&
|
||||
now > (rc->rrdset->last_collected_time.tv_sec + 60))) {
|
||||
if (!rrdcalc_isrepeating(rc)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
|
||||
time_t now_tmp = now_realtime_sec();
|
||||
|
||||
ALARM_ENTRY *ae =
|
||||
health_create_alarm_entry(
|
||||
host,
|
||||
rc,
|
||||
now_tmp,
|
||||
now_tmp - rc->last_status_change,
|
||||
rc->value,
|
||||
NAN,
|
||||
rc->status,
|
||||
RRDCALC_STATUS_REMOVED,
|
||||
0,
|
||||
rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0);
|
||||
|
||||
if (ae) {
|
||||
health_log_alert(host, ae);
|
||||
health_alarm_log_add_entry(host, ae);
|
||||
rc->old_status = rc->status;
|
||||
rc->status = RRDCALC_STATUS_REMOVED;
|
||||
rc->last_status_change = now_tmp;
|
||||
rc->last_status_change_value = rc->value;
|
||||
rc->last_updated = now_tmp;
|
||||
rc->value = NAN;
|
||||
|
||||
#ifdef ENABLE_ACLK
|
||||
if (netdata_cloud_enabled)
|
||||
sql_queue_alarm_to_aclk(host, ae, true);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(!rrdcalc_isrunnable(rc, now, &next_run))) {
|
||||
if (unlikely(rc->run_flags & RRDCALC_FLAG_RUNNABLE))
|
||||
rc->run_flags &= ~RRDCALC_FLAG_RUNNABLE;
|
||||
continue;
|
||||
}
|
||||
|
||||
runnable++;
|
||||
rc->old_value = rc->value;
|
||||
rc->run_flags |= RRDCALC_FLAG_RUNNABLE;
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// if there is database lookup, do it
|
||||
|
||||
if (unlikely(RRDCALC_HAS_DB_LOOKUP(rc))) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_DB_QUERY);
|
||||
|
||||
/* time_t old_db_timestamp = rc->db_before; */
|
||||
int value_is_null = 0;
|
||||
|
||||
int ret = rrdset2value_api_v1(rc->rrdset, NULL, &rc->value, rrdcalc_dimensions(rc), 1,
|
||||
rc->config.after, rc->config.before, rc->config.group, NULL,
|
||||
0, rc->config.options | RRDR_OPTION_SELECTED_TIER,
|
||||
&rc->db_after,&rc->db_before,
|
||||
NULL, NULL, NULL,
|
||||
&value_is_null, NULL, 0, 0,
|
||||
QUERY_SOURCE_HEALTH, STORAGE_PRIORITY_SYNCHRONOUS);
|
||||
|
||||
if (unlikely(ret != 200)) {
|
||||
// database lookup failed
|
||||
rc->value = NAN;
|
||||
rc->run_flags |= RRDCALC_FLAG_DB_ERROR;
|
||||
|
||||
netdata_log_debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': database lookup returned error %d",
|
||||
rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc), ret
|
||||
);
|
||||
} else
|
||||
rc->run_flags &= ~RRDCALC_FLAG_DB_ERROR;
|
||||
|
||||
if (unlikely(value_is_null)) {
|
||||
// collected value is null
|
||||
rc->value = NAN;
|
||||
rc->run_flags |= RRDCALC_FLAG_DB_NAN;
|
||||
|
||||
netdata_log_debug(D_HEALTH,
|
||||
"Health on host '%s', alarm '%s.%s': database lookup returned empty value (possibly value is not collected yet)",
|
||||
rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc)
|
||||
);
|
||||
} else
|
||||
rc->run_flags &= ~RRDCALC_FLAG_DB_NAN;
|
||||
|
||||
netdata_log_debug(D_HEALTH, "Health on host '%s', alarm '%s.%s': database lookup gave value " NETDATA_DOUBLE_FORMAT,
|
||||
rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc), rc->value
|
||||
);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// if there is calculation expression, run it
|
||||
|
||||
if (unlikely(rc->config.calculation)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_CALC_EVAL);
|
||||
|
||||
if (unlikely(!expression_evaluate(rc->config.calculation))) {
|
||||
// calculation failed
|
||||
rc->value = NAN;
|
||||
rc->run_flags |= RRDCALC_FLAG_CALC_ERROR;
|
||||
|
||||
netdata_log_debug(
|
||||
D_HEALTH, "Health on host '%s', alarm '%s.%s': expression '%s' failed: %s",
|
||||
rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
|
||||
expression_parsed_as(rc->config.calculation), expression_error_msg(rc->config.calculation)
|
||||
);
|
||||
}
|
||||
else {
|
||||
rc->run_flags &= ~RRDCALC_FLAG_CALC_ERROR;
|
||||
|
||||
netdata_log_debug(
|
||||
D_HEALTH, "Health on host '%s', alarm '%s.%s': expression '%s' gave value "
|
||||
NETDATA_DOUBLE_FORMAT": %s (source: %s)",
|
||||
rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
|
||||
expression_parsed_as(rc->config.calculation),
|
||||
expression_result(rc->config.calculation),
|
||||
expression_error_msg(rc->config.calculation),
|
||||
rrdcalc_source(rc)
|
||||
);
|
||||
|
||||
rc->value = expression_result(rc->config.calculation);
|
||||
}
|
||||
}
|
||||
}
|
||||
foreach_rrdcalc_in_rrdhost_done(rc);
|
||||
|
||||
struct health_raised_summary *hrm = alerts_raised_summary_create(host);
|
||||
|
||||
if (unlikely(runnable && service_running(SERVICE_HEALTH))) {
|
||||
foreach_rrdcalc_in_rrdhost_read(host, rc) {
|
||||
if(unlikely(!service_running(SERVICE_HEALTH)))
|
||||
break;
|
||||
|
||||
if (unlikely(!(rc->run_flags & RRDCALC_FLAG_RUNNABLE)))
|
||||
continue;
|
||||
|
||||
if (rc->run_flags & RRDCALC_FLAG_DISABLED) {
|
||||
continue;
|
||||
}
|
||||
RRDCALC_STATUS warning_status = RRDCALC_STATUS_UNDEFINED;
|
||||
RRDCALC_STATUS critical_status = RRDCALC_STATUS_UNDEFINED;
|
||||
|
||||
// --------------------------------------------------------
|
||||
// check the warning expression
|
||||
|
||||
if (likely(rc->config.warning)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_WARNING_EVAL);
|
||||
|
||||
if (unlikely(!expression_evaluate(rc->config.warning))) {
|
||||
// calculation failed
|
||||
rc->run_flags |= RRDCALC_FLAG_WARN_ERROR;
|
||||
|
||||
netdata_log_debug(D_HEALTH,
|
||||
"Health on host '%s', alarm '%s.%s': warning expression failed with error: %s",
|
||||
rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
|
||||
expression_error_msg(rc->config.warning)
|
||||
);
|
||||
} else {
|
||||
rc->run_flags &= ~RRDCALC_FLAG_WARN_ERROR;
|
||||
netdata_log_debug(D_HEALTH,
|
||||
"Health on host '%s', alarm '%s.%s': warning expression gave value "
|
||||
NETDATA_DOUBLE_FORMAT ": %s (source: %s)",
|
||||
rrdhost_hostname(host),
|
||||
rrdcalc_chart_name(rc),
|
||||
rrdcalc_name(rc),
|
||||
expression_result(rc->config.warning),
|
||||
expression_error_msg(rc->config.warning),
|
||||
rrdcalc_source(rc)
|
||||
);
|
||||
warning_status = rrdcalc_value2status(expression_result(rc->config.warning));
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
// check the critical expression
|
||||
|
||||
if (likely(rc->config.critical)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_CRITICAL_EVAL);
|
||||
|
||||
if (unlikely(!expression_evaluate(rc->config.critical))) {
|
||||
// calculation failed
|
||||
rc->run_flags |= RRDCALC_FLAG_CRIT_ERROR;
|
||||
|
||||
netdata_log_debug(D_HEALTH,
|
||||
"Health on host '%s', alarm '%s.%s': critical expression failed with error: %s",
|
||||
rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
|
||||
expression_error_msg(rc->config.critical)
|
||||
);
|
||||
} else {
|
||||
rc->run_flags &= ~RRDCALC_FLAG_CRIT_ERROR;
|
||||
netdata_log_debug(D_HEALTH,
|
||||
"Health on host '%s', alarm '%s.%s': critical expression gave value "
|
||||
NETDATA_DOUBLE_FORMAT ": %s (source: %s)",
|
||||
rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
|
||||
expression_result(rc->config.critical),
|
||||
expression_error_msg(rc->config.critical),
|
||||
rrdcalc_source(rc)
|
||||
);
|
||||
critical_status = rrdcalc_value2status(expression_result(rc->config.critical));
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
// decide the final alarm status
|
||||
|
||||
RRDCALC_STATUS status = RRDCALC_STATUS_UNDEFINED;
|
||||
|
||||
switch (warning_status) {
|
||||
case RRDCALC_STATUS_CLEAR:
|
||||
status = RRDCALC_STATUS_CLEAR;
|
||||
break;
|
||||
|
||||
case RRDCALC_STATUS_RAISED:
|
||||
status = RRDCALC_STATUS_WARNING;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
switch (critical_status) {
|
||||
case RRDCALC_STATUS_CLEAR:
|
||||
if (status == RRDCALC_STATUS_UNDEFINED)
|
||||
status = RRDCALC_STATUS_CLEAR;
|
||||
break;
|
||||
|
||||
case RRDCALC_STATUS_RAISED:
|
||||
status = RRDCALC_STATUS_CRITICAL;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------
|
||||
// check if the new status and the old differ
|
||||
|
||||
if (status != rc->status) {
|
||||
|
||||
worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
|
||||
int delay;
|
||||
|
||||
// apply trigger hysteresis
|
||||
|
||||
if (now > rc->delay_up_to_timestamp) {
|
||||
rc->delay_up_current = rc->config.delay_up_duration;
|
||||
rc->delay_down_current = rc->config.delay_down_duration;
|
||||
rc->delay_last = 0;
|
||||
rc->delay_up_to_timestamp = 0;
|
||||
} else {
|
||||
rc->delay_up_current = (int)((float)rc->delay_up_current * rc->config.delay_multiplier);
|
||||
if (rc->delay_up_current > rc->config.delay_max_duration)
|
||||
rc->delay_up_current = rc->config.delay_max_duration;
|
||||
|
||||
rc->delay_down_current = (int)((float)rc->delay_down_current * rc->config.delay_multiplier);
|
||||
if (rc->delay_down_current > rc->config.delay_max_duration)
|
||||
rc->delay_down_current = rc->config.delay_max_duration;
|
||||
}
|
||||
|
||||
if (status > rc->status)
|
||||
delay = rc->delay_up_current;
|
||||
else
|
||||
delay = rc->delay_down_current;
|
||||
|
||||
// COMMENTED: because we do need to send raising alarms
|
||||
// if (now + delay < rc->delay_up_to_timestamp)
|
||||
// delay = (int)(rc->delay_up_to_timestamp - now);
|
||||
|
||||
rc->delay_last = delay;
|
||||
rc->delay_up_to_timestamp = now + delay;
|
||||
|
||||
ALARM_ENTRY *ae =
|
||||
health_create_alarm_entry(
|
||||
host,
|
||||
rc,
|
||||
now,
|
||||
now - rc->last_status_change,
|
||||
rc->old_value,
|
||||
rc->value,
|
||||
rc->status,
|
||||
status,
|
||||
rc->delay_last,
|
||||
(
|
||||
((rc->config.alert_action_options & ALERT_ACTION_OPTION_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) |
|
||||
((rc->run_flags & RRDCALC_FLAG_SILENCED)? HEALTH_ENTRY_FLAG_SILENCED : 0) |
|
||||
(rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0)
|
||||
)
|
||||
);
|
||||
|
||||
health_log_alert(host, ae);
|
||||
health_alarm_log_add_entry(host, ae);
|
||||
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"[%s]: Alert event for [%s.%s], value [%s], status [%s].",
|
||||
rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae), ae_new_value_string(ae),
|
||||
rrdcalc_status2string(ae->new_status));
|
||||
|
||||
rc->last_status_change_value = rc->value;
|
||||
rc->last_status_change = now;
|
||||
rc->old_status = rc->status;
|
||||
rc->status = status;
|
||||
|
||||
if(unlikely(rrdcalc_isrepeating(rc))) {
|
||||
rc->last_repeat = now;
|
||||
if (rc->status == RRDCALC_STATUS_CLEAR)
|
||||
rc->run_flags |= RRDCALC_FLAG_RUN_ONCE;
|
||||
}
|
||||
}
|
||||
|
||||
rc->last_updated = now;
|
||||
rc->next_update = now + rc->config.update_every;
|
||||
|
||||
if (next_run > rc->next_update)
|
||||
next_run = rc->next_update;
|
||||
}
|
||||
foreach_rrdcalc_in_rrdhost_done(rc);
|
||||
|
||||
alerts_raised_summary_populate(hrm);
|
||||
|
||||
// process repeating alarms
|
||||
foreach_rrdcalc_in_rrdhost_read(host, rc) {
|
||||
if(unlikely(!service_running(SERVICE_HEALTH)))
|
||||
break;
|
||||
|
||||
int repeat_every = 0;
|
||||
if(unlikely(rrdcalc_isrepeating(rc) && rc->delay_up_to_timestamp <= now)) {
|
||||
if(unlikely(rc->status == RRDCALC_STATUS_WARNING)) {
|
||||
rc->run_flags &= ~RRDCALC_FLAG_RUN_ONCE;
|
||||
repeat_every = (int)rc->config.warn_repeat_every;
|
||||
}
|
||||
else if(unlikely(rc->status == RRDCALC_STATUS_CRITICAL)) {
|
||||
rc->run_flags &= ~RRDCALC_FLAG_RUN_ONCE;
|
||||
repeat_every = (int)rc->config.crit_repeat_every;
|
||||
}
|
||||
else if(unlikely(rc->status == RRDCALC_STATUS_CLEAR)) {
|
||||
if(!(rc->run_flags & RRDCALC_FLAG_RUN_ONCE) &&
|
||||
(rc->old_status == RRDCALC_STATUS_CRITICAL || rc->old_status == RRDCALC_STATUS_WARNING))
|
||||
repeat_every = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
continue;
|
||||
|
||||
if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) {
|
||||
worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
|
||||
rc->last_repeat = now;
|
||||
if (likely(rc->times_repeat < UINT32_MAX)) rc->times_repeat++;
|
||||
ALARM_ENTRY *ae =
|
||||
health_create_alarm_entry(
|
||||
host,
|
||||
rc,
|
||||
now,
|
||||
now - rc->last_status_change,
|
||||
rc->old_value,
|
||||
rc->value,
|
||||
rc->old_status,
|
||||
rc->status,
|
||||
rc->delay_last,
|
||||
(
|
||||
((rc->config.alert_action_options & ALERT_ACTION_OPTION_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) |
|
||||
((rc->run_flags & RRDCALC_FLAG_SILENCED)? HEALTH_ENTRY_FLAG_SILENCED : 0) |
|
||||
(rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0)
|
||||
)
|
||||
);
|
||||
|
||||
health_log_alert(host, ae);
|
||||
ae->last_repeat = rc->last_repeat;
|
||||
if (!(rc->run_flags & RRDCALC_FLAG_RUN_ONCE) && rc->status == RRDCALC_STATUS_CLEAR) {
|
||||
ae->flags |= HEALTH_ENTRY_RUN_ONCE;
|
||||
}
|
||||
rc->run_flags |= RRDCALC_FLAG_RUN_ONCE;
|
||||
health_send_notification(host, ae, hrm);
|
||||
netdata_log_debug(D_HEALTH, "Notification sent for the repeating alarm %u.", ae->alarm_id);
|
||||
health_alarm_wait_for_execution(ae);
|
||||
health_alarm_log_free_one_nochecks_nounlink(ae);
|
||||
}
|
||||
}
|
||||
foreach_rrdcalc_in_rrdhost_done(rc);
|
||||
}
|
||||
|
||||
if (unlikely(!service_running(SERVICE_HEALTH)))
|
||||
break;
|
||||
|
||||
// execute notifications
|
||||
// and cleanup
|
||||
|
||||
worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_PROCESS);
|
||||
health_alarm_log_process_to_send_notifications(host, hrm);
|
||||
alerts_raised_summary_free(hrm);
|
||||
|
||||
if (unlikely(!service_running(SERVICE_HEALTH))) {
|
||||
// wait for all notifications to finish before allowing health to be cleaned up
|
||||
wait_for_all_notifications_to_finish_before_allowing_health_to_be_cleaned_up();
|
||||
break;
|
||||
}
|
||||
#ifdef ENABLE_ACLK
|
||||
if (netdata_cloud_enabled) {
|
||||
struct aclk_sync_cfg_t *wc = host->aclk_config;
|
||||
if (unlikely(!wc))
|
||||
continue;
|
||||
|
||||
if (wc->alert_queue_removed == 1) {
|
||||
sql_queue_removed_alerts_to_aclk(host);
|
||||
} else if (wc->alert_queue_removed > 1) {
|
||||
wc->alert_queue_removed--;
|
||||
}
|
||||
|
||||
if (wc->alert_checkpoint_req == 1) {
|
||||
aclk_push_alarm_checkpoint(host);
|
||||
} else if (wc->alert_checkpoint_req > 1) {
|
||||
wc->alert_checkpoint_req--;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
dfe_done(host);
|
||||
|
||||
// wait for all notifications to finish before allowing health to be cleaned up
|
||||
wait_for_all_notifications_to_finish_before_allowing_health_to_be_cleaned_up();
|
||||
|
||||
if(unlikely(!service_running(SERVICE_HEALTH)))
|
||||
break;
|
||||
|
||||
health_sleep(next_run, loop);
|
||||
|
||||
} // forever
|
||||
}
|
||||
|
||||
|
||||
static void health_main_cleanup(void *ptr) {
|
||||
worker_unregister();
|
||||
|
||||
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
||||
netdata_log_info("cleaning up...");
|
||||
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
|
||||
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"Health thread ended.");
|
||||
}
|
||||
|
||||
void *health_main(void *ptr) {
|
||||
worker_register("HEALTH");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_RRD_LOCK, "rrd lock");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_HOST_LOCK, "host lock");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_DB_QUERY, "db lookup");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_CALC_EVAL, "calc eval");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_WARNING_EVAL, "warning eval");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_CRITICAL_EVAL, "critical eval");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY, "alarm log entry");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_ALARM_LOG_PROCESS, "alarm log process");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_DELAYED_INIT_RRDSET, "rrdset init");
|
||||
worker_register_job_name(WORKER_HEALTH_JOB_DELAYED_INIT_RRDDIM, "rrddim init");
|
||||
|
||||
netdata_thread_cleanup_push(health_main_cleanup, ptr);
|
||||
{
|
||||
health_event_loop();
|
||||
}
|
||||
netdata_thread_cleanup_pop(1);
|
||||
return NULL;
|
||||
}
|
130
health/health_internals.h
Normal file
130
health/health_internals.h
Normal file
|
@ -0,0 +1,130 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_HEALTH_INTERNALS_H
|
||||
#define NETDATA_HEALTH_INTERNALS_H
|
||||
|
||||
#include "health.h"
|
||||
|
||||
// limits for the number of health log entries
#define HEALTH_LOG_ENTRIES_DEFAULT  1000U
#define HEALTH_LOG_ENTRIES_MAX      100000U
#define HEALTH_LOG_ENTRIES_MIN      10U

// default health log history (5 * 86400, i.e. 5 days expressed in seconds)
#define HEALTH_LOG_HISTORY_DEFAULT  (5 * 86400)

#define HEALTH_CONF_MAX_LINE        4096

// key names (string constants) of the health configuration
#define HEALTH_ALARM_KEY            "alarm"
#define HEALTH_TEMPLATE_KEY         "template"
#define HEALTH_CHART_KEY            "chart"
#define HEALTH_CONTEXT_KEY          "context"
#define HEALTH_ON_KEY               "on"
#define HEALTH_HOST_KEY             "hosts"
#define HEALTH_OS_KEY               "os"
#define HEALTH_PLUGIN_KEY           "plugin"
#define HEALTH_MODULE_KEY           "module"
#define HEALTH_CHARTS_KEY           "charts"
#define HEALTH_LOOKUP_KEY           "lookup"
#define HEALTH_CALC_KEY             "calc"
#define HEALTH_EVERY_KEY            "every"
#define HEALTH_GREEN_KEY            "green"
#define HEALTH_RED_KEY              "red"
#define HEALTH_WARN_KEY             "warn"
#define HEALTH_CRIT_KEY             "crit"
#define HEALTH_EXEC_KEY             "exec"
#define HEALTH_RECIPIENT_KEY        "to"
#define HEALTH_UNITS_KEY            "units"
#define HEALTH_SUMMARY_KEY          "summary"
#define HEALTH_INFO_KEY             "info"
#define HEALTH_CLASS_KEY            "class"
#define HEALTH_COMPONENT_KEY        "component"
#define HEALTH_TYPE_KEY             "type"
#define HEALTH_DELAY_KEY            "delay"
#define HEALTH_OPTIONS_KEY          "options"
#define HEALTH_REPEAT_KEY           "repeat"
#define HEALTH_HOST_LABEL_KEY       "host labels"
#define HEALTH_FOREACH_KEY          "foreach"
#define HEALTH_CHART_LABEL_KEY      "chart labels"
|
||||
|
||||
// ALERT_ACTION_OPTIONS helpers (defined outside this header)
void alert_action_options_to_buffer_json_array(BUFFER *wb, const char *key, ALERT_ACTION_OPTIONS options);
ALERT_ACTION_OPTIONS alert_action_options_parse(char *o);
ALERT_ACTION_OPTIONS alert_action_options_parse_one(const char *o);
|
||||
|
||||
typedef struct rrd_alert_prototype {
|
||||
struct rrd_alert_match match;
|
||||
struct rrd_alert_config config;
|
||||
|
||||
struct {
|
||||
uint32_t uses;
|
||||
bool enabled;
|
||||
bool is_on_disk;
|
||||
SPINLOCK spinlock;
|
||||
struct rrd_alert_prototype *prev, *next;
|
||||
} _internal;
|
||||
} RRD_ALERT_PROTOTYPE;
|
||||
// RRD_ALERT_PROTOTYPE management (defined outside this header)
bool health_prototype_add(RRD_ALERT_PROTOTYPE *ap);
void health_prototype_cleanup(RRD_ALERT_PROTOTYPE *ap);
void health_prototype_free(RRD_ALERT_PROTOTYPE *ap);
|
||||
|
||||
struct health_plugin_globals {
|
||||
struct {
|
||||
SPINLOCK spinlock;
|
||||
bool done;
|
||||
} initialization;
|
||||
|
||||
struct {
|
||||
bool enabled;
|
||||
bool stock_enabled;
|
||||
bool use_summary_for_notifications;
|
||||
|
||||
unsigned int health_log_entries_max;
|
||||
uint32_t health_log_history; // the health log history in seconds to be kept in db
|
||||
|
||||
STRING *silencers_filename;
|
||||
STRING *default_exec;
|
||||
STRING *default_recipient;
|
||||
|
||||
SIMPLE_PATTERN *enabled_alerts;
|
||||
|
||||
uint32_t default_warn_repeat_every; // the default value for the interval between repeating warning notifications
|
||||
uint32_t default_crit_repeat_every; // the default value for the interval between repeating critical notifications
|
||||
|
||||
int32_t run_at_least_every_seconds;
|
||||
int32_t postpone_alarms_during_hibernation_for_seconds;
|
||||
} config;
|
||||
|
||||
struct {
|
||||
DICTIONARY *dict;
|
||||
} prototypes;
|
||||
};
|
||||
|
||||
extern struct health_plugin_globals health_globals;
|
||||
|
||||
int health_readfile(const char *filename, void *data, bool stock_config);
|
||||
void unlink_alarm_notify_in_progress(ALARM_ENTRY *ae);
|
||||
void wait_for_all_notifications_to_finish_before_allowing_health_to_be_cleaned_up(void);
|
||||
|
||||
void health_alarm_wait_for_execution(ALARM_ENTRY *ae);
|
||||
|
||||
bool rrdcalc_add_from_prototype(RRDHOST *host, RRDSET *st, RRD_ALERT_PROTOTYPE *ap);
|
||||
|
||||
int dyncfg_health_cb(const char *transaction, const char *id, DYNCFG_CMDS cmd, const char *add_name,
|
||||
BUFFER *payload, usec_t *stop_monotonic_ut, bool *cancelled,
|
||||
BUFFER *result, const char *source, void *data);
|
||||
|
||||
void health_dyncfg_unregister_all_prototypes(void);
|
||||
void health_dyncfg_register_all_prototypes(void);
|
||||
void health_prototype_to_json(BUFFER *wb, RRD_ALERT_PROTOTYPE *ap, bool for_hashing);
|
||||
|
||||
bool alert_variable_lookup(STRING *variable, void *data, NETDATA_DOUBLE *result);
|
||||
|
||||
struct health_raised_summary;
|
||||
struct health_raised_summary *alerts_raised_summary_create(RRDHOST *host);
|
||||
void alerts_raised_summary_populate(struct health_raised_summary *hrm);
|
||||
void alerts_raised_summary_free(struct health_raised_summary *hrm);
|
||||
void health_send_notification(RRDHOST *host, ALARM_ENTRY *ae, struct health_raised_summary *hrm);
|
||||
void health_alarm_log_process_to_send_notifications(RRDHOST *host, struct health_raised_summary *hrm);
|
||||
|
||||
void health_apply_prototype_to_host(RRDHOST *host, RRD_ALERT_PROTOTYPE *ap);
|
||||
void health_prototype_apply_to_all_hosts(RRD_ALERT_PROTOTYPE *ap);
|
||||
|
||||
#endif //NETDATA_HEALTH_INTERNALS_H
|
|
@ -41,7 +41,7 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
|
|||
format_value_and_unit(value_string, 100, rc->value, rrdcalc_units(rc), -1);
|
||||
|
||||
char hash_id[GUID_LEN + 1];
|
||||
uuid_unparse_lower(rc->config_hash_id, hash_id);
|
||||
uuid_unparse_lower(rc->config.hash_id, hash_id);
|
||||
|
||||
buffer_sprintf(wb,
|
||||
"\t\t\"%s.%s\": {\n"
|
||||
|
@ -82,42 +82,42 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
|
|||
, hash_id
|
||||
, rrdcalc_name(rc)
|
||||
, rrdcalc_chart_name(rc)
|
||||
, rc->classification?rrdcalc_classification(rc):"Unknown"
|
||||
, rc->component?rrdcalc_component(rc):"Unknown"
|
||||
, rc->type?rrdcalc_type(rc):"Unknown"
|
||||
, rc->config.classification?rrdcalc_classification(rc):"Unknown"
|
||||
, rc->config.component?rrdcalc_component(rc):"Unknown"
|
||||
, rc->config.type?rrdcalc_type(rc):"Unknown"
|
||||
, (rc->rrdset)?"true":"false"
|
||||
, (rc->run_flags & RRDCALC_FLAG_DISABLED)?"true":"false"
|
||||
, (rc->run_flags & RRDCALC_FLAG_SILENCED)?"true":"false"
|
||||
, rc->exec?rrdcalc_exec(rc):string2str(host->health.health_default_exec)
|
||||
, rc->recipient?rrdcalc_recipient(rc):string2str(host->health.health_default_recipient)
|
||||
, rc->config.exec?rrdcalc_exec(rc):string2str(host->health.health_default_exec)
|
||||
, rc->config.recipient?rrdcalc_recipient(rc):string2str(host->health.health_default_recipient)
|
||||
, rrdcalc_source(rc)
|
||||
, rrdcalc_units(rc)
|
||||
, rrdcalc_summary(rc)
|
||||
, rrdcalc_info(rc)
|
||||
, string2str(rc->summary)
|
||||
, string2str(rc->info)
|
||||
, rrdcalc_status2string(rc->status)
|
||||
, (unsigned long)rc->last_status_change
|
||||
, (unsigned long)rc->last_updated
|
||||
, (unsigned long)rc->next_update
|
||||
, rc->update_every
|
||||
, rc->delay_up_duration
|
||||
, rc->delay_down_duration
|
||||
, rc->delay_max_duration
|
||||
, rc->delay_multiplier
|
||||
, rc->config.update_every
|
||||
, rc->config.delay_up_duration
|
||||
, rc->config.delay_down_duration
|
||||
, rc->config.delay_max_duration
|
||||
, rc->config.delay_multiplier
|
||||
, rc->delay_last
|
||||
, (unsigned long)rc->delay_up_to_timestamp
|
||||
, rc->warn_repeat_every
|
||||
, rc->crit_repeat_every
|
||||
, rc->config.warn_repeat_every
|
||||
, rc->config.crit_repeat_every
|
||||
, value_string
|
||||
, (unsigned long)rc->last_repeat
|
||||
, (unsigned long)rc->times_repeat
|
||||
);
|
||||
|
||||
if(unlikely(rc->options & RRDCALC_OPTION_NO_CLEAR_NOTIFICATION)) {
|
||||
if(unlikely(rc->config.alert_action_options & ALERT_ACTION_OPTION_NO_CLEAR_NOTIFICATION)) {
|
||||
buffer_strcat(wb, "\t\t\t\"no_clear_notification\": true,\n");
|
||||
}
|
||||
|
||||
if(RRDCALC_HAS_DB_LOOKUP(rc)) {
|
||||
if(rc->dimensions)
|
||||
if(rc->config.dimensions)
|
||||
health_string2json(wb, "\t\t\t", "lookup_dimensions", rrdcalc_dimensions(rc), ",\n");
|
||||
|
||||
buffer_sprintf(wb,
|
||||
|
@ -129,35 +129,35 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
|
|||
"\t\t\t\"lookup_options\": \"",
|
||||
(unsigned long) rc->db_after,
|
||||
(unsigned long) rc->db_before,
|
||||
time_grouping_method2string(rc->group),
|
||||
rc->after,
|
||||
rc->before
|
||||
time_grouping_id2txt(rc->config.group),
|
||||
rc->config.after,
|
||||
rc->config.before
|
||||
);
|
||||
buffer_data_options2string(wb, rc->options);
|
||||
buffer_data_options2string(wb, rc->config.options);
|
||||
buffer_strcat(wb, "\",\n");
|
||||
}
|
||||
|
||||
if(rc->calculation) {
|
||||
health_string2json(wb, "\t\t\t", "calc", rc->calculation->source, ",\n");
|
||||
health_string2json(wb, "\t\t\t", "calc_parsed", rc->calculation->parsed_as, ",\n");
|
||||
if(rc->config.calculation) {
|
||||
health_string2json(wb, "\t\t\t", "calc", expression_source(rc->config.calculation), ",\n");
|
||||
health_string2json(wb, "\t\t\t", "calc_parsed", expression_parsed_as(rc->config.calculation), ",\n");
|
||||
}
|
||||
|
||||
if(rc->warning) {
|
||||
health_string2json(wb, "\t\t\t", "warn", rc->warning->source, ",\n");
|
||||
health_string2json(wb, "\t\t\t", "warn_parsed", rc->warning->parsed_as, ",\n");
|
||||
if(rc->config.warning) {
|
||||
health_string2json(wb, "\t\t\t", "warn", expression_source(rc->config.warning), ",\n");
|
||||
health_string2json(wb, "\t\t\t", "warn_parsed", expression_parsed_as(rc->config.warning), ",\n");
|
||||
}
|
||||
|
||||
if(rc->critical) {
|
||||
health_string2json(wb, "\t\t\t", "crit", rc->critical->source, ",\n");
|
||||
health_string2json(wb, "\t\t\t", "crit_parsed", rc->critical->parsed_as, ",\n");
|
||||
if(rc->config.critical) {
|
||||
health_string2json(wb, "\t\t\t", "crit", expression_source(rc->config.critical), ",\n");
|
||||
health_string2json(wb, "\t\t\t", "crit_parsed", expression_parsed_as(rc->config.critical), ",\n");
|
||||
}
|
||||
|
||||
buffer_strcat(wb, "\t\t\t\"green\":");
|
||||
buffer_print_netdata_double(wb, rc->green);
|
||||
buffer_print_netdata_double(wb, rc->config.green);
|
||||
buffer_strcat(wb, ",\n");
|
||||
|
||||
buffer_strcat(wb, "\t\t\t\"red\":");
|
||||
buffer_print_netdata_double(wb, rc->red);
|
||||
buffer_print_netdata_double(wb, rc->config.red);
|
||||
buffer_strcat(wb, ",\n");
|
||||
|
||||
buffer_strcat(wb, "\t\t\t\"value\":");
|
||||
|
@ -236,20 +236,13 @@ void health_alarms2json(RRDHOST *host, BUFFER *wb, int all) {
|
|||
"\n\t\"status\": %s,"
|
||||
"\n\t\"now\": %lu,"
|
||||
"\n\t\"alarms\": {\n",
|
||||
rrdhost_hostname(host),
|
||||
(host->health_log.next_log_id > 0)?(host->health_log.next_log_id - 1):0,
|
||||
host->health.health_enabled?"true":"false",
|
||||
(unsigned long)now_realtime_sec());
|
||||
rrdhost_hostname(host),
|
||||
(host->health_log.next_log_id > 0)?(host->health_log.next_log_id - 1):0,
|
||||
host->health.health_enabled?"true":"false",
|
||||
(unsigned long)now_realtime_sec());
|
||||
|
||||
health_alarms2json_fill_alarms(host, wb, all, health_rrdcalc2json_nolock);
|
||||
|
||||
// rrdhost_rdlock(host);
|
||||
// buffer_strcat(wb, "\n\t},\n\t\"templates\": {");
|
||||
// RRDCALCTEMPLATE *rt;
|
||||
// for(rt = host->templates; rt ; rt = rt->next)
|
||||
// health_rrdcalctemplate2json_nolock(wb, rt);
|
||||
// rrdhost_unlock(host);
|
||||
|
||||
buffer_strcat(wb, "\n\t}\n}\n");
|
||||
}
|
||||
|
||||
|
@ -263,3 +256,31 @@ void health_alarms_values2json(RRDHOST *host, BUFFER *wb, int all) {
|
|||
buffer_strcat(wb, "\n\t}\n}\n");
|
||||
}
|
||||
|
||||
// Adds to wb a JSON array member named key, holding one string for every
// known flag that is set in flags. The array is emitted (possibly empty)
// even when no flag is set.
void health_entry_flags_to_json_array(BUFFER *wb, const char *key, HEALTH_ENTRY_FLAGS flags) {
    // the order of this table is the order of the items in the array
    static const struct {
        HEALTH_ENTRY_FLAGS bit;
        const char *label;
    } known_flags[] = {
        { HEALTH_ENTRY_FLAG_PROCESSED,             "PROCESSED" },
        { HEALTH_ENTRY_FLAG_UPDATED,               "UPDATED" },
        { HEALTH_ENTRY_FLAG_EXEC_RUN,              "EXEC_RUN" },
        { HEALTH_ENTRY_FLAG_EXEC_FAILED,           "EXEC_FAILED" },
        { HEALTH_ENTRY_FLAG_SILENCED,              "SILENCED" },
        { HEALTH_ENTRY_RUN_ONCE,                   "RUN_ONCE" },
        { HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS,      "EXEC_IN_PROGRESS" },
        { HEALTH_ENTRY_FLAG_IS_REPEATING,          "RECURRING" },
        { HEALTH_ENTRY_FLAG_SAVED,                 "SAVED" },
        { HEALTH_ENTRY_FLAG_ACLK_QUEUED,           "ACLK_QUEUED" },
        { HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION, "NO_CLEAR_NOTIFICATION" },
    };

    buffer_json_member_add_array(wb, key);

    for (size_t i = 0; i < sizeof(known_flags) / sizeof(known_flags[0]); i++) {
        if (flags & known_flags[i].bit)
            buffer_json_add_array_item_string(wb, known_flags[i].label);
    }

    buffer_json_array_close(wb);
}
|
||||
|
|
|
@ -86,31 +86,32 @@ void health_log_alert_transition_with_trace(RRDHOST *host, ALARM_ENTRY *ae, int
|
|||
|
||||
inline ALARM_ENTRY* health_create_alarm_entry(
|
||||
RRDHOST *host,
|
||||
uint32_t alarm_id,
|
||||
uint32_t alarm_event_id,
|
||||
const uuid_t config_hash_id,
|
||||
RRDCALC *rc,
|
||||
time_t when,
|
||||
STRING *name,
|
||||
STRING *chart,
|
||||
STRING *chart_context,
|
||||
STRING *chart_name,
|
||||
STRING *class,
|
||||
STRING *component,
|
||||
STRING *type,
|
||||
STRING *exec,
|
||||
STRING *recipient,
|
||||
time_t duration,
|
||||
NETDATA_DOUBLE old_value,
|
||||
NETDATA_DOUBLE new_value,
|
||||
RRDCALC_STATUS old_status,
|
||||
RRDCALC_STATUS new_status,
|
||||
STRING *source,
|
||||
STRING *units,
|
||||
STRING *summary,
|
||||
STRING *info,
|
||||
int delay,
|
||||
HEALTH_ENTRY_FLAGS flags
|
||||
) {
|
||||
uint32_t alarm_id = rc->id;
|
||||
uint32_t alarm_event_id = rc->next_event_id++;
|
||||
STRING *name = rc->config.name;
|
||||
STRING *chart = rc->rrdset->id;
|
||||
STRING *chart_context = rc->rrdset->context;
|
||||
STRING *chart_name = rc->rrdset->name;
|
||||
STRING *class = rc->config.classification;
|
||||
STRING *component = rc->config.component;
|
||||
STRING *type = rc->config.type;
|
||||
STRING *exec = rc->config.exec;
|
||||
STRING *recipient = rc->config.recipient;
|
||||
STRING *source = rc->config.source;
|
||||
STRING *units = rc->config.units;
|
||||
STRING *summary = rc->summary;
|
||||
STRING *info = rc->info;
|
||||
|
||||
netdata_log_debug(D_HEALTH, "Health adding alarm log entry with id: %u", host->health_log.next_log_id);
|
||||
|
||||
ALARM_ENTRY *ae = callocz(1, sizeof(ALARM_ENTRY));
|
||||
|
@ -119,7 +120,7 @@ inline ALARM_ENTRY* health_create_alarm_entry(
|
|||
ae->chart_context = string_dup(chart_context);
|
||||
ae->chart_name = string_dup(chart_name);
|
||||
|
||||
uuid_copy(ae->config_hash_id, *((uuid_t *) config_hash_id));
|
||||
uuid_copy(ae->config_hash_id, rc->config.hash_id);
|
||||
|
||||
uuid_generate_random(ae->transition_id);
|
||||
ae->global_id = now_realtime_usec();
|
||||
|
|
569
health/health_notifications.c
Normal file
569
health/health_notifications.c
Normal file
|
@ -0,0 +1,569 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "health_internals.h"
|
||||
|
||||
// the queue of executed alarm notifications that haven't been waited for yet
|
||||
static struct {
|
||||
ALARM_ENTRY *head; // oldest
|
||||
ALARM_ENTRY *tail; // latest
|
||||
} alarm_notifications_in_progress = {NULL, NULL};
|
||||
|
||||
struct health_raised_summary {
|
||||
RRDHOST *host;
|
||||
DICTIONARY *rrdcalc_dict;
|
||||
|
||||
struct {
|
||||
size_t size;
|
||||
size_t used;
|
||||
const DICTIONARY_ITEM **array;
|
||||
} active_alerts;
|
||||
};
|
||||
|
||||
// Collect the result of the notification command of ae.
// Does nothing unless ae is flagged as having an execution in progress.
// Otherwise it calls spawn_wait_cmd() (which receives ae->exec_code and
// ae->exec_run_timestamp to fill in), clears the in-progress flag, flags the
// entry as failed on a non-zero exit code, and removes it from the queue of
// notifications in progress.
void health_alarm_wait_for_execution(ALARM_ENTRY *ae) {
    if (ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS) {
        spawn_wait_cmd(ae->exec_spawn_serial, &ae->exec_code, &ae->exec_run_timestamp);
        netdata_log_debug(D_HEALTH, "done executing command - returned with code %d", ae->exec_code);

        ae->flags &= ~HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS;
        if (ae->exec_code != 0)
            ae->flags |= HEALTH_ENTRY_FLAG_EXEC_FAILED;

        unlink_alarm_notify_in_progress(ae);
    }
}
|
||||
|
||||
void wait_for_all_notifications_to_finish_before_allowing_health_to_be_cleaned_up(void) {
|
||||
ALARM_ENTRY *ae;
|
||||
while (NULL != (ae = alarm_notifications_in_progress.head)) {
|
||||
if(unlikely(!service_running(SERVICE_HEALTH)))
|
||||
break;
|
||||
|
||||
health_alarm_wait_for_execution(ae);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove ae from the doubly linked queue of notifications in progress,
// updating the neighbours and the head/tail of the queue as needed.
// An entry that is not in the queue (no neighbours, neither head nor tail)
// is left untouched, so calling this more than once for the same entry is safe.
void unlink_alarm_notify_in_progress(ALARM_ENTRY *ae)
{
    struct alarm_entry *prev = ae->prev_in_progress;
    struct alarm_entry *next = ae->next_in_progress;

    if (NULL != prev) {
        prev->next_in_progress = next;
    }
    if (NULL != next) {
        next->prev_in_progress = prev;
    }
    if (ae == alarm_notifications_in_progress.head) {
        alarm_notifications_in_progress.head = next;
    }
    if (ae == alarm_notifications_in_progress.tail) {
        alarm_notifications_in_progress.tail = prev;
    }

    // do not keep stale links in the unlinked entry: a second unlink of the
    // same entry would otherwise rewire its former neighbours and corrupt the queue
    ae->prev_in_progress = NULL;
    ae->next_in_progress = NULL;
}
|
||||
|
||||
// Append ae at the tail (latest end) of the queue of notifications in progress.
static inline void enqueue_alarm_notify_in_progress(ALARM_ENTRY *ae)
{
    ALARM_ENTRY *previous_tail = alarm_notifications_in_progress.tail;

    ae->next_in_progress = NULL;
    ae->prev_in_progress = previous_tail;   // NULL when the queue was empty

    if (previous_tail)
        previous_tail->next_in_progress = ae;

    if (!alarm_notifications_in_progress.head)
        alarm_notifications_in_progress.head = ae;

    alarm_notifications_in_progress.tail = ae;
}
|
||||
|
||||
static bool prepare_command(BUFFER *wb,
|
||||
const char *exec,
|
||||
const char *recipient,
|
||||
const char *registry_hostname,
|
||||
uint32_t unique_id,
|
||||
uint32_t alarm_id,
|
||||
uint32_t alarm_event_id,
|
||||
uint32_t when,
|
||||
const char *alert_name,
|
||||
const char *alert_chart_name,
|
||||
const char *new_status,
|
||||
const char *old_status,
|
||||
NETDATA_DOUBLE new_value,
|
||||
NETDATA_DOUBLE old_value,
|
||||
const char *alert_source,
|
||||
uint32_t duration,
|
||||
uint32_t non_clear_duration,
|
||||
const char *alert_units,
|
||||
const char *alert_info,
|
||||
const char *new_value_string,
|
||||
const char *old_value_string,
|
||||
const char *source,
|
||||
const char *error_msg,
|
||||
int n_warn,
|
||||
int n_crit,
|
||||
const char *warn_alarms,
|
||||
const char *crit_alarms,
|
||||
const char *classification,
|
||||
const char *edit_command,
|
||||
const char *machine_guid,
|
||||
uuid_t *transition_id,
|
||||
const char *summary,
|
||||
const char *context,
|
||||
const char *component,
|
||||
const char *type
|
||||
) {
|
||||
char buf[8192];
|
||||
size_t n = sizeof(buf) - 1;
|
||||
|
||||
buffer_strcat(wb, "exec");
|
||||
|
||||
if (!sanitize_command_argument_string(buf, exec, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, recipient, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, registry_hostname, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
buffer_sprintf(wb, " '%u'", unique_id);
|
||||
|
||||
buffer_sprintf(wb, " '%u'", alarm_id);
|
||||
|
||||
buffer_sprintf(wb, " '%u'", alarm_event_id);
|
||||
|
||||
buffer_sprintf(wb, " '%u'", when);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, alert_name, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, alert_chart_name, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, new_status, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, old_status, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
buffer_sprintf(wb, " '" NETDATA_DOUBLE_FORMAT_ZERO "'", new_value);
|
||||
|
||||
buffer_sprintf(wb, " '" NETDATA_DOUBLE_FORMAT_ZERO "'", old_value);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, alert_source, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
buffer_sprintf(wb, " '%u'", duration);
|
||||
|
||||
buffer_sprintf(wb, " '%u'", non_clear_duration);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, alert_units, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, alert_info, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, new_value_string, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, old_value_string, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, source, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, error_msg, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
buffer_sprintf(wb, " '%d'", n_warn);
|
||||
|
||||
buffer_sprintf(wb, " '%d'", n_crit);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, warn_alarms, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, crit_alarms, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, classification, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, edit_command, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, machine_guid, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
char tr_id[UUID_STR_LEN];
|
||||
uuid_unparse_lower(*transition_id, tr_id);
|
||||
if (!sanitize_command_argument_string(buf, tr_id, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, summary, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, context, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, component, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
if (!sanitize_command_argument_string(buf, type, n))
|
||||
return false;
|
||||
buffer_sprintf(wb, " '%s'", buf);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int compare_raised_alerts(const void *a, const void *b) {
|
||||
const DICTIONARY_ITEM *item1 = *(const DICTIONARY_ITEM **)a;
|
||||
const DICTIONARY_ITEM *item2 = *(const DICTIONARY_ITEM **)b;
|
||||
|
||||
RRDCALC *rc1 = dictionary_acquired_item_value(item1);
|
||||
RRDCALC *rc2 = dictionary_acquired_item_value(item2);
|
||||
|
||||
return (int)(rc2->last_status_change - rc1->last_status_change);
|
||||
}
|
||||
|
||||
static void health_raised_summary_add_alert(struct health_raised_summary *hrm, const DICTIONARY_ITEM *item) {
|
||||
if(hrm->active_alerts.used >= hrm->active_alerts.size) {
|
||||
if(hrm->active_alerts.size == 0)
|
||||
hrm->active_alerts.size = 2;
|
||||
|
||||
hrm->active_alerts.size *= 2;
|
||||
hrm->active_alerts.array = reallocz(hrm->active_alerts.array, sizeof(const DICTIONARY_ITEM *) * hrm->active_alerts.size);
|
||||
}
|
||||
|
||||
hrm->active_alerts.array[hrm->active_alerts.used++] = dictionary_acquired_item_dup(hrm->rrdcalc_dict, item);
|
||||
}
|
||||
|
||||
void alerts_raised_summary_free(struct health_raised_summary *hrm) {
|
||||
for(size_t i = 0; i < hrm->active_alerts.used ;i++)
|
||||
dictionary_acquired_item_release(hrm->rrdcalc_dict, hrm->active_alerts.array[i]);
|
||||
|
||||
freez(hrm->active_alerts.array);
|
||||
freez(hrm);
|
||||
}
|
||||
|
||||
// Allocate an empty (zeroed) summary bound to host and to its rrdcalc index.
// The caller releases it with alerts_raised_summary_free().
struct health_raised_summary *alerts_raised_summary_create(RRDHOST *host) {
    struct health_raised_summary *summary = callocz(1, sizeof(*summary));

    summary->host = host;
    summary->rrdcalc_dict = host->rrdcalc_root_index;

    return summary;
}
|
||||
|
||||
void alerts_raised_summary_populate(struct health_raised_summary *hrm) {
|
||||
RRDCALC *rc;
|
||||
foreach_rrdcalc_in_rrdhost_read(hrm->host, rc) {
|
||||
if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec)) continue;
|
||||
health_raised_summary_add_alert(hrm, rc_dfe.item);
|
||||
}
|
||||
foreach_rrdcalc_in_rrdhost_done(rc);
|
||||
|
||||
if (hrm->active_alerts.used > 1)
|
||||
qsort(hrm->active_alerts.array, hrm->active_alerts.used, sizeof(const DICTIONARY_ITEM *), compare_raised_alerts);
|
||||
}
|
||||
|
||||
static size_t
|
||||
health_raised_summary_entries(struct health_raised_summary *hrm, BUFFER *dst, ALARM_ENTRY *ae, RRDCALC_STATUS status) {
|
||||
buffer_flush(dst);
|
||||
|
||||
size_t count = 0;
|
||||
for(size_t i = 0; i < hrm->active_alerts.used ;i++) {
|
||||
RRDCALC *rc = dictionary_acquired_item_value(hrm->active_alerts.array[i]);
|
||||
if(rc->status != status) continue;
|
||||
if(rc->id == ae->alarm_id) continue;
|
||||
|
||||
count++;
|
||||
if(buffer_strlen(dst)) buffer_putc(dst, ',');
|
||||
buffer_sprintf(dst, "%s=%" PRId64, string2str(rc->config.name), (int64_t)rc->last_status_change);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
// Find in the summary the alert ae belongs to and return the source of its
// critical expression when the alert is CRITICAL, or of its warning
// expression in every other status. Returns "" when the alert is not found.
static const char *health_raised_summary_my_expression_source(struct health_raised_summary *hrm, ALARM_ENTRY *ae) {
    for (size_t idx = 0; idx < hrm->active_alerts.used; idx++) {
        RRDCALC *rc = dictionary_acquired_item_value(hrm->active_alerts.array[idx]);

        if (rc->id == ae->alarm_id)
            return (rc->status == RRDCALC_STATUS_CRITICAL)
                       ? expression_source(rc->config.critical)
                       : expression_source(rc->config.warning);
    }

    return "";
}
|
||||
|
||||
// Find in the summary the alert ae belongs to and return the error message
// of its critical expression when the alert is CRITICAL, or of its warning
// expression in every other status. Returns "" when the alert is not found.
static const char *health_raised_summary_my_expression_error(struct health_raised_summary *hrm, ALARM_ENTRY *ae) {
    for (size_t idx = 0; idx < hrm->active_alerts.used; idx++) {
        RRDCALC *rc = dictionary_acquired_item_value(hrm->active_alerts.array[idx]);

        if (rc->id == ae->alarm_id)
            return (rc->status == RRDCALC_STATUS_CRITICAL)
                       ? expression_error_msg(rc->config.critical)
                       : expression_error_msg(rc->config.warning);
    }

    return "";
}
|
||||
|
||||
void health_send_notification(RRDHOST *host, ALARM_ENTRY *ae, struct health_raised_summary *hrm) {
|
||||
netdata_log_debug(D_HEALTH, "Health alarm '%s.%s' = " NETDATA_DOUBLE_FORMAT_AUTO " - changed status from %s to %s",
|
||||
ae->chart?ae_chart_id(ae):"NOCHART", ae_name(ae),
|
||||
ae->new_value,
|
||||
rrdcalc_status2string(ae->old_status),
|
||||
rrdcalc_status2string(ae->new_status)
|
||||
);
|
||||
|
||||
ae->flags |= HEALTH_ENTRY_FLAG_PROCESSED;
|
||||
|
||||
if(unlikely(ae->new_status < RRDCALC_STATUS_CLEAR)) {
|
||||
// do not send notifications for internal statuses
|
||||
netdata_log_debug(D_HEALTH, "Health not sending notification for alarm '%s.%s' status %s (internal statuses)", ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
|
||||
goto done;
|
||||
}
|
||||
|
||||
if(unlikely(ae->new_status <= RRDCALC_STATUS_CLEAR && (ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION))) {
|
||||
// do not send notifications for disabled statuses
|
||||
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"[%s]: Health not sending notification for alarm '%s.%s' status %s (it has no-clear-notification enabled)",
|
||||
rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
|
||||
|
||||
// mark it as run, so that we will send the same alarm if it happens again
|
||||
goto done;
|
||||
}
|
||||
|
||||
// find the previous notification for the same alarm
|
||||
// which we have run the exec script
|
||||
// exception: alarms with HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION set
|
||||
RRDCALC_STATUS last_executed_status = -3;
|
||||
if(likely(!(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION))) {
|
||||
int ret = sql_health_get_last_executed_event(host, ae, &last_executed_status);
|
||||
|
||||
if (likely(ret == 1)) {
|
||||
// we have executed this alarm notification in the past
|
||||
if(last_executed_status == ae->new_status && !(ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING)) {
|
||||
// don't send the notification for the same status again
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"[%s]: Health not sending again notification for alarm '%s.%s' status %s",
|
||||
rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae),
|
||||
rrdcalc_status2string(ae->new_status));
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// we have not executed this alarm notification in the past
|
||||
// so, don't send CLEAR notifications
|
||||
if(unlikely(ae->new_status == RRDCALC_STATUS_CLEAR)) {
|
||||
if((!(ae->flags & HEALTH_ENTRY_RUN_ONCE)) || (ae->flags & HEALTH_ENTRY_RUN_ONCE && ae->old_status < RRDCALC_STATUS_RAISED) ) {
|
||||
netdata_log_debug(D_HEALTH, "Health not sending notification for first initialization of alarm '%s.%s' status %s"
|
||||
, ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if alarm notifications are silenced
|
||||
if (ae->flags & HEALTH_ENTRY_FLAG_SILENCED) {
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"[%s]: Health not sending notification for alarm '%s.%s' status %s "
|
||||
"(command API has disabled notifications)",
|
||||
rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
|
||||
goto done;
|
||||
}
|
||||
|
||||
nd_log(NDLS_DAEMON, NDLP_DEBUG,
|
||||
"[%s]: Sending notification for alarm '%s.%s' status %s.",
|
||||
rrdhost_hostname(host), ae_chart_id(ae), ae_name(ae), rrdcalc_status2string(ae->new_status));
|
||||
|
||||
const char *exec = (ae->exec) ? ae_exec(ae) : string2str(host->health.health_default_exec);
|
||||
const char *recipient = (ae->recipient) ? ae_recipient(ae) : string2str(host->health.health_default_recipient);
|
||||
|
||||
char *edit_command = ae->source ? health_edit_command_from_source(ae_source(ae)) : strdupz("UNKNOWN=0=UNKNOWN");
|
||||
|
||||
BUFFER *warn_alarms = buffer_create(1024, &netdata_buffers_statistics.buffers_health);
|
||||
BUFFER *crit_alarms = buffer_create(1024, &netdata_buffers_statistics.buffers_health);
|
||||
|
||||
size_t n_warn = health_raised_summary_entries(hrm, warn_alarms, ae, RRDCALC_STATUS_WARNING);
|
||||
size_t n_crit = health_raised_summary_entries(hrm, crit_alarms, ae, RRDCALC_STATUS_CRITICAL);
|
||||
|
||||
BUFFER *wb = buffer_create(8192, &netdata_buffers_statistics.buffers_health);
|
||||
bool ok = prepare_command(wb,
|
||||
exec,
|
||||
recipient,
|
||||
rrdhost_registry_hostname(host),
|
||||
ae->unique_id,
|
||||
ae->alarm_id,
|
||||
ae->alarm_event_id,
|
||||
(unsigned long)ae->when,
|
||||
ae_name(ae),
|
||||
ae->chart?ae_chart_id(ae):"NOCHART",
|
||||
rrdcalc_status2string(ae->new_status),
|
||||
rrdcalc_status2string(ae->old_status),
|
||||
ae->new_value,
|
||||
ae->old_value,
|
||||
ae->source?ae_source(ae):"UNKNOWN",
|
||||
(uint32_t)ae->duration,
|
||||
(ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING && ae->new_status >= RRDCALC_STATUS_WARNING) ? (uint32_t)ae->duration : (uint32_t)ae->non_clear_duration,
|
||||
ae_units(ae),
|
||||
ae_info(ae),
|
||||
ae_new_value_string(ae),
|
||||
ae_old_value_string(ae),
|
||||
health_raised_summary_my_expression_source(hrm, ae),
|
||||
health_raised_summary_my_expression_error(hrm, ae),
|
||||
n_warn,
|
||||
n_crit,
|
||||
buffer_tostring(warn_alarms),
|
||||
buffer_tostring(crit_alarms),
|
||||
ae->classification?ae_classification(ae):"Unknown",
|
||||
edit_command,
|
||||
host->machine_guid,
|
||||
&ae->transition_id,
|
||||
host->health.use_summary_for_notifications && ae->summary?ae_summary(ae):ae_name(ae),
|
||||
string2str(ae->chart_context),
|
||||
string2str(ae->component),
|
||||
string2str(ae->type)
|
||||
);
|
||||
|
||||
const char *command_to_run = buffer_tostring(wb);
|
||||
if (ok) {
|
||||
ae->flags |= HEALTH_ENTRY_FLAG_EXEC_RUN;
|
||||
ae->exec_run_timestamp = now_realtime_sec(); /* will be updated by real time after spawning */
|
||||
|
||||
netdata_log_debug(D_HEALTH, "executing command '%s'", command_to_run);
|
||||
ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS;
|
||||
ae->exec_spawn_serial = spawn_enq_cmd(command_to_run);
|
||||
enqueue_alarm_notify_in_progress(ae);
|
||||
health_alarm_log_save(host, ae);
|
||||
} else {
|
||||
netdata_log_error("Failed to format command arguments");
|
||||
}
|
||||
|
||||
buffer_free(warn_alarms);
|
||||
buffer_free(crit_alarms);
|
||||
buffer_free(wb);
|
||||
freez(edit_command);
|
||||
|
||||
return; //health_alarm_wait_for_execution
|
||||
done:
|
||||
health_alarm_log_save(host, ae);
|
||||
}
|
||||
|
||||
// Look up the first alarm log entry of the alert's host whose alarm_id equals rc->id
// and report its global_id and transition_id.
//
// Returns false, leaving the outputs untouched, when the alert is not linked to a chart.
// Otherwise the outputs are always written: copied from the entry when one is found,
// zeroed/cleared when not. The return value is true only when an entry was found.
bool health_alarm_log_get_global_id_and_transition_id_for_rrdcalc(RRDCALC *rc, usec_t *global_id, uuid_t *transitions_id) {
    if(!rc->rrdset)
        return false;

    RRDHOST *host = rc->rrdset->rrdhost;

    rw_spinlock_read_lock(&host->health_log.spinlock);

    // linear scan of the log, stopping at the first entry of this alert
    ALARM_ENTRY *entry = host->health_log.alarms;
    while(entry) {
        if(unlikely(entry->alarm_id == rc->id))
            break;

        entry = entry->next;
    }

    bool found = (entry != NULL);

    if(found) {
        *global_id = entry->global_id;
        uuid_copy(*transitions_id, entry->transition_id);
    }
    else {
        *global_id = 0;
        uuid_clear(*transitions_id);
    }

    rw_spinlock_read_unlock(&host->health_log.spinlock);

    return found;
}
|
||||
|
||||
// Process the alarm log of a host in two passes:
//
//  1. under the read lock, send notifications for every entry that is neither
//     PROCESSED nor UPDATED and whose notification delay has expired, while
//     tracking the lowest unique_id still waiting (the resume point for the
//     next call, stored in host->health_last_processed_id);
//
//  2. under the write lock, unlink and free the entries that are no longer
//     needed (superseded non-repeating entries that are saved and not being
//     executed, and saved REMOVED entries older than one day).
void health_alarm_log_process_to_send_notifications(RRDHOST *host, struct health_raised_summary *hrm) {
    time_t now = now_realtime_sec();

    rw_spinlock_read_lock(&host->health_log.spinlock);

    // the head of the list is read only while holding the lock: the purge pass
    // below unlinks and frees entries under the write lock, so dereferencing
    // the head without the lock could touch a freed entry
    uint32_t first_waiting = (host->health_log.alarms) ? host->health_log.alarms->unique_id : 0;

    ALARM_ENTRY *ae;
    for(ae = host->health_log.alarms; ae && ae->unique_id >= host->health_last_processed_id; ae = ae->next) {
        if(unlikely(
            !(ae->flags & HEALTH_ENTRY_FLAG_PROCESSED) &&
            !(ae->flags & HEALTH_ENTRY_FLAG_UPDATED)
            )) {
            if(unlikely(ae->unique_id < first_waiting))
                first_waiting = ae->unique_id;

            if(likely(now >= ae->delay_up_to_timestamp))
                health_send_notification(host, ae, hrm);
        }
    }

    rw_spinlock_read_unlock(&host->health_log.spinlock);

    // remember this for the next iteration
    host->health_last_processed_id = first_waiting;

    // delete those that are updated, have no execution in progress and are not repeating,
    // plus the REMOVED ones that have been saved and are older than a day
    rw_spinlock_write_lock(&host->health_log.spinlock);

    // sampled once: the value does not need to change while the list is walked
    time_t purge_now = now_realtime_sec();

    ALARM_ENTRY *prev = NULL, *next = NULL;
    for(ae = host->health_log.alarms; ae ; ae = next) {
        next = ae->next; // set it here, for the next iteration

        if((likely(!(ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING)) &&
            (ae->flags & HEALTH_ENTRY_FLAG_UPDATED) &&
            (ae->flags & HEALTH_ENTRY_FLAG_SAVED) &&
            !(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS))
           ||
           ((ae->new_status == RRDCALC_STATUS_REMOVED) &&
            (ae->flags & HEALTH_ENTRY_FLAG_SAVED) &&
            (ae->when + 86400 < purge_now)))
        {

            if(host->health_log.alarms == ae) {
                host->health_log.alarms = next;
                // prev is also NULL here
            }
            else {
                prev->next = next;
                // prev should not be touched here - we need it for the next iteration
                // because we may have to also remove the next item
            }

            health_alarm_log_free_one_nochecks_nounlink(ae);
        }
        else
            prev = ae;
    }

    rw_spinlock_write_unlock(&host->health_log.spinlock);
}
|
616
health/health_prototypes.c
Normal file
616
health/health_prototypes.c
Normal file
|
@ -0,0 +1,616 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "health_internals.h"
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
uint32_t hash;
|
||||
ALERT_ACTION_OPTIONS value;
|
||||
} alert_action_options[] = {
|
||||
{ "no-clear-notification", 0 , ALERT_ACTION_OPTION_NO_CLEAR_NOTIFICATION}
|
||||
|
||||
// terminator
|
||||
, {NULL, 0, 0}
|
||||
};
|
||||
|
||||
inline ALERT_ACTION_OPTIONS alert_action_options_parse_one(const char *o) {
|
||||
ALERT_ACTION_OPTIONS ret = 0;
|
||||
|
||||
if(!o || !*o) return ret;
|
||||
|
||||
uint32_t hash = simple_hash(o);
|
||||
int i;
|
||||
for(i = 0; alert_action_options[i].name ; i++) {
|
||||
if (unlikely(hash == alert_action_options[i].hash && !strcmp(o, alert_action_options[i].name))) {
|
||||
ret |= alert_action_options[i].value;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Parse a list of option names separated by commas, spaces or pipes and
// return the union of the flags of all the names recognized.
// The input string is tokenized through strsep_skip_consecutive_separators().
inline ALERT_ACTION_OPTIONS alert_action_options_parse(char *o) {
    ALERT_ACTION_OPTIONS parsed = 0;

    for(;;) {
        if(!o || !*o)
            break;

        char *word = strsep_skip_consecutive_separators(&o, ", |");
        if(!word)
            break;

        if(*word)
            parsed |= alert_action_options_parse_one(word);
    }

    return parsed;
}
|
||||
|
||||
// Append to wb a JSON array member named `key`, holding the names of all
// the alert action options that are set in `options`.
// Each flag value is emitted at most once, even if the table lists it under several names.
void alert_action_options_to_buffer_json_array(BUFFER *wb, const char *key, ALERT_ACTION_OPTIONS options) {
    buffer_json_member_add_array(wb, key);

    // flags already emitted, to prevent adding duplicates
    // (this mask holds ALERT_ACTION_OPTIONS bits, so it is declared with that type)
    ALERT_ACTION_OPTIONS used = 0;

    for(int i = 0; alert_action_options[i].name ; i++) {
        if (unlikely((alert_action_options[i].value & options) && !(alert_action_options[i].value & used))) {
            const char *name = alert_action_options[i].name;
            used |= alert_action_options[i].value;

            buffer_json_add_array_item_string(wb, name);
        }
    }

    buffer_json_array_close(wb);
}
|
||||
|
||||
static void alert_action_options_init(void) {
|
||||
for(int i = 0; alert_action_options[i].name ; i++)
|
||||
alert_action_options[i].hash = simple_hash(alert_action_options[i].name);
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// Release the match and config members of a single prototype.
// It does not take the prototype's spinlock and does not touch the linked list.
static void health_prototype_cleanup_one_unsafe(RRD_ALERT_PROTOTYPE *ap) {
    struct rrd_alert_match *match = &ap->match;
    struct rrd_alert_config *config = &ap->config;

    rrd_alert_match_cleanup(match);
    rrd_alert_config_cleanup(config);
}
|
||||
|
||||
// Release everything a prototype owns: all the prototypes chained after it
// (which are cleaned up and freed) and then its own members.
// The prototype structure itself is not freed.
void health_prototype_cleanup(RRD_ALERT_PROTOTYPE *ap) {
    spinlock_lock(&ap->_internal.spinlock);

    // keep detaching the first chained prototype until none is left
    for(RRD_ALERT_PROTOTYPE *chained = ap->_internal.next; chained ; chained = ap->_internal.next) {
        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ap->_internal.next, chained, _internal.prev, _internal.next);
        health_prototype_cleanup_one_unsafe(chained);
        freez(chained);
    }

    spinlock_unlock(&ap->_internal.spinlock);

    health_prototype_cleanup_one_unsafe(ap);
}
|
||||
|
||||
// Clean up a prototype and free its structure. NULL is accepted and ignored.
void health_prototype_free(RRD_ALERT_PROTOTYPE *ap) {
    if(ap) {
        health_prototype_cleanup(ap);
        freez(ap);
    }
}
|
||||
|
||||
// Dictionary insert callback for the prototypes dictionary:
// initializes the spinlock of the new prototype and marks it as being on disk
// unless its source is dyncfg.
void health_prototype_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) {
    RRD_ALERT_PROTOTYPE *inserted = value;

    spinlock_init(&inserted->_internal.spinlock);

    if(inserted->config.source_type == DYNCFG_SOURCE_TYPE_DYNCFG)
        return;

    inserted->_internal.is_on_disk = true;
}
|
||||
|
||||
// Dictionary conflict callback, called when a prototype is added with a name
// that already exists in the prototypes dictionary. Three outcomes are possible:
//
//  - the new one comes from dyncfg: it replaces the existing one
//    (contents are swapped, then the old contents are cleaned up);
//  - the existing one comes from dyncfg and the new one does not:
//    the new one is discarded;
//  - neither comes from dyncfg: a heap copy of the new one is appended
//    to the linked list of the existing one.
//
// Always returns true.
bool health_prototype_conflict_cb(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) {
    RRD_ALERT_PROTOTYPE *existing = old_value;
    RRD_ALERT_PROTOTYPE *incoming = new_value;

    bool existing_is_dyncfg = existing->config.source_type == DYNCFG_SOURCE_TYPE_DYNCFG;
    bool incoming_is_dyncfg = incoming->config.source_type == DYNCFG_SOURCE_TYPE_DYNCFG;

    // if at least one of the two is not dyncfg, both are flagged as being on disk
    if(!(existing_is_dyncfg && incoming_is_dyncfg))
        existing->_internal.is_on_disk = incoming->_internal.is_on_disk = true;

    if(incoming_is_dyncfg) {
        // alerts with the same name replace the existing one
        spinlock_init(&incoming->_internal.spinlock);
        incoming->_internal.uses = existing->_internal.uses;

        spinlock_lock(&incoming->_internal.spinlock);
        spinlock_lock(&existing->_internal.spinlock);
        SWAP(*existing, *incoming);
        spinlock_unlock(&existing->_internal.spinlock);
        spinlock_unlock(&incoming->_internal.spinlock);

        // after the swap, 'incoming' holds the previous contents - release them
        health_prototype_cleanup(incoming);
        memset(incoming, 0, sizeof(*incoming));
    }
    else if(existing_is_dyncfg) {
        // the existing is a dyncfg and the new one is read from the config
        health_prototype_cleanup(incoming);
        memset(incoming, 0, sizeof(*incoming));
    }
    else {
        // alerts with the same name are appended to the existing one
        RRD_ALERT_PROTOTYPE *copy = callocz(1, sizeof(*copy));
        memcpy(copy, new_value, sizeof(*copy));

        spinlock_lock(&existing->_internal.spinlock);
        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(existing->_internal.next, copy, _internal.prev, _internal.next);
        spinlock_unlock(&existing->_internal.spinlock);

        if(copy->_internal.enabled)
            existing->_internal.enabled = true;
    }

    return true;
}
|
||||
|
||||
// Dictionary delete callback: releases everything owned by the prototype being deleted.
void health_prototype_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) {
    health_prototype_cleanup((RRD_ALERT_PROTOTYPE *)value);
}
|
||||
|
||||
void health_init_prototypes(void) {
|
||||
if(health_globals.prototypes.dict)
|
||||
return;
|
||||
|
||||
health_globals.prototypes.dict = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE);
|
||||
dictionary_register_insert_callback(health_globals.prototypes.dict, health_prototype_insert_cb, NULL);
|
||||
dictionary_register_conflict_callback(health_globals.prototypes.dict, health_prototype_conflict_cb, NULL);
|
||||
dictionary_register_delete_callback(health_globals.prototypes.dict, health_prototype_delete_cb, NULL);
|
||||
|
||||
alert_action_options_init();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// If needed, add a prefix key to all possible values in the range
|
||||
static inline char *health_config_add_key_to_values(char *value) {
|
||||
BUFFER *wb = buffer_create(HEALTH_CONF_MAX_LINE + 1, NULL);
|
||||
char key[HEALTH_CONF_MAX_LINE + 1];
|
||||
char data[HEALTH_CONF_MAX_LINE + 1];
|
||||
|
||||
char *s = value;
|
||||
size_t i = 0;
|
||||
|
||||
key[0] = '\0';
|
||||
while(*s) {
|
||||
if (*s == '=') {
|
||||
//hold the key
|
||||
data[i]='\0';
|
||||
strncpyz(key, data, HEALTH_CONF_MAX_LINE);
|
||||
i=0;
|
||||
} else if (*s == ' ') {
|
||||
data[i]='\0';
|
||||
if (data[0]=='!')
|
||||
buffer_snprintf(wb, HEALTH_CONF_MAX_LINE, "!%s=%s ", key, data + 1);
|
||||
else
|
||||
buffer_snprintf(wb, HEALTH_CONF_MAX_LINE, "%s=%s ", key, data);
|
||||
i=0;
|
||||
} else {
|
||||
data[i++] = *s;
|
||||
}
|
||||
s++;
|
||||
}
|
||||
|
||||
data[i]='\0';
|
||||
if (data[0]) {
|
||||
if (data[0]=='!')
|
||||
buffer_snprintf(wb, HEALTH_CONF_MAX_LINE, "!%s=%s ", key, data + 1);
|
||||
else
|
||||
buffer_snprintf(wb, HEALTH_CONF_MAX_LINE, "%s=%s ", key, data);
|
||||
}
|
||||
|
||||
char *final = strdupz(buffer_tostring(wb));
|
||||
buffer_free(wb);
|
||||
|
||||
return final;
|
||||
}
|
||||
|
||||
static void health_prototype_activate_match_patterns(struct rrd_alert_match *am) {
|
||||
if(am->os) {
|
||||
simple_pattern_free(am->os_pattern);
|
||||
|
||||
char *tmp = simple_pattern_trim_around_equal(string2str(am->os));
|
||||
am->os_pattern = simple_pattern_create(
|
||||
tmp, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
freez(tmp);
|
||||
}
|
||||
|
||||
if(am->host) {
|
||||
simple_pattern_free(am->host_pattern);
|
||||
|
||||
char *tmp = simple_pattern_trim_around_equal(string2str(am->host));
|
||||
am->host_pattern = simple_pattern_create(
|
||||
tmp, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
freez(tmp);
|
||||
}
|
||||
|
||||
if(am->charts) {
|
||||
simple_pattern_free(am->charts_pattern);
|
||||
|
||||
char *tmp = simple_pattern_trim_around_equal(string2str(am->charts));
|
||||
am->charts_pattern = simple_pattern_create(
|
||||
tmp, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
freez(tmp);
|
||||
}
|
||||
|
||||
if(am->plugin) {
|
||||
simple_pattern_free(am->plugin_pattern);
|
||||
|
||||
char *tmp = simple_pattern_trim_around_equal(string2str(am->plugin));
|
||||
am->plugin_pattern = simple_pattern_create(
|
||||
tmp, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
freez(tmp);
|
||||
}
|
||||
|
||||
if(am->module) {
|
||||
simple_pattern_free(am->module_pattern);
|
||||
|
||||
char *tmp = simple_pattern_trim_around_equal(string2str(am->module));
|
||||
am->module_pattern = simple_pattern_create(
|
||||
tmp, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
freez(tmp);
|
||||
}
|
||||
|
||||
if(am->host_labels) {
|
||||
simple_pattern_free(am->host_labels_pattern);
|
||||
|
||||
char *tmp = simple_pattern_trim_around_equal(string2str(am->host_labels));
|
||||
am->host_labels_pattern = simple_pattern_create(
|
||||
tmp, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
freez(tmp);
|
||||
}
|
||||
|
||||
if(am->chart_labels) {
|
||||
simple_pattern_free(am->chart_labels_pattern);
|
||||
|
||||
char *tmp = simple_pattern_trim_around_equal(string2str(am->chart_labels));
|
||||
char *tmp2 = health_config_add_key_to_values(tmp);
|
||||
am->chart_labels_pattern = simple_pattern_create(
|
||||
tmp2, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
freez(tmp2);
|
||||
freez(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the configuration hash id of a prototype: the prototype is rendered
// to JSON, a UUID is generated from the hash of that text and stored in
// ap->config.hash_id. The configuration is then handed to sql_alert_store_config(),
// whose result is ignored.
void health_prototype_hash_id(RRD_ALERT_PROTOTYPE *ap) {
    CLEAN_BUFFER *json = buffer_create(100, NULL);
    health_prototype_to_json(json, ap, true);

    UUID generated = UUID_generate_from_hash(buffer_tostring(json), buffer_strlen(json));
    uuid_copy(ap->config.hash_id, generated.uuid);

    (void) sql_alert_store_config(ap);
}
|
||||
|
||||
// Validate a prototype (and the prototypes chained to it), prepare each of them
// for use and add the prototype to the prototypes dictionary under its name.
//
// Returns false, after logging the reason, when the prototype has no 'on' target,
// no update frequency, or nothing to evaluate. Returns true otherwise.
bool health_prototype_add(RRD_ALERT_PROTOTYPE *ap) {
    if(ap->match.is_template) {
        if(!ap->match.on.context) {
            netdata_log_error(
                "HEALTH: alert '%s' does not define a context (parameter 'on'). Source: %s",
                string2str(ap->config.name), string2str(ap->config.source));
            return false;
        }
    }
    else if(!ap->match.on.chart) {
        netdata_log_error(
            "HEALTH: alert '%s' does not define a instance (parameter 'on'). Source: %s",
            string2str(ap->config.name), string2str(ap->config.source));
        return false;
    }

    if(!ap->config.update_every) {
        netdata_log_error(
            "HEALTH: alert '%s' has no frequency (parameter 'every'). Source: %s",
            string2str(ap->config.name), string2str(ap->config.source));
        return false;
    }

    if(!RRDCALC_HAS_DB_LOOKUP(ap) && !ap->config.calculation && !ap->config.warning && !ap->config.critical) {
        netdata_log_error(
            "HEALTH: alert '%s' is useless (no db lookup, no calculation, no warning and no critical expressions). Source: %s",
            string2str(ap->config.name), string2str(ap->config.source));
        return false;
    }

    // prepare every prototype of the chain
    bool any_enabled = false;
    RRD_ALERT_PROTOTYPE *node = ap;
    while(node) {
        // a config hash id is needed for each prototype individually,
        // so the node is temporarily isolated from the linked list
        RRD_ALERT_PROTOTYPE *saved_prev = node->_internal.prev;
        RRD_ALERT_PROTOTYPE *saved_next = node->_internal.next;
        node->_internal.prev = node;
        node->_internal.next = NULL;

        if(node->match.enabled)
            any_enabled = true;

        // chained prototypes without a name inherit the name of the head
        if(!node->config.name)
            node->config.name = string_dup(ap->config.name);

        health_prototype_hash_id(node);

        // activate the match patterns in it
        health_prototype_activate_match_patterns(&node->match);

        // fall back to the global defaults for exec and recipient
        if(!node->config.exec)
            node->config.exec = string_dup(health_globals.config.default_exec);

        if(!node->config.recipient)
            node->config.recipient = string_dup(health_globals.config.default_recipient);

        // restore the linked list
        node->_internal.prev = saved_prev;
        node->_internal.next = saved_next;

        node = saved_next;
    }
    ap->_internal.enabled = any_enabled;

    // add it to the prototypes
    dictionary_set_advanced(
        health_globals.prototypes.dict,
        string2str(ap->config.name), string_strlen(ap->config.name),
        ap, sizeof(*ap),
        NULL);

    return true;
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
void health_reload_prototypes(void) {
|
||||
// remove all dyncfg related to prototypes
|
||||
health_dyncfg_unregister_all_prototypes();
|
||||
|
||||
// clear old prototypes from memory
|
||||
dictionary_flush(health_globals.prototypes.dict);
|
||||
|
||||
// load the prototypes from disk
|
||||
recursive_config_double_dir_load(
|
||||
health_user_config_dir(),
|
||||
health_globals.config.stock_enabled ? health_stock_config_dir() : NULL,
|
||||
NULL,
|
||||
health_readfile,
|
||||
NULL, 0);
|
||||
|
||||
// register all loaded prototypes
|
||||
health_dyncfg_register_all_prototypes();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// Check whether a prototype is applicable to a host.
// The global 'enabled alerts' pattern (when set) must match the alert name, and
// each of the os, hostname and host labels patterns that is set must match the host.
// Host labels are checked only when the host has labels.
static bool prototype_matches_host(RRDHOST *host, RRD_ALERT_PROTOTYPE *ap) {
    struct rrd_alert_match *am = &ap->match;

    if(health_globals.config.enabled_alerts) {
        if(!simple_pattern_matches(health_globals.config.enabled_alerts, string2str(ap->config.name)))
            return false;
    }

    if(am->os_pattern) {
        if(!simple_pattern_matches_string(am->os_pattern, host->os))
            return false;
    }

    if(am->host_pattern) {
        if(!simple_pattern_matches_string(am->host_pattern, host->hostname))
            return false;
    }

    if(host->rrdlabels && am->host_labels_pattern) {
        if(!rrdlabels_match_simple_pattern_parsed(host->rrdlabels, am->host_labels_pattern, '=', NULL))
            return false;
    }

    return true;
}
|
||||
|
||||
// Check whether a prototype is applicable to a chart.
// Templates are matched by chart context (and optionally by a pattern on the
// chart id or name); non-templates are matched by chart id or name.
// Plugin, module and chart labels patterns apply to both kinds.
static bool prototype_matches_rrdset(RRDSET *st, RRD_ALERT_PROTOTYPE *ap) {
    struct rrd_alert_match *am = &ap->match;

    if(am->is_template) {
        // match the chart context
        if(am->on.context && am->on.context != st->context)
            return false;

        // match the chart pattern, against either the id or the name of the chart
        if(am->charts && am->charts_pattern) {
            if(!simple_pattern_matches_string(am->charts_pattern, st->id) &&
               !simple_pattern_matches_string(am->charts_pattern, st->name))
                return false;
        }
    }
    else {
        // match the chart id, or the chart name
        if(am->on.chart && am->on.chart != st->id && am->on.chart != st->name)
            return false;
    }

    // match the plugin pattern
    if(am->plugin && am->plugin_pattern) {
        if(!simple_pattern_matches_string(am->plugin_pattern, st->plugin_name))
            return false;
    }

    // match the module pattern
    if(am->module && am->module_pattern) {
        if(!simple_pattern_matches_string(am->module_pattern, st->module_name))
            return false;
    }

    // match the chart labels, only for charts that have labels
    if(st->rrdlabels && am->chart_labels && am->chart_labels_pattern) {
        if(!rrdlabels_match_simple_pattern_parsed(st->rrdlabels, am->chart_labels_pattern, '=', NULL))
            return false;
    }

    return true;
}
|
||||
|
||||
void health_prototype_copy_match_without_patterns(struct rrd_alert_match *dst, struct rrd_alert_match *src) {
|
||||
dst->enabled = src->enabled;
|
||||
dst->is_template = src->is_template;
|
||||
|
||||
if(dst->is_template)
|
||||
dst->on.context = string_dup(src->on.context);
|
||||
else
|
||||
dst->on.chart = string_dup(src->on.chart);
|
||||
|
||||
dst->os = string_dup(src->os);
|
||||
dst->host = string_dup(src->host);
|
||||
dst->charts = string_dup(src->charts);
|
||||
dst->plugin = string_dup(src->plugin);
|
||||
dst->module = string_dup(src->module);
|
||||
dst->host_labels = string_dup(src->host_labels);
|
||||
dst->chart_labels = string_dup(src->chart_labels);
|
||||
}
|
||||
|
||||
void health_prototype_copy_config(struct rrd_alert_config *dst, struct rrd_alert_config *src) {
|
||||
uuid_copy(dst->hash_id, src->hash_id);
|
||||
|
||||
dst->name = string_dup(src->name);
|
||||
|
||||
dst->exec = string_dup(src->exec);
|
||||
dst->recipient = string_dup(src->recipient);
|
||||
|
||||
dst->classification = string_dup(src->classification);
|
||||
dst->component = string_dup(src->component);
|
||||
dst->type = string_dup(src->type);
|
||||
|
||||
dst->source_type = src->source_type;
|
||||
dst->source = string_dup(src->source);
|
||||
dst->units = string_dup(src->units);
|
||||
dst->summary = string_dup(src->summary);
|
||||
dst->info = string_dup(src->info);
|
||||
|
||||
dst->update_every = src->update_every;
|
||||
|
||||
dst->green = src->green;
|
||||
dst->red = src->red;
|
||||
|
||||
dst->dimensions = string_dup(src->dimensions);
|
||||
|
||||
dst->group = src->group;
|
||||
dst->before = src->before;
|
||||
dst->after = src->after;
|
||||
dst->options = src->options;
|
||||
|
||||
const char *failed_at = NULL;
|
||||
int error = 0;
|
||||
|
||||
dst->calculation = expression_parse(expression_source(src->calculation), &failed_at, &error);
|
||||
dst->warning = expression_parse(expression_source(src->warning), &failed_at, &error);
|
||||
dst->critical = expression_parse(expression_source(src->critical), &failed_at, &error);
|
||||
|
||||
dst->delay_up_duration = src->delay_up_duration;
|
||||
dst->delay_down_duration = src->delay_down_duration;
|
||||
dst->delay_max_duration = src->delay_max_duration;
|
||||
dst->delay_multiplier = src->delay_multiplier;
|
||||
|
||||
dst->has_custom_repeat_config = src->has_custom_repeat_config;
|
||||
dst->warn_repeat_every = src->warn_repeat_every;
|
||||
dst->crit_repeat_every = src->crit_repeat_every;
|
||||
}
|
||||
|
||||
// Apply a prototype (and every prototype chained to it) to a chart.
// For each enabled prototype of the chain that matches both the host of the
// chart and the chart itself, an alert is instantiated on the chart.
// The usage counter is kept on the head of the chain.
static void health_prototype_apply_to_rrdset(RRDSET *st, RRD_ALERT_PROTOTYPE *ap) {
    if(!ap->_internal.enabled)
        return;

    spinlock_lock(&ap->_internal.spinlock);
    for(RRD_ALERT_PROTOTYPE *t = ap; t ; t = t->_internal.next) {
        if(!t->match.enabled)
            continue;

        if(!prototype_matches_host(st->rrdhost, t))
            continue;

        if(!prototype_matches_rrdset(st, t))
            continue;

        // instantiate the alert from the prototype that actually matched (t),
        // not from the head of the chain, which may carry a different configuration
        if(rrdcalc_add_from_prototype(st->rrdhost, st, t))
            ap->_internal.uses++;
    }
    spinlock_unlock(&ap->_internal.spinlock);
}
|
||||
|
||||
// Apply every prototype of the prototypes dictionary to a chart,
// without removing the alerts the chart already has.
void health_prototype_alerts_for_rrdset_incrementally(RRDSET *st) {
    RRD_ALERT_PROTOTYPE *proto;

    dfe_start_read(health_globals.prototypes.dict, proto) {
        health_prototype_apply_to_rrdset(st, proto);
    }
    dfe_done(proto);
}
|
||||
|
||||
// Rebuild the alerts of a chart from scratch: all the alerts currently linked
// to the chart are unlinked and deleted, then all prototypes are applied again.
void health_prototype_reset_alerts_for_rrdset(RRDSET *st) {
    // out with the old
    rrdcalc_unlink_and_delete_all_rrdset_alerts(st);

    // in with the new
    health_prototype_alerts_for_rrdset_incrementally(st);
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// Apply a single prototype to all the charts of a host.
// Nothing is done when the prototype is disabled, or when health is not enabled
// for the host and the host does not have the INITIALIZED_HEALTH flag.
void health_apply_prototype_to_host(RRDHOST *host, RRD_ALERT_PROTOTYPE *ap) {
    if(!ap->_internal.enabled ||
       (unlikely(!host->health.health_enabled) && !rrdhost_flag_check(host, RRDHOST_FLAG_INITIALIZED_HEALTH)))
        return;

    RRDSET *chart;
    rrdset_foreach_read(chart, host) {
        health_prototype_apply_to_rrdset(chart, ap);
    }
    rrdset_foreach_done(chart);
}
|
||||
|
||||
// Apply a single prototype to every host in rrdhost_root_index.
// Disabled prototypes are ignored.
void health_prototype_apply_to_all_hosts(RRD_ALERT_PROTOTYPE *ap) {
    if(ap->_internal.enabled) {
        RRDHOST *node;

        dfe_start_reentrant(rrdhost_root_index, node) {
            health_apply_prototype_to_host(node, ap);
        }
        dfe_done(node);
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// Rebuild all the alerts of a host from the loaded prototypes:
// running alerts are deleted, the existing alarm log entries are flagged as
// UPDATED, and the alerts of every chart of the host are recreated.
// Nothing is done when health is not enabled for the host and the host does
// not have the INITIALIZED_HEALTH flag.
void health_apply_prototypes_to_host(RRDHOST *host) {
    if(unlikely(!host->health.health_enabled) && !rrdhost_flag_check(host, RRDHOST_FLAG_INITIALIZED_HEALTH))
        return;

    // free all running alarms
    rrdcalc_delete_all(host);

    // invalidate all previous entries in the alarm log
    // (entries already in REMOVED status are left as they are)
    rw_spinlock_read_lock(&host->health_log.spinlock);
    ALARM_ENTRY *entry = host->health_log.alarms;
    while(entry) {
        if(entry->new_status != RRDCALC_STATUS_REMOVED)
            entry->flags |= HEALTH_ENTRY_FLAG_UPDATED;

        entry = entry->next;
    }
    rw_spinlock_read_unlock(&host->health_log.spinlock);

    // apply all the prototypes for the charts of the host
    RRDSET *chart;
    rrdset_foreach_read(chart, host) {
        health_prototype_reset_alerts_for_rrdset(chart);
    }
    rrdset_foreach_done(chart);

#ifdef ENABLE_ACLK
    // when cloud is enabled, reset the countdown of the host's ACLK sync config
    if (netdata_cloud_enabled) {
        struct aclk_sync_cfg_t *sync_cfg = host->aclk_config;
        if (likely(sync_cfg))
            sync_cfg->alert_queue_removed = SEND_REMOVED_AFTER_HEALTH_LOOPS;
    }
#endif
}
|
||||
|
||||
void health_apply_prototypes_to_all_hosts(void) {
|
||||
RRDHOST *host;
|
||||
dfe_start_reentrant(rrdhost_root_index, host){
|
||||
health_apply_prototypes_to_host(host);
|
||||
}
|
||||
dfe_done(host);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
|
||||
// Call cb once for every prototype in the prototypes dictionary, passing the
// caller's data pointer together with the type, component, classification
// and recipient of the prototype's configuration.
void health_prototype_metadata_foreach(void *data, prototype_metadata_cb_t cb) {
    RRD_ALERT_PROTOTYPE *proto;

    dfe_start_read(health_globals.prototypes.dict, proto) {
        struct rrd_alert_config *cfg = &proto->config;
        cb(data, cfg->type, cfg->component, cfg->classification, cfg->recipient);
    }
    dfe_done(proto);
}
|
120
health/health_prototypes.h
Normal file
120
health/health_prototypes.h
Normal file
|
@ -0,0 +1,120 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_HEALTH_PROTOTYPES_H
|
||||
#define NETDATA_HEALTH_PROTOTYPES_H
|
||||
|
||||
#include "../web/api/queries/rrdr.h"
|
||||
|
||||
// Bit flags describing optional actions of an alert.
// Values are single bits so that they can be OR-ed together.
typedef enum __attribute__((packed)) {
    ALERT_ACTION_OPTION_NONE                    = 0,
    ALERT_ACTION_OPTION_NO_CLEAR_NOTIFICATION   = (1 << 0),
} ALERT_ACTION_OPTIONS;
|
||||
|
||||
struct rrd_alert_match {
|
||||
bool enabled;
|
||||
|
||||
bool is_template;
|
||||
union {
|
||||
STRING *chart;
|
||||
STRING *context;
|
||||
} on;
|
||||
|
||||
STRING *os;
|
||||
STRING *host;
|
||||
STRING *charts; // the charts that should be linked to (for templates)
|
||||
STRING *plugin; // the plugin name that should be linked to
|
||||
STRING *module; // the module name that should be linked to
|
||||
STRING *host_labels; // the label read from an alarm file
|
||||
STRING *chart_labels; // the chart label read from an alarm file
|
||||
|
||||
SIMPLE_PATTERN *os_pattern;
|
||||
SIMPLE_PATTERN *host_pattern;
|
||||
SIMPLE_PATTERN *charts_pattern; // the simple pattern of charts
|
||||
SIMPLE_PATTERN *plugin_pattern; // the simple pattern of plugin
|
||||
SIMPLE_PATTERN *module_pattern; // the simple pattern of module
|
||||
SIMPLE_PATTERN *host_labels_pattern; // the simple pattern of labels
|
||||
SIMPLE_PATTERN *chart_labels_pattern; // the simple pattern of chart labels
|
||||
};
|
||||
void rrd_alert_match_cleanup(struct rrd_alert_match *am);
|
||||
|
||||
struct rrd_alert_config {
|
||||
uuid_t hash_id;
|
||||
|
||||
STRING *name; // the name of this alarm
|
||||
|
||||
STRING *exec; // the command to execute when this alarm switches state
|
||||
STRING *recipient; // the recipient of the alarm (the first parameter to exec)
|
||||
|
||||
STRING *classification; // the class that this alarm belongs
|
||||
STRING *component; // the component that this alarm refers to
|
||||
STRING *type; // type of the alarm
|
||||
|
||||
DYNCFG_SOURCE_TYPE source_type;
|
||||
STRING *source; // the source of this alarm
|
||||
STRING *units; // the units of the alarm
|
||||
STRING *summary; // a short alert summary
|
||||
STRING *info; // a description of the alarm
|
||||
STRING *lookup; // the lookup field
|
||||
|
||||
int update_every; // update frequency for the alarm
|
||||
|
||||
ALERT_ACTION_OPTIONS alert_action_options;
|
||||
|
||||
// the red and green threshold of this alarm (to be set to the chart)
|
||||
NETDATA_DOUBLE green;
|
||||
NETDATA_DOUBLE red;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// database lookup settings
|
||||
|
||||
STRING *dimensions; // the chart dimensions
|
||||
RRDR_TIME_GROUPING group; // grouping method: average, max, etc.
|
||||
int before; // ending point in time-series
|
||||
int after; // starting point in time-series
|
||||
RRDR_OPTIONS options; // configuration options
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// expressions related to the alarm
|
||||
|
||||
EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm
|
||||
EVAL_EXPRESSION *warning; // expression to check the warning condition
|
||||
EVAL_EXPRESSION *critical; // expression to check the critical condition
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// notification delay settings
|
||||
|
||||
int delay_up_duration; // duration to delay notifications when alarm raises
|
||||
int delay_down_duration; // duration to delay notifications when alarm lowers
|
||||
int delay_max_duration; // the absolute max delay to apply to this alarm
|
||||
float delay_multiplier; // multiplier for all delays when alarms switch status
|
||||
// while now < delay_up_to
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// notification repeat settings
|
||||
|
||||
bool has_custom_repeat_config;
|
||||
uint32_t warn_repeat_every; // interval between repeating warning notifications
|
||||
uint32_t crit_repeat_every; // interval between repeating critical notifications
|
||||
};
|
||||
void rrd_alert_config_cleanup(struct rrd_alert_config *ac);
|
||||
|
||||
#include "health.h"
|
||||
|
||||
void health_init_prototypes(void);
|
||||
|
||||
bool health_plugin_enabled(void);
|
||||
void health_plugin_disable(void);
|
||||
|
||||
void health_reload_prototypes(void);
|
||||
void health_apply_prototypes_to_host(RRDHOST *host);
|
||||
void health_apply_prototypes_to_all_hosts(void);
|
||||
|
||||
void health_prototype_alerts_for_rrdset_incrementally(RRDSET *st);
|
||||
|
||||
struct rrd_alert_config;
|
||||
struct rrd_alert_match;
|
||||
void health_prototype_copy_config(struct rrd_alert_config *dst, struct rrd_alert_config *src);
|
||||
void health_prototype_copy_match_without_patterns(struct rrd_alert_match *dst, struct rrd_alert_match *src);
|
||||
void health_prototype_reset_alerts_for_rrdset(RRDSET *st);
|
||||
|
||||
#endif //NETDATA_HEALTH_PROTOTYPES_H
|
495
health/health_silencers.c
Normal file
495
health/health_silencers.c
Normal file
|
@ -0,0 +1,495 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "health_internals.h"
|
||||
|
||||
#define HEALTH_CMDAPI_CMD_SILENCEALL "SILENCE ALL"
|
||||
#define HEALTH_CMDAPI_CMD_DISABLEALL "DISABLE ALL"
|
||||
#define HEALTH_CMDAPI_CMD_SILENCE "SILENCE"
|
||||
#define HEALTH_CMDAPI_CMD_DISABLE "DISABLE"
|
||||
#define HEALTH_CMDAPI_CMD_RESET "RESET"
|
||||
#define HEALTH_CMDAPI_CMD_LIST "LIST"
|
||||
|
||||
#define HEALTH_CMDAPI_MSG_AUTHERROR "Auth Error\n"
|
||||
#define HEALTH_CMDAPI_MSG_SILENCEALL "All alarm notifications are silenced\n"
|
||||
#define HEALTH_CMDAPI_MSG_DISABLEALL "All health checks are disabled\n"
|
||||
#define HEALTH_CMDAPI_MSG_RESET "All health checks and notifications are enabled\n"
|
||||
#define HEALTH_CMDAPI_MSG_DISABLE "Health checks disabled for alarms matching the selectors\n"
|
||||
#define HEALTH_CMDAPI_MSG_SILENCE "Alarm notifications silenced for alarms matching the selectors\n"
|
||||
#define HEALTH_CMDAPI_MSG_ADDED "Alarm selector added\n"
|
||||
#define HEALTH_CMDAPI_MSG_STYPEWARNING "WARNING: Added alarm selector to silence/disable alarms without a SILENCE or DISABLE command.\n"
|
||||
#define HEALTH_CMDAPI_MSG_NOSELECTORWARNING "WARNING: SILENCE or DISABLE command is ineffective without defining any alarm selectors.\n"
|
||||
|
||||
SILENCERS *silencers;
|
||||
|
||||
/**
|
||||
* Create Silencer
|
||||
*
|
||||
* Allocate a new silencer to Netdata.
|
||||
*
|
||||
* @return It returns the address off the silencer on success and NULL otherwise
|
||||
*/
|
||||
SILENCER *create_silencer(void) {
|
||||
SILENCER *t = callocz(1, sizeof(SILENCER));
|
||||
netdata_log_debug(D_HEALTH, "HEALTH command API: Created empty silencer");
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Health Silencers add
|
||||
*
|
||||
* Add more one silencer to the list of silencers.
|
||||
*
|
||||
* @param silencer
|
||||
*/
|
||||
void health_silencers_add(SILENCER *silencer) {
|
||||
// Add the created instance to the linked list in silencers
|
||||
silencer->next = silencers->silencers;
|
||||
silencers->silencers = silencer;
|
||||
netdata_log_debug(
|
||||
D_HEALTH,
|
||||
"HEALTH command API: Added silencer %s:%s:%s:%s",
|
||||
silencer->alarms,
|
||||
silencer->charts,
|
||||
silencer->contexts,
|
||||
silencer->hosts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Silencers Add Parameter
|
||||
*
|
||||
* Create a new silencer and adjust the variables
|
||||
*
|
||||
* @param silencer a pointer to the silencer that will be adjusted
|
||||
* @param key the key value sent by client
|
||||
* @param value the value sent to the key
|
||||
*
|
||||
* @return It returns the silencer configured on success and NULL otherwise
|
||||
*/
|
||||
SILENCER *health_silencers_addparam(SILENCER *silencer, char *key, char *value) {
|
||||
static uint32_t
|
||||
hash_alarm = 0,
|
||||
hash_template = 0,
|
||||
hash_chart = 0,
|
||||
hash_context = 0,
|
||||
hash_host = 0;
|
||||
|
||||
if (unlikely(!hash_alarm)) {
|
||||
hash_alarm = simple_uhash(HEALTH_ALARM_KEY);
|
||||
hash_template = simple_uhash(HEALTH_TEMPLATE_KEY);
|
||||
hash_chart = simple_uhash(HEALTH_CHART_KEY);
|
||||
hash_context = simple_uhash(HEALTH_CONTEXT_KEY);
|
||||
hash_host = simple_uhash(HEALTH_HOST_KEY);
|
||||
}
|
||||
|
||||
uint32_t hash = simple_uhash(key);
|
||||
if (unlikely(silencer == NULL)) {
|
||||
if (
|
||||
(hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) ||
|
||||
(hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) ||
|
||||
(hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) ||
|
||||
(hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) ||
|
||||
(hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY))
|
||||
) {
|
||||
silencer = create_silencer();
|
||||
}
|
||||
}
|
||||
|
||||
if (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) {
|
||||
silencer->alarms = strdupz(value);
|
||||
silencer->alarms_pattern = simple_pattern_create(silencer->alarms, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
} else if (hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) {
|
||||
silencer->charts = strdupz(value);
|
||||
silencer->charts_pattern = simple_pattern_create(silencer->charts, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
} else if (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) {
|
||||
silencer->contexts = strdupz(value);
|
||||
silencer->contexts_pattern = simple_pattern_create(silencer->contexts, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
} else if (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) {
|
||||
silencer->hosts = strdupz(value);
|
||||
silencer->hosts_pattern = simple_pattern_create(silencer->hosts, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
}
|
||||
|
||||
return silencer;
|
||||
}
|
||||
|
||||
/**
|
||||
* JSON Read Callback
|
||||
*
|
||||
* Callback called by netdata to create the silencer.
|
||||
*
|
||||
* @param e the main json structure
|
||||
*
|
||||
* @return It always return 0.
|
||||
*/
|
||||
int health_silencers_json_read_callback(JSON_ENTRY *e)
|
||||
{
|
||||
switch(e->type) {
|
||||
case JSON_OBJECT:
|
||||
#ifndef ENABLE_JSONC
|
||||
e->callback_function = health_silencers_json_read_callback;
|
||||
if(strcmp(e->name,"")) {
|
||||
// init silencer
|
||||
netdata_log_debug(D_HEALTH, "JSON: Got object with a name, initializing new silencer for %s",e->name);
|
||||
#endif
|
||||
e->callback_data = create_silencer();
|
||||
if(e->callback_data) {
|
||||
health_silencers_add(e->callback_data);
|
||||
}
|
||||
#ifndef ENABLE_JSONC
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
|
||||
case JSON_ARRAY:
|
||||
e->callback_function = health_silencers_json_read_callback;
|
||||
break;
|
||||
|
||||
case JSON_STRING:
|
||||
if(!strcmp(e->name,"type")) {
|
||||
netdata_log_debug(D_HEALTH, "JSON: Processing type=%s",e->data.string);
|
||||
if (!strcmp(e->data.string,"SILENCE")) silencers->stype = STYPE_SILENCE_NOTIFICATIONS;
|
||||
else if (!strcmp(e->data.string,"DISABLE")) silencers->stype = STYPE_DISABLE_ALARMS;
|
||||
} else {
|
||||
netdata_log_debug(D_HEALTH, "JSON: Adding %s=%s", e->name, e->data.string);
|
||||
if (e->callback_data)
|
||||
(void)health_silencers_addparam(e->callback_data, e->name, e->data.string);
|
||||
}
|
||||
break;
|
||||
|
||||
case JSON_BOOLEAN:
|
||||
netdata_log_debug(D_HEALTH, "JSON: Processing all_alarms");
|
||||
silencers->all_alarms=e->data.boolean?1:0;
|
||||
break;
|
||||
|
||||
case JSON_NUMBER:
|
||||
case JSON_NULL:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize Global Silencers
|
||||
*
|
||||
* Initialize the silencer for the whole netdata system.
|
||||
*
|
||||
* @return It returns 0 on success and -1 otherwise
|
||||
*/
|
||||
int health_initialize_global_silencers() {
|
||||
silencers = mallocz(sizeof(SILENCERS));
|
||||
silencers->all_alarms = 0;
|
||||
silencers->stype = STYPE_NONE;
|
||||
silencers->silencers = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Free Silencers
|
||||
*
|
||||
* Clean the silencer structure
|
||||
*
|
||||
* @param t is the structure that will be cleaned.
|
||||
*/
|
||||
void free_silencers(SILENCER *t) {
|
||||
if (!t) return;
|
||||
|
||||
while(t) {
|
||||
SILENCER *next = t->next;
|
||||
|
||||
simple_pattern_free(t->alarms_pattern);
|
||||
simple_pattern_free(t->charts_pattern);
|
||||
simple_pattern_free(t->contexts_pattern);
|
||||
simple_pattern_free(t->hosts_pattern);
|
||||
freez(t->alarms);
|
||||
freez(t->charts);
|
||||
freez(t->contexts);
|
||||
freez(t->hosts);
|
||||
freez(t);
|
||||
|
||||
t = next;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Silencers to JSON Entry
|
||||
*
|
||||
* Fill the buffer with the other values given.
|
||||
*
|
||||
* @param wb a pointer to the output buffer
|
||||
* @param var the json variable
|
||||
* @param val the json value
|
||||
* @param hasprev has it a previous value?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
int health_silencers2json_entry(BUFFER *wb, char* var, char* val, int hasprev) {
|
||||
if (val) {
|
||||
buffer_sprintf(wb, "%s\n\t\t\t\"%s\": \"%s\"", (hasprev)?",":"", var, val);
|
||||
return 1;
|
||||
} else {
|
||||
return hasprev;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Silencer to JSON
|
||||
*
|
||||
* Write the silencer values using JSON format inside a buffer.
|
||||
*
|
||||
* @param wb is the buffer to write the silencers.
|
||||
*/
|
||||
void health_silencers2json(BUFFER *wb) {
|
||||
buffer_sprintf(wb, "{\n\t\"all\": %s,"
|
||||
"\n\t\"type\": \"%s\","
|
||||
"\n\t\"silencers\": [",
|
||||
(silencers->all_alarms)?"true":"false",
|
||||
(silencers->stype == STYPE_NONE)?"None":((silencers->stype == STYPE_DISABLE_ALARMS)?"DISABLE":"SILENCE"));
|
||||
|
||||
SILENCER *silencer;
|
||||
int i = 0, j = 0;
|
||||
for(silencer = silencers->silencers; silencer ; silencer = silencer->next) {
|
||||
if(likely(i)) buffer_strcat(wb, ",");
|
||||
buffer_strcat(wb, "\n\t\t{");
|
||||
j=health_silencers2json_entry(wb, HEALTH_ALARM_KEY, silencer->alarms, j);
|
||||
j=health_silencers2json_entry(wb, HEALTH_CHART_KEY, silencer->charts, j);
|
||||
j=health_silencers2json_entry(wb, HEALTH_CONTEXT_KEY, silencer->contexts, j);
|
||||
j=health_silencers2json_entry(wb, HEALTH_HOST_KEY, silencer->hosts, j);
|
||||
j=0;
|
||||
buffer_strcat(wb, "\n\t\t}");
|
||||
i++;
|
||||
}
|
||||
if(likely(i)) buffer_strcat(wb, "\n\t");
|
||||
buffer_strcat(wb, "]\n}\n");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Silencer to FILE
|
||||
*
|
||||
* Write the silencer buffer to a file.
|
||||
* @param wb
|
||||
*/
|
||||
void health_silencers2file(BUFFER *wb) {
|
||||
if (wb->len == 0) return;
|
||||
|
||||
FILE *fd = fopen(health_silencers_filename(), "wb");
|
||||
if(fd) {
|
||||
size_t written = (size_t)fprintf(fd, "%s", wb->buffer) ;
|
||||
if (written == wb->len ) {
|
||||
netdata_log_info("Silencer changes written to %s", health_silencers_filename());
|
||||
}
|
||||
fclose(fd);
|
||||
return;
|
||||
}
|
||||
netdata_log_error("Silencer changes could not be written to %s. Error %s", health_silencers_filename(), strerror(errno));
|
||||
}
|
||||
|
||||
/**
|
||||
* Request V1 MGMT Health
|
||||
*
|
||||
* Function called by api to management the health.
|
||||
*
|
||||
* @param host main structure with client information!
|
||||
* @param w is the structure with all information of the client request.
|
||||
* @param url is the url that netdata is working
|
||||
*
|
||||
* @return It returns 200 on success and another code otherwise.
|
||||
*/
|
||||
int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, char *url) {
|
||||
int ret;
|
||||
(void) host;
|
||||
|
||||
BUFFER *wb = w->response.data;
|
||||
buffer_flush(wb);
|
||||
wb->content_type = CT_TEXT_PLAIN;
|
||||
|
||||
buffer_flush(w->response.data);
|
||||
|
||||
//Local instance of the silencer
|
||||
SILENCER *silencer = NULL;
|
||||
int config_changed = 1;
|
||||
|
||||
if (!w->auth_bearer_token) {
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_AUTHERROR);
|
||||
ret = HTTP_RESP_FORBIDDEN;
|
||||
} else {
|
||||
netdata_log_debug(D_HEALTH, "HEALTH command API: Comparing secret '%s' to '%s'", w->auth_bearer_token, api_secret);
|
||||
if (strcmp(w->auth_bearer_token, api_secret) != 0) {
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_AUTHERROR);
|
||||
ret = HTTP_RESP_FORBIDDEN;
|
||||
} else {
|
||||
while (url) {
|
||||
char *value = strsep_skip_consecutive_separators(&url, "&");
|
||||
if (!value || !*value) continue;
|
||||
|
||||
char *key = strsep_skip_consecutive_separators(&value, "=");
|
||||
if (!key || !*key) continue;
|
||||
if (!value || !*value) continue;
|
||||
|
||||
netdata_log_debug(D_WEB_CLIENT, "%llu: API v1 health query param '%s' with value '%s'", w->id, key, value);
|
||||
|
||||
// name and value are now the parameters
|
||||
if (!strcmp(key, "cmd")) {
|
||||
if (!strcmp(value, HEALTH_CMDAPI_CMD_SILENCEALL)) {
|
||||
silencers->all_alarms = 1;
|
||||
silencers->stype = STYPE_SILENCE_NOTIFICATIONS;
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_SILENCEALL);
|
||||
} else if (!strcmp(value, HEALTH_CMDAPI_CMD_DISABLEALL)) {
|
||||
silencers->all_alarms = 1;
|
||||
silencers->stype = STYPE_DISABLE_ALARMS;
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_DISABLEALL);
|
||||
} else if (!strcmp(value, HEALTH_CMDAPI_CMD_SILENCE)) {
|
||||
silencers->stype = STYPE_SILENCE_NOTIFICATIONS;
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_SILENCE);
|
||||
} else if (!strcmp(value, HEALTH_CMDAPI_CMD_DISABLE)) {
|
||||
silencers->stype = STYPE_DISABLE_ALARMS;
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_DISABLE);
|
||||
} else if (!strcmp(value, HEALTH_CMDAPI_CMD_RESET)) {
|
||||
silencers->all_alarms = 0;
|
||||
silencers->stype = STYPE_NONE;
|
||||
free_silencers(silencers->silencers);
|
||||
silencers->silencers = NULL;
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_RESET);
|
||||
} else if (!strcmp(value, HEALTH_CMDAPI_CMD_LIST)) {
|
||||
w->response.data->content_type = CT_APPLICATION_JSON;
|
||||
health_silencers2json(wb);
|
||||
config_changed=0;
|
||||
}
|
||||
} else {
|
||||
silencer = health_silencers_addparam(silencer, key, value);
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(silencer)) {
|
||||
health_silencers_add(silencer);
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_ADDED);
|
||||
if (silencers->stype == STYPE_NONE) {
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_STYPEWARNING);
|
||||
}
|
||||
}
|
||||
if (unlikely(silencers->stype != STYPE_NONE && !silencers->all_alarms && !silencers->silencers)) {
|
||||
buffer_strcat(wb, HEALTH_CMDAPI_MSG_NOSELECTORWARNING);
|
||||
}
|
||||
ret = HTTP_RESP_OK;
|
||||
}
|
||||
}
|
||||
w->response.data = wb;
|
||||
buffer_no_cacheable(w->response.data);
|
||||
if (ret == HTTP_RESP_OK && config_changed) {
|
||||
BUFFER *jsonb = buffer_create(200, &netdata_buffers_statistics.buffers_health);
|
||||
health_silencers2json(jsonb);
|
||||
health_silencers2file(jsonb);
|
||||
buffer_free(jsonb);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
const char *health_silencers_filename(void) {
|
||||
return string2str(health_globals.config.silencers_filename);
|
||||
}
|
||||
|
||||
void health_set_silencers_filename(void) {
|
||||
char filename[FILENAME_MAX + 1];
|
||||
snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir);
|
||||
|
||||
health_globals.config.silencers_filename =
|
||||
string_strdupz(config_get(CONFIG_SECTION_HEALTH, "silencers file", filename));
|
||||
}
|
||||
|
||||
void health_silencers_init(void) {
|
||||
FILE *fd = fopen(health_silencers_filename(), "r");
|
||||
if (fd) {
|
||||
fseek(fd, 0 , SEEK_END);
|
||||
off_t length = (off_t) ftell(fd);
|
||||
fseek(fd, 0 , SEEK_SET);
|
||||
|
||||
if (length > 0 && length < HEALTH_SILENCERS_MAX_FILE_LEN) {
|
||||
char *str = mallocz((length+1)* sizeof(char));
|
||||
if(str) {
|
||||
size_t copied;
|
||||
copied = fread(str, sizeof(char), length, fd);
|
||||
if (copied == (length* sizeof(char))) {
|
||||
str[length] = 0x00;
|
||||
json_parse(str, NULL, health_silencers_json_read_callback);
|
||||
netdata_log_info("Parsed health silencers file %s", health_silencers_filename());
|
||||
} else {
|
||||
netdata_log_error("Cannot read the data from health silencers file %s", health_silencers_filename());
|
||||
}
|
||||
freez(str);
|
||||
}
|
||||
} else {
|
||||
netdata_log_error("Health silencers file %s has the size %" PRId64 " that is out of range[ 1 , %d ]. Aborting read.",
|
||||
health_silencers_filename(),
|
||||
(int64_t)length,
|
||||
HEALTH_SILENCERS_MAX_FILE_LEN);
|
||||
}
|
||||
fclose(fd);
|
||||
} else {
|
||||
netdata_log_info("Cannot open the file %s, so Netdata will work with the default health configuration.",
|
||||
health_silencers_filename());
|
||||
}
|
||||
}
|
||||
|
||||
// Checks an alert against the global list of silencers.
//
// A silencer matches when each of its configured patterns (alarm name,
// context, host, chart) matches the corresponding attribute of the alert;
// patterns that are not configured are not checked. A configured pattern
// never matches an attribute the alert does not have.
//
// Returns the global silence type on the first matching silencer (which may
// be STYPE_NONE when no SILENCE or DISABLE command was issued), or
// STYPE_NONE when no silencer matches.
SILENCE_TYPE health_silencers_check_silenced(RRDCALC *rc, const char *host) {
    for (SILENCER *s = silencers->silencers; s != NULL; s = s->next) {
        if (s->alarms_pattern &&
            !(rc->config.name && simple_pattern_matches_string(s->alarms_pattern, rc->config.name)))
            continue;

        if (s->contexts_pattern &&
            !(rc->rrdset && rc->rrdset->context && simple_pattern_matches_string(s->contexts_pattern, rc->rrdset->context)))
            continue;

        if (s->hosts_pattern &&
            !(host && simple_pattern_matches(s->hosts_pattern, host)))
            continue;

        if (s->charts_pattern &&
            !(rc->chart && simple_pattern_matches_string(s->charts_pattern, rc->chart)))
            continue;

        netdata_log_debug(D_HEALTH, "Alarm matches command API silence entry %s:%s:%s:%s", s->alarms,s->charts, s->contexts, s->hosts);

        if (unlikely(silencers->stype == STYPE_NONE)) {
            netdata_log_debug(D_HEALTH, "Alarm %s matched a silence entry, but no SILENCE or DISABLE command was issued via the command API. The match has no effect.", rrdcalc_name(rc));
        }
        else {
            netdata_log_debug(D_HEALTH, "Alarm %s via the command API - name:%s context:%s chart:%s host:%s"
                              , (silencers->stype == STYPE_DISABLE_ALARMS)?"Disabled":"Silenced"
                              , rrdcalc_name(rc)
                              , (rc->rrdset)?rrdset_context(rc->rrdset):""
                              , rrdcalc_chart_name(rc)
                              , host
            );
        }

        return silencers->stype;
    }

    return STYPE_NONE;
}
|
||||
|
||||
// Recomputes the DISABLED and SILENCED run flags of an alert from the global
// silencers: when "all alarms" is set the global silence type applies to the
// alert directly, otherwise the alert is matched against the silencers list.
// A change of the flags is logged.
//
// Returns 1 when the alert ends up disabled, 0 otherwise.
int health_silencers_update_disabled_silenced(RRDHOST *host, RRDCALC *rc) {
    uint32_t previous_flags = rc->run_flags;

    // start from a clean state; the flags are set again below when they apply
    rc->run_flags &= ~(RRDCALC_FLAG_DISABLED | RRDCALC_FLAG_SILENCED);

    SILENCE_TYPE effective;
    if (unlikely(silencers->all_alarms))
        effective = silencers->stype;
    else
        effective = health_silencers_check_silenced(rc, rrdhost_hostname(host));

    switch (effective) {
        case STYPE_DISABLE_ALARMS:
            rc->run_flags |= RRDCALC_FLAG_DISABLED;
            break;

        case STYPE_SILENCE_NOTIFICATIONS:
            rc->run_flags |= RRDCALC_FLAG_SILENCED;
            break;

        default:
            break;
    }

    if (previous_flags != rc->run_flags) {
        netdata_log_info(
            "Alarm silencing changed for host '%s' alarm '%s': Disabled %s->%s Silenced %s->%s",
            rrdhost_hostname(host),
            rrdcalc_name(rc),
            (previous_flags & RRDCALC_FLAG_DISABLED) ? "true" : "false",
            (rc->run_flags & RRDCALC_FLAG_DISABLED) ? "true" : "false",
            (previous_flags & RRDCALC_FLAG_SILENCED) ? "true" : "false",
            (rc->run_flags & RRDCALC_FLAG_SILENCED) ? "true" : "false");
    }

    return (rc->run_flags & RRDCALC_FLAG_DISABLED) ? 1 : 0;
}
|
|
@ -1,18 +1,9 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_HEALTH_LIB
|
||||
# define NETDATA_HEALTH_LIB 1
|
||||
#ifndef NETDATA_HEALTH_SILENCERS_H
|
||||
#define NETDATA_HEALTH_SILENCERS_H
|
||||
|
||||
# include "../libnetdata.h"
|
||||
|
||||
#define HEALTH_ALARM_KEY "alarm"
|
||||
#define HEALTH_TEMPLATE_KEY "template"
|
||||
#define HEALTH_CONTEXT_KEY "context"
|
||||
#define HEALTH_CHART_KEY "chart"
|
||||
#define HEALTH_HOST_KEY "hosts"
|
||||
#define HEALTH_OS_KEY "os"
|
||||
#define HEALTH_LOOKUP_KEY "lookup"
|
||||
#define HEALTH_CALC_KEY "calc"
|
||||
#include "health.h"
|
||||
|
||||
typedef struct silencer {
|
||||
char *alarms;
|
||||
|
@ -50,4 +41,15 @@ void health_silencers_add(SILENCER *silencer);
|
|||
SILENCER * health_silencers_addparam(SILENCER *silencer, char *key, char *value);
|
||||
int health_initialize_global_silencers();
|
||||
|
||||
#endif
|
||||
void free_silencers(SILENCER *t);
|
||||
|
||||
struct web_client;
|
||||
int web_client_api_request_v1_mgmt_health(RRDHOST *host, struct web_client *w, char *url);
|
||||
|
||||
const char *health_silencers_filename(void);
|
||||
void health_set_silencers_filename(void);
|
||||
void health_silencers_init(void);
|
||||
SILENCE_TYPE health_silencers_check_silenced(RRDCALC *rc, const char *host);
|
||||
int health_silencers_update_disabled_silenced(RRDHOST *host, RRDCALC *rc);
|
||||
|
||||
#endif //NETDATA_HEALTH_SILENCERS_H
|
486
health/health_variable.c
Normal file
486
health/health_variable.c
Normal file
|
@ -0,0 +1,486 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "health.h"
|
||||
#include "health_internals.h"
|
||||
|
||||
struct variable_lookup_score {
|
||||
RRDSET *st;
|
||||
const char *source;
|
||||
NETDATA_DOUBLE value;
|
||||
size_t score;
|
||||
};
|
||||
|
||||
struct variable_lookup_job {
|
||||
RRDCALC *rc;
|
||||
RRDHOST *host;
|
||||
STRING *variable;
|
||||
STRING *dim;
|
||||
const char *dimension;
|
||||
size_t dimension_length;
|
||||
enum {
|
||||
DIM_SELECT_NORMAL,
|
||||
DIM_SELECT_RAW,
|
||||
DIM_SELECT_LAST_COLLECTED,
|
||||
} dimension_selection;
|
||||
|
||||
struct {
|
||||
size_t size;
|
||||
size_t used;
|
||||
struct variable_lookup_score *array;
|
||||
} result;
|
||||
|
||||
struct {
|
||||
RRDSET *last_rrdset;
|
||||
size_t last_score;
|
||||
} score;
|
||||
};
|
||||
|
||||
// Appends a candidate value to the results of the job, scored with the
// number of labels the chart it came from has in common with the chart of
// the alert. The score is recomputed only when st differs from the chart
// of the previous call. The results array grows on demand.
static void variable_lookup_add_result_with_score(struct variable_lookup_job *vbd, NETDATA_DOUBLE n, RRDSET *st, const char *source __maybe_unused) {
    if(st != vbd->score.last_rrdset) {
        vbd->score.last_score = rrdlabels_common_count(vbd->rc->rrdset->rrdlabels, st->rrdlabels);
        vbd->score.last_rrdset = st;
    }

    if(vbd->result.used >= vbd->result.size) {
        // an empty array is first sized to 1 and then doubled, like any other
        size_t entries = vbd->result.size ? vbd->result.size : 1;
        entries *= 2;

        vbd->result.size = entries;
        vbd->result.array = reallocz(vbd->result.array, sizeof(struct variable_lookup_score) * entries);
    }

    struct variable_lookup_score *slot = &vbd->result.array[vbd->result.used++];
    *slot = (struct variable_lookup_score) {
        .st = st,
        .source = source,
        .value = n,
        .score = vbd->score.last_score,
    };
}
|
||||
|
||||
// Looks the variable up inside one chart and adds every match to the
// results of the job:
//  1. a dimension whose id or name is vbd->dim (pointer comparison),
//     contributing the value picked by vbd->dimension_selection;
//  2. a custom chart variable named vbd->variable.
//
// With stop_on_match, the chart variable is not checked when a dimension
// matched. Returns true when at least one result was added.
static bool variable_lookup_in_chart(struct variable_lookup_job *vbd, RRDSET *st, bool stop_on_match) {
    bool found = false;
    const DICTIONARY_ITEM *item = NULL;
    RRDDIM *rd = NULL;
    dfe_start_read(st->rrddim_root_index, rd) {
        if(rd->id == vbd->dim || rd->name == vbd->dim) {
            // keep a reference to the item, rd is used after the traversal
            item = dictionary_acquired_item_dup(st->rrddim_root_index, rd_dfe.item);
            break;
        }
    }
    dfe_done(rd);

    if (item) {
        switch (vbd->dimension_selection) {
            case DIM_SELECT_NORMAL:
                variable_lookup_add_result_with_score(vbd, (NETDATA_DOUBLE)rd->collector.last_stored_value, st, "last stored value of dimension");
                break;

            case DIM_SELECT_RAW:
                variable_lookup_add_result_with_score(vbd, (NETDATA_DOUBLE)rd->collector.last_collected_value, st, "last collected value of dimension");
                break;

            case DIM_SELECT_LAST_COLLECTED:
                variable_lookup_add_result_with_score(vbd, (NETDATA_DOUBLE)rd->collector.last_collected_time.tv_sec, st, "last collected time of dimension");
                break;
        }

        dictionary_acquired_item_release(st->rrddim_root_index, item);

        if(stop_on_match)
            return true;

        found = true;
    }

    // chart variable
    NETDATA_DOUBLE n;
    if(rrdvar_get_custom_chart_variable_value(st, vbd->variable, &n)) {
        variable_lookup_add_result_with_score(vbd, n, st, "chart variable");
        found = true;
    }

    return found;
}
|
||||
|
||||
// Callback for the instances of a context: looks the variable up in the
// given chart, collecting all its matches. `data` is the lookup job.
// Returns 1 when the chart contributed a result, 0 otherwise.
static int foreach_instance_in_context_cb(RRDSET *st, void *data) {
    struct variable_lookup_job *job = data;

    if(variable_lookup_in_chart(job, st, false))
        return 1;

    return 0;
}
|
||||
|
||||
static bool variable_lookup_context(struct variable_lookup_job *vbd, const char *chart_or_context, const char *dim_id_or_name) {
|
||||
struct variable_lookup_job vbd_back = *vbd;
|
||||
|
||||
vbd->dimension = dim_id_or_name;
|
||||
vbd->dim = string_strdupz(vbd->dimension);
|
||||
vbd->dimension_length = string_strlen(vbd->dim);
|
||||
// vbd->dimension_selection = DIM_SELECT_NORMAL;
|
||||
|
||||
bool found = false;
|
||||
|
||||
// lookup chart in host
|
||||
|
||||
RRDSET_ACQUIRED *rsa = rrdset_find_and_acquire(vbd->host, chart_or_context);
|
||||
if(rsa) {
|
||||
if(variable_lookup_in_chart(vbd, rrdset_acquired_to_rrdset(rsa), false))
|
||||
found = true;
|
||||
rrdset_acquired_release(rsa);
|
||||
}
|
||||
|
||||
// lookup context in contexts, then foreach chart
|
||||
|
||||
if(rrdcontext_foreach_instance_with_rrdset_in_context(vbd->host, chart_or_context, foreach_instance_in_context_cb, vbd) > 0)
|
||||
found = true;
|
||||
|
||||
string_freez(vbd->dim);
|
||||
|
||||
vbd->dimension = vbd_back.dimension;
|
||||
vbd->dim = vbd_back.dim;
|
||||
vbd->dimension_length = vbd_back.dimension_length;
|
||||
// vbd->dimension_selection = vbd_back.dimension_selection;
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
bool alert_variable_from_running_alerts(struct variable_lookup_job *vbd) {
|
||||
bool found = false;
|
||||
RRDCALC *rc;
|
||||
foreach_rrdcalc_in_rrdhost_read(vbd->host, rc) {
|
||||
if(rc->config.name == vbd->variable) {
|
||||
variable_lookup_add_result_with_score(vbd, (NETDATA_DOUBLE)rc->value, rc->rrdset, "alarm value");
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
foreach_rrdcalc_in_rrdhost_done(rc);
|
||||
return found;
|
||||
}
|
||||
|
||||
bool alert_variable_lookup_internal(STRING *variable, void *data, NETDATA_DOUBLE *result, BUFFER *wb) {
|
||||
static STRING *this_string = NULL,
|
||||
*now_string = NULL,
|
||||
*after_string = NULL,
|
||||
*before_string = NULL,
|
||||
*status_string = NULL,
|
||||
*removed_string = NULL,
|
||||
*uninitialized_string = NULL,
|
||||
*undefined_string = NULL,
|
||||
*clear_string = NULL,
|
||||
*warning_string = NULL,
|
||||
*critical_string = NULL,
|
||||
*last_collected_t_string = NULL,
|
||||
*green_string = NULL,
|
||||
*red_string = NULL,
|
||||
*update_every_string = NULL;
|
||||
|
||||
|
||||
struct variable_lookup_job vbd = { 0 };
|
||||
|
||||
// const char *v_name = string2str(variable);
|
||||
// bool trace_this = false;
|
||||
// if(strcmp(v_name, "btrfs_allocated") == 0)
|
||||
// trace_this = true;
|
||||
|
||||
bool found = false;
|
||||
|
||||
const char *source = NULL;
|
||||
RRDSET *source_st = NULL;
|
||||
|
||||
RRDCALC *rc = data;
|
||||
RRDSET *st = rc->rrdset;
|
||||
|
||||
if(!st)
|
||||
return false;
|
||||
|
||||
if(unlikely(!last_collected_t_string)) {
|
||||
this_string = string_strdupz("this");
|
||||
now_string = string_strdupz("now");
|
||||
after_string = string_strdupz("after");
|
||||
before_string = string_strdupz("before");
|
||||
status_string = string_strdupz("status");
|
||||
removed_string = string_strdupz("REMOVED");
|
||||
undefined_string = string_strdupz("UNDEFINED");
|
||||
uninitialized_string = string_strdupz("UNINITIALIZED");
|
||||
clear_string = string_strdupz("CLEAR");
|
||||
warning_string = string_strdupz("WARNING");
|
||||
critical_string = string_strdupz("CRITICAL");
|
||||
last_collected_t_string = string_strdupz("last_collected_t");
|
||||
green_string = string_strdupz("green");
|
||||
red_string = string_strdupz("red");
|
||||
update_every_string = string_strdupz("update_every");
|
||||
}
|
||||
|
||||
if(unlikely(variable == this_string)) {
|
||||
*result = (NETDATA_DOUBLE)rc->value;
|
||||
source = "current alert value";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == after_string)) {
|
||||
*result = (NETDATA_DOUBLE)rc->db_after;
|
||||
source = "current alert query start time";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == before_string)) {
|
||||
*result = (NETDATA_DOUBLE)rc->db_before;
|
||||
source = "current alert query end time";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == now_string)) {
|
||||
*result = (NETDATA_DOUBLE)now_realtime_sec();
|
||||
source = "current wall-time clock timestamp";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == status_string)) {
|
||||
*result = (NETDATA_DOUBLE)rc->status;
|
||||
source = "current alert status";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == removed_string)) {
|
||||
*result = (NETDATA_DOUBLE)RRDCALC_STATUS_REMOVED;
|
||||
source = "removed status constant";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == uninitialized_string)) {
|
||||
*result = (NETDATA_DOUBLE)RRDCALC_STATUS_UNINITIALIZED;
|
||||
source = "uninitialized status constant";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == undefined_string)) {
|
||||
*result = (NETDATA_DOUBLE)RRDCALC_STATUS_UNDEFINED;
|
||||
source = "undefined status constant";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == clear_string)) {
|
||||
*result = (NETDATA_DOUBLE)RRDCALC_STATUS_CLEAR;
|
||||
source = "clear status constant";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == warning_string)) {
|
||||
*result = (NETDATA_DOUBLE)RRDCALC_STATUS_WARNING;
|
||||
source = "warning status constant";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == critical_string)) {
|
||||
*result = (NETDATA_DOUBLE)RRDCALC_STATUS_CRITICAL;
|
||||
source = "critical status constant";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == last_collected_t_string)) {
|
||||
*result = (NETDATA_DOUBLE)st->last_collected_time.tv_sec;
|
||||
source = "current instance last_collected_t";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == update_every_string)) {
|
||||
*result = (NETDATA_DOUBLE)st->update_every;
|
||||
source = "current instance update_every";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == green_string)) {
|
||||
*result = (NETDATA_DOUBLE)rc->config.green;
|
||||
source = "current alert green threshold";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
if(unlikely(variable == red_string)) {
|
||||
*result = (NETDATA_DOUBLE)rc->config.red;
|
||||
source = "current alert red threshold";
|
||||
source_st = st;
|
||||
found = true;
|
||||
goto log;
|
||||
}
|
||||
|
||||
// find the dimension id/name
|
||||
|
||||
vbd = (struct variable_lookup_job){
|
||||
.rc = rc,
|
||||
.host = st->rrdhost,
|
||||
.variable = variable,
|
||||
.dimension = string2str(variable),
|
||||
.dimension_length = string_strlen(variable),
|
||||
.dimension_selection = DIM_SELECT_NORMAL,
|
||||
.dim = string_dup(variable),
|
||||
.result = { 0 },
|
||||
};
|
||||
if (strendswith_lengths(vbd.dimension, vbd.dimension_length, "_raw", 4)) {
|
||||
vbd.dimension_length -= 4;
|
||||
vbd.dimension_selection = DIM_SELECT_RAW;
|
||||
vbd.dim = string_strndupz(vbd.dimension, vbd.dimension_length);
|
||||
} else if (strendswith_lengths(vbd.dimension, vbd.dimension_length, "_last_collected_t", 17)) {
|
||||
vbd.dimension_length -= 17;
|
||||
vbd.dimension_selection = DIM_SELECT_LAST_COLLECTED;
|
||||
vbd.dim = string_strndupz(vbd.dimension, vbd.dimension_length);
|
||||
}
|
||||
|
||||
if(variable_lookup_in_chart(&vbd, st, true)) {
|
||||
found = true;
|
||||
goto find_best_scored;
|
||||
}
|
||||
|
||||
// host variables
|
||||
{
|
||||
NETDATA_DOUBLE n;
|
||||
found = rrdvar_get_custom_host_variable_value(vbd.host, vbd.variable, &n);
|
||||
if(found) {
|
||||
variable_lookup_add_result_with_score(&vbd, n, st, "host variable");
|
||||
goto find_best_scored;
|
||||
}
|
||||
}
|
||||
|
||||
// alert names
|
||||
if(alert_variable_from_running_alerts(&vbd)) {
|
||||
found = true;
|
||||
goto find_best_scored;
|
||||
}
|
||||
|
||||
// find the components of the variable
|
||||
{
|
||||
char id[string_strlen(vbd.dim) + 1];
|
||||
memcpy(id, string2str(vbd.dim), string_strlen(vbd.dim));
|
||||
id[string_strlen(vbd.dim)] = '\0';
|
||||
|
||||
char *dot = strrchr(id, '.');
|
||||
while(dot) {
|
||||
*dot = '\0';
|
||||
|
||||
if(strchr(id, '.') == NULL) break;
|
||||
|
||||
if(variable_lookup_context(&vbd, id, dot + 1))
|
||||
found = true;
|
||||
|
||||
char *dot2 = strrchr(id, '.');
|
||||
*dot = '.';
|
||||
dot = dot2;
|
||||
}
|
||||
}
|
||||
|
||||
find_best_scored:
|
||||
if(found && vbd.result.array) {
|
||||
struct variable_lookup_score *best = &vbd.result.array[0];
|
||||
for (size_t i = 1; i < vbd.result.used; i++)
|
||||
if (vbd.result.array[i].score > best->score)
|
||||
best = &vbd.result.array[i];
|
||||
|
||||
source = best->source;
|
||||
source_st = best->st;
|
||||
*result = best->value;
|
||||
freez(vbd.result.array);
|
||||
}
|
||||
else {
|
||||
found = false;
|
||||
*result = NAN;
|
||||
}
|
||||
|
||||
log:
|
||||
#ifdef NETDATA_LOG_HEALTH_VARIABLES_LOOKUP
|
||||
if(found) {
|
||||
nd_log(NDLS_DAEMON, NDLP_INFO,
|
||||
"HEALTH_VARIABLE_LOOKUP: variable '%s' of alert '%s' of chart '%s', context '%s', host '%s' "
|
||||
"resolved with %s of chart '%s' and context '%s'",
|
||||
string2str(variable),
|
||||
string2str(rc->config.name),
|
||||
string2str(rc->rrdset->id),
|
||||
string2str(rc->rrdset->context),
|
||||
string2str(rc->rrdset->rrdhost->hostname),
|
||||
source,
|
||||
string2str(source_st->id),
|
||||
string2str(source_st->context)
|
||||
);
|
||||
}
|
||||
else {
|
||||
nd_log(NDLS_DAEMON, NDLP_INFO,
|
||||
"HEALTH_VARIABLE_LOOKUP: variable '%s' of alert '%s' of chart '%s', context '%s', host '%s' "
|
||||
"could not be resolved",
|
||||
string2str(variable),
|
||||
string2str(rc->config.name),
|
||||
string2str(rc->rrdset->id),
|
||||
string2str(rc->rrdset->context),
|
||||
string2str(rc->rrdset->rrdhost->hostname)
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
if(unlikely(wb)) {
|
||||
buffer_json_member_add_string(wb, "variable", string2str(variable));
|
||||
buffer_json_member_add_string(wb, "instance", string2str(st->id));
|
||||
buffer_json_member_add_string(wb, "context", string2str(st->context));
|
||||
buffer_json_member_add_boolean(wb, "found", found);
|
||||
|
||||
if (found) {
|
||||
buffer_json_member_add_double(wb, "value", *result);
|
||||
buffer_json_member_add_object(wb, "source");
|
||||
{
|
||||
buffer_json_member_add_string(wb, "description", source);
|
||||
buffer_json_member_add_string(wb, "instance", string2str(source_st->id));
|
||||
buffer_json_member_add_string(wb, "context", string2str(source_st->context));
|
||||
buffer_json_member_add_uint64(wb, "candidates", vbd.result.used ? vbd.result.used : 1);
|
||||
}
|
||||
buffer_json_object_close(wb); // source
|
||||
}
|
||||
}
|
||||
|
||||
string_freez(vbd.dim);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
// Variable lookup entry point without tracing.
// This is the function registered as the expression variable-lookup callback
// for an alert's calculation / warning / critical expressions; 'data' is the
// pointer that was registered together with the callback.
// Returns whatever alert_variable_lookup_internal() returns, with the
// resolved value stored in *result.
bool alert_variable_lookup(STRING *variable, void *data, NETDATA_DOUBLE *result) {
    BUFFER *no_trace = NULL; // no JSON trace output is requested on this path

    return alert_variable_lookup_internal(variable, data, result, no_trace);
}
|
||||
|
||||
// Resolve 'variable' in the scope of chart 'st' and write a JSON description
// of the lookup into 'wb' (the buffer is flushed first).
//
// A temporary, mostly empty RRDCALC that only points to 'st' is used as the
// lookup scope.
//
// NOTE(review): 'code' is initialized to HTTP_RESP_INTERNAL_SERVER_ERROR and
// never changed, so this function always returns that value, even when the
// JSON was generated successfully - confirm whether a success code is intended.
int alert_variable_lookup_trace(RRDHOST *host __maybe_unused, RRDSET *st, const char *variable, BUFFER *wb) {
    int code = HTTP_RESP_INTERNAL_SERVER_ERROR;

    buffer_flush(wb);
    buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);

    RRDCALC rc = { .rrdset = st };
    NETDATA_DOUBLE value;

    STRING *name = string_strdupz(variable);
    alert_variable_lookup_internal(name, &rc, &value, wb);
    string_freez(name);

    buffer_json_finalize(wb);

    return code;
}
|
539
health/rrdcalc.c
Normal file
539
health/rrdcalc.c
Normal file
|
@ -0,0 +1,539 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "database/rrd.h"
|
||||
#include "health_internals.h"
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC helpers
|
||||
|
||||
// Add to 'wb' a JSON array member named 'key', holding one string for every
// RRDCALC_FLAG_* bit that is set in 'flags'. The strings are emitted in the
// fixed order of the table below.
void rrdcalc_flags_to_json_array(BUFFER *wb, const char *key, RRDCALC_FLAGS flags) {
    static const struct {
        RRDCALC_FLAGS bit;
        const char *label;
    } known_flags[] = {
        { RRDCALC_FLAG_DB_ERROR,   "DB_ERROR"   },
        { RRDCALC_FLAG_DB_NAN,     "DB_NAN"     },
        { RRDCALC_FLAG_CALC_ERROR, "CALC_ERROR" },
        { RRDCALC_FLAG_WARN_ERROR, "WARN_ERROR" },
        { RRDCALC_FLAG_CRIT_ERROR, "CRIT_ERROR" },
        { RRDCALC_FLAG_RUNNABLE,   "RUNNABLE"   },
        { RRDCALC_FLAG_DISABLED,   "DISABLED"   },
        { RRDCALC_FLAG_SILENCED,   "SILENCED"   },
        { RRDCALC_FLAG_RUN_ONCE,   "RUN_ONCE"   },
    };

    buffer_json_member_add_array(wb, key);

    for(size_t i = 0; i < sizeof(known_flags) / sizeof(known_flags[0]); i++) {
        if(flags & known_flags[i].bit)
            buffer_json_add_array_item_string(wb, known_flags[i].label);
    }

    buffer_json_array_close(wb);
}
|
||||
|
||||
// Map an RRDCALC_STATUS value to its upper-case name.
// Values outside the enum are logged as an error and reported as "UNKNOWN".
// The returned pointer refers to a string literal and must not be freed.
inline const char *rrdcalc_status2string(RRDCALC_STATUS status) {
    const char *text;

    switch(status) {
        case RRDCALC_STATUS_REMOVED:
            text = "REMOVED";
            break;

        case RRDCALC_STATUS_UNDEFINED:
            text = "UNDEFINED";
            break;

        case RRDCALC_STATUS_UNINITIALIZED:
            text = "UNINITIALIZED";
            break;

        case RRDCALC_STATUS_CLEAR:
            text = "CLEAR";
            break;

        case RRDCALC_STATUS_RAISED:
            text = "RAISED";
            break;

        case RRDCALC_STATUS_WARNING:
            text = "WARNING";
            break;

        case RRDCALC_STATUS_CRITICAL:
            text = "CRITICAL";
            break;

        default:
            netdata_log_error("Unknown alarm status %d", status);
            text = "UNKNOWN";
            break;
    }

    return text;
}
|
||||
|
||||
// Return the alarm id to use for the alert 'name' on 'chart'.
//
// Lookup order:
//  1. the in-memory alarm log of the host: an entry with the same name, chart
//     and config hash donates its alarm_id (and, when next_event_id is given,
//     its alarm_event_id + 1);
//  2. sql_get_alarm_id();
//  3. a fresh id from host->health_log.next_alarm_id, which is seeded with the
//     current wall-clock time the first time it is needed.
//
// name and chart are compared by pointer.
//
// NOTE(review): host->health_log.next_alarm_id is modified while only the
// read side of the spinlock is held - confirm this is safe for concurrent callers.
uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id) {
    uint32_t alarm_id;

    rw_spinlock_read_lock(&host->health_log.spinlock);

    // re-use old IDs, by looking them up in the alarm log
    ALARM_ENTRY *match = host->health_log.alarms;
    while(match) {
        if(unlikely(name == match->name && chart == match->chart && !uuid_memcmp(&match->config_hash_id, config_hash_id)))
            break;

        match = match->next;
    }

    if(match) {
        if(next_event_id)
            *next_event_id = match->alarm_event_id + 1;

        alarm_id = match->alarm_id;
    }
    else {
        alarm_id = sql_get_alarm_id(host, chart, name, next_event_id);

        if(!alarm_id) {
            if(unlikely(!host->health_log.next_alarm_id))
                host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();

            alarm_id = host->health_log.next_alarm_id++;
        }
    }

    rw_spinlock_read_unlock(&host->health_log.spinlock);

    return alarm_id;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC replacing info/summary text variables with RRDSET labels
|
||||
|
||||
// Expand "${family}" and "${label:KEY}" tokens found in 'line', using the
// chart linked to 'rc', and return the result as a new STRING.
//
// Returns NULL when 'line' is NULL or empty. The caller owns the returned
// STRING.
//
// Rules:
//  - "${family}" becomes the chart family, or an empty string when there is
//    no linked chart / family;
//  - "${label:KEY}" becomes the value of chart label KEY; when the chart has
//    no such label (or there is no chart) the token is left untouched;
//  - a '$' that does not start a complete "${...}" token (no '{', no closing
//    '}', or longer than RRDCALC_VAR_MAX - 1 characters) is copied verbatim
//    and scanning continues after it;
//  - replacement text is never re-scanned, so a label value containing
//    "${...}" cannot trigger recursive expansion.
static STRING *rrdcalc_replace_variables_with_rrdset_labels(const char *line, RRDCALC *rc) {
    if (!line || !*line)
        return NULL;

    size_t pos = 0;
    char *temp = strdupz(line);
    char var[RRDCALC_VAR_MAX];
    char *m;

    while ((m = strchr(temp + pos, '$')) != NULL) {
        size_t offset = (size_t)(m - temp);

        // unless a replacement happens, resume right after this '$'
        pos = offset + 1;

        if (m[1] != '{')
            continue;

        // copy the candidate token, up to and including the closing '}'
        size_t i = 0;
        bool closed = false;
        for (const char *e = m; *e && i < RRDCALC_VAR_MAX - 1; e++) {
            var[i++] = *e;

            if (*e == '}') {
                closed = true;
                break;
            }
        }
        var[i] = '\0';

        // unterminated or over-long tokens are not variables
        if (!closed)
            continue;

        const char *replacement = NULL;
        char *lbl_value = NULL;

        if (!strcmp(var, RRDCALC_VAR_FAMILY)) {
            replacement = (rc->rrdset && rc->rrdset->family) ? rrdset_family(rc->rrdset) : "";
        }
        else if (!strncmp(var, RRDCALC_VAR_LABEL, RRDCALC_VAR_LABEL_LEN)) {
            // var is "${label:KEY}" and is known to end with '}',
            // so i >= RRDCALC_VAR_LABEL_LEN + 1 and key_len cannot underflow
            size_t key_len = i - RRDCALC_VAR_LABEL_LEN - 1;

            char label_key[RRDCALC_VAR_MAX];
            memcpy(label_key, var + RRDCALC_VAR_LABEL_LEN, key_len);
            label_key[key_len] = '\0';

            if (likely(rc->rrdset && rc->rrdset->rrdlabels)) {
                rrdlabels_get_value_strdup_or_null(rc->rrdset->rrdlabels, &lbl_value, label_key);
                replacement = lbl_value;
            }
        }

        if (replacement) {
            size_t replacement_len = strlen(replacement);

            char *buf = find_and_replace(temp, var, replacement, m);
            freez(temp);
            temp = buf;

            // continue after the inserted text; clamp so that the next
            // strchr() can never start beyond the string terminator
            pos = offset + replacement_len;
            size_t temp_len = strlen(temp);
            if (pos > temp_len)
                pos = temp_len;
        }

        if (lbl_value)
            freez(lbl_value);
    }

    STRING *ret = string_strdupz(temp);
    freez(temp);

    return ret;
}
|
||||
|
||||
// Refresh rc->info and rc->summary from their configured templates.
//
// When the alert is linked to a chart that has labels, and the labels version
// differs from the one remembered in rc->labels_version, both texts are
// regenerated with variable replacement and the new version is remembered.
// In every case, a text that is still NULL afterwards falls back to a
// reference of the configured (unexpanded) text.
void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc) {
    RRDSET *st = rc->rrdset;

    if(st && st->rrdlabels) {
        size_t current_version = rrdlabels_version(st->rrdlabels);

        if(rc->labels_version != current_version) {
            STRING *previous_info = rc->info;
            rc->info = rrdcalc_replace_variables_with_rrdset_labels(string2str(rc->config.info), rc);
            string_freez(previous_info);

            STRING *previous_summary = rc->summary;
            rc->summary = rrdcalc_replace_variables_with_rrdset_labels(string2str(rc->config.summary), rc);
            string_freez(previous_summary);

            rc->labels_version = current_version;
        }
    }

    if(!rc->summary)
        rc->summary = string_dup(rc->config.summary);

    if(!rc->info)
        rc->info = string_dup(rc->config.info);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC index management for RRDSET
|
||||
|
||||
// the dictionary requires a unique key for every item
|
||||
// rrdcalc_key() builds it as "{alert name},on[{chart id or name}]".
|
||||
|
||||
#define RRDCALC_MAX_KEY_SIZE 1024
|
||||
// Format the dictionary key of an alert into dst (at most dst_len bytes are
// passed to snprintfz) as "<alert>,on[<chart>]".
// Returns the value returned by snprintfz().
static size_t rrdcalc_key(char *dst, size_t dst_len, const char *chart, const char *alert) {
    size_t written = snprintfz(dst, dst_len, "%s,on[%s]", alert, chart);

    return written;
}
|
||||
|
||||
// Find and acquire the alert called 'alert_name' on chart 'st', searching the
// alert index of the chart's host. The key built from the chart id is tried
// first, then the key built from the chart name.
// Returns NULL when neither key exists. A non-NULL result has to be given
// back with rrdcalc_from_rrdset_release().
const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name) {
    char key[RRDCALC_MAX_KEY_SIZE + 1];
    size_t key_len;
    const RRDCALC_ACQUIRED *rca;

    // first attempt: by chart id
    key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_id(st), alert_name);
    rca = (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(st->rrdhost->rrdcalc_root_index, key, (ssize_t)key_len);
    if(rca)
        return rca;

    // second attempt: by chart name
    key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE, rrdset_name(st), alert_name);
    rca = (const RRDCALC_ACQUIRED *)dictionary_get_and_acquire_item_advanced(st->rrdhost->rrdcalc_root_index, key, (ssize_t)key_len);

    return rca;
}
|
||||
|
||||
// Release an alert previously acquired with rrdcalc_from_rrdset_get().
// A NULL 'rca' is ignored.
void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca) {
    if(rca) {
        const DICTIONARY_ITEM *item = (const DICTIONARY_ITEM *)rca;
        dictionary_acquired_item_release(st->rrdhost->rrdcalc_root_index, item);
    }
}
|
||||
|
||||
RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca) {
|
||||
if(rca)
|
||||
return dictionary_acquired_item_value((const DICTIONARY_ITEM *)rca);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC managing the linking with RRDSET
|
||||
|
||||
// Link 'rc' to the chart it already points to (rc->rrdset must be set):
//  - append it to the chart's list of alerts, under the list's write lock;
//  - add an alarm log entry for the transition from REMOVED to the alert's
//    current status;
//  - flag the chart as having alerts linked.
static void rrdcalc_link_to_rrdset(RRDCALC *rc) {
    RRDSET *st = rc->rrdset;
    RRDHOST *host = st->rrdhost;

    rw_spinlock_write_lock(&st->alerts.spinlock);
    DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(st->alerts.base, rc, prev, next);
    rw_spinlock_write_unlock(&st->alerts.spinlock);

    time_t now = now_realtime_sec();

    ALARM_ENTRY *ae = health_create_alarm_entry(
        host,
        rc,
        now,
        now - rc->last_status_change,
        rc->old_value,
        rc->value,
        RRDCALC_STATUS_REMOVED,
        rc->status,
        0,
        rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0);

    health_alarm_log_add_entry(host, ae);
    rrdset_flag_set(st, RRDSET_FLAG_HAS_RRDCALC_LINKED);
}
|
||||
|
||||
// Detach 'rc' from the chart it is linked to.
//
// Unless the alert is already in REMOVED status, an alarm log entry for the
// transition from its current status to REMOVED is added first. The alert is
// then removed from the chart's list of alerts and rc->rrdset is cleared.
//
// having_ll_wrlock: pass true when the caller already holds the write lock of
// st->alerts.spinlock; the lock is then neither taken nor released here.
//
// An alert that is not linked to any chart is only reported as an error.
static void rrdcalc_unlink_from_rrdset(RRDCALC *rc, bool having_ll_wrlock) {
    RRDSET *st = rc->rrdset;

    if(!st) {
        netdata_log_error(
            "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET",
            rrdcalc_chart_name(rc), rrdcalc_name(rc));
        return;
    }

    RRDHOST *host = st->rrdhost;
    time_t now = now_realtime_sec();

    if (likely(rc->status != RRDCALC_STATUS_REMOVED)) {
        ALARM_ENTRY *removal = health_create_alarm_entry(
            host, rc,
            now, now - rc->last_status_change,
            rc->old_value, rc->value,
            rc->status, RRDCALC_STATUS_REMOVED,
            0, 0);

        health_alarm_log_add_entry(host, removal);
    }

    // unlink it
    const bool lock_here = !having_ll_wrlock;

    if(lock_here)
        rw_spinlock_write_lock(&st->alerts.spinlock);

    DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(st->alerts.base, rc, prev, next);

    if(lock_here)
        rw_spinlock_write_unlock(&st->alerts.spinlock);

    rc->rrdset = NULL;
}
|
||||
|
||||
// Check whether alert 'rc' applies to chart 'st'.
//
// All of the following must hold:
//  - rc->chart is the chart's id or name (compared by pointer);
//  - the module pattern, when set, matches the chart's module name;
//  - the plugin pattern, when set, matches the chart's plugin name;
//  - the chart labels pattern, when set and the chart has labels, matches them.
static inline bool rrdcalc_check_if_it_matches_rrdset(RRDCALC *rc, RRDSET *st) {
    if ((rc->chart != st->id) && (rc->chart != st->name))
        return false;

    if (rc->match.module_pattern && !simple_pattern_matches_string(rc->match.module_pattern, st->module_name))
        return false;

    // the plugin pattern has to be tested against the plugin name
    // (it used to be tested against the module name)
    if (rc->match.plugin_pattern && !simple_pattern_matches_string(rc->match.plugin_pattern, st->plugin_name))
        return false;

    if (st->rrdlabels && rc->match.chart_labels_pattern && !rrdlabels_match_simple_pattern_parsed(
            st->rrdlabels, rc->match.chart_labels_pattern, '=', NULL))
        return false;

    return true;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC rrdhost index management - constructor
|
||||
|
||||
struct rrdcalc_constructor {
|
||||
RRDSET *rrdset;
|
||||
RRD_ALERT_PROTOTYPE *ap;
|
||||
|
||||
enum {
|
||||
RRDCALC_REACT_NONE,
|
||||
RRDCALC_REACT_NEW,
|
||||
} react_action;
|
||||
};
|
||||
|
||||
// Insert callback of the host's alert index: initialize the RRDCALC stored in
// a new dictionary item, using the chart and the prototype found in the
// constructor data (struct rrdcalc_constructor).
//
// On return the constructor is marked RRDCALC_REACT_NEW, so that the react
// callback links the alert to its chart.
static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) {
    struct rrdcalc_constructor *ctr = constructor_data;
    RRD_ALERT_PROTOTYPE *ap = ctr->ap;
    RRDSET *st = ctr->rrdset;
    RRDHOST *host = st->rrdhost;
    RRDCALC *rc = rrdcalc;

    // identity: dictionary key, chart link and chart id
    rc->key = string_strdupz(dictionary_acquired_item_name(item));
    rc->rrdset = st;
    rc->chart = string_dup(st->id);

    // configuration and matching rules come from the prototype
    health_prototype_copy_config(&rc->config, &ap->config);
    health_prototype_copy_match_without_patterns(&rc->match, &ap->match);

    // runtime state
    rc->next_event_id = 1;
    rc->value = NAN;
    rc->old_value = NAN;
    rc->last_status_change_value = rc->value;
    rc->last_repeat = 0;
    rc->times_repeat = 0;
    rc->last_status_change = now_realtime_sec();

    // without configured units, use the units of the chart
    if(!rc->config.units)
        rc->config.units = string_dup(st->units);

    // an alert is not allowed to run more frequently than its chart is updated
    if(rc->config.update_every < st->update_every) {
        netdata_log_info(
            "HEALTH: alert '%s.%s' has update every %d, less than chart update every %d. "
            "Setting alarm update frequency to %d.",
            string2str(st->id), string2str(rc->config.name),
            rc->config.update_every, st->update_every, st->update_every);

        rc->config.update_every = st->update_every;
    }

    rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->config.name, &rc->next_event_id, &rc->config.hash_id);

    // the chart takes the alert's green / red thresholds only while its own are unset
    if(!isnan(rc->config.green) && isnan(st->green))
        st->green = rc->config.green;

    if(!isnan(rc->config.red) && isnan(st->red))
        st->red = rc->config.red;

    // all three expressions resolve their variables in the scope of this alert
    expression_set_variable_lookup_callback(rc->config.calculation, alert_variable_lookup, rc);
    expression_set_variable_lookup_callback(rc->config.warning, alert_variable_lookup, rc);
    expression_set_variable_lookup_callback(rc->config.critical, alert_variable_lookup, rc);

    rrdcalc_update_info_using_rrdset_labels(rc);

    ctr->react_action = RRDCALC_REACT_NEW;
}
|
||||
|
||||
// Conflict callback of the host's alert index: the key already exists.
// The existing alert is left untouched; the constructor is marked
// RRDCALC_REACT_NONE so that the react callback does not link anything.
// Always returns false.
static bool rrdcalc_rrdhost_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc __maybe_unused, void *rrdcalc_new __maybe_unused, void *constructor_data) {
    ((struct rrdcalc_constructor *)constructor_data)->react_action = RRDCALC_REACT_NONE;

    return false;
}
|
||||
|
||||
// React callback of the host's alert index: link the alert to its chart,
// but only when the insert callback marked it as newly created.
static void rrdcalc_rrdhost_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) {
    struct rrdcalc_constructor *ctr = constructor_data;

    if(ctr->react_action != RRDCALC_REACT_NEW)
        return;

    RRDCALC *rc = rrdcalc;
    rrdcalc_link_to_rrdset(rc);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC rrdhost index management - destructor
|
||||
|
||||
// Release everything an RRDCALC owns: its match rules, its configuration and
// its key / chart / info / summary strings.
// The RRDCALC structure itself is not freed here. NULL is ignored.
static void rrdcalc_free_internals(RRDCALC *rc) {
    if(likely(rc)) {
        rrd_alert_match_cleanup(&rc->match);
        rrd_alert_config_cleanup(&rc->config);

        string_freez(rc->key);
        string_freez(rc->chart);

        string_freez(rc->info);
        string_freez(rc->summary);
    }
}
|
||||
|
||||
// Delete callback of the host's alert index: unlink the alert from its chart
// when it is still linked (taking the chart's alerts lock), then release
// everything it owns.
static void rrdcalc_rrdhost_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdhost __maybe_unused) {
    RRDCALC *rc = rrdcalc;

    if(unlikely(rc->rrdset))
        rrdcalc_unlink_from_rrdset(rc, false);

    // any destruction actions that require other locks
    // have to be placed in rrdcalc_del(), because the object is actually locked for deletion

    rrdcalc_free_internals(rc);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC rrdhost index management - index API
|
||||
|
||||
// Create the alert index of 'host' and register its insert, conflict, react
// and delete callbacks. Does nothing when the index already exists.
void rrdcalc_rrdhost_index_init(RRDHOST *host) {
    if(host->rrdcalc_root_index)
        return;

    host->rrdcalc_root_index = dictionary_create_advanced(
        DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
        &dictionary_stats_category_rrdhealth, sizeof(RRDCALC));

    dictionary_register_insert_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_insert_callback, NULL);
    dictionary_register_conflict_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_conflict_callback, NULL);
    dictionary_register_react_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_react_callback, NULL);
    dictionary_register_delete_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_delete_callback, host);
}
|
||||
|
||||
// Destroy the alert index of 'host' and clear the pointer to it.
void rrdcalc_rrdhost_index_destroy(RRDHOST *host) {
    DICTIONARY *index = host->rrdcalc_root_index;

    dictionary_destroy(index);
    host->rrdcalc_root_index = NULL;
}
|
||||
|
||||
// Add to the alert index of 'host' an alert for chart 'st', built from
// prototype 'ap'. The index key is derived from the chart id and the alert name.
// Returns true when the callbacks reported a newly created alert
// (RRDCALC_REACT_NEW), false otherwise (e.g. the key already existed).
bool rrdcalc_add_from_prototype(RRDHOST *host, RRDSET *st, RRD_ALERT_PROTOTYPE *ap) {
    struct rrdcalc_constructor ctr = {
        .rrdset = st,
        .ap = ap,
        .react_action = RRDCALC_REACT_NONE,
    };

    char key[RRDCALC_MAX_KEY_SIZE + 1];
    size_t key_len = rrdcalc_key(key, RRDCALC_MAX_KEY_SIZE,
                                 string2str(st->id), string2str(ap->config.name));

    dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)key_len,
                            NULL, sizeof(RRDCALC), &ctr);

    return ctr.react_action == RRDCALC_REACT_NEW;
}
|
||||
|
||||
// Unlink 'rc' from its chart (when linked) and delete it from the alert index
// of 'host', using the key stored in rc->key.
// having_ll_wrlock is forwarded to rrdcalc_unlink_from_rrdset().
void rrdcalc_unlink_and_delete(RRDHOST *host, RRDCALC *rc, bool having_ll_wrlock) {
    if(rc->rrdset)
        rrdcalc_unlink_from_rrdset(rc, having_ll_wrlock);

    const char *key = string2str(rc->key);
    ssize_t key_len = (ssize_t)string_strlen(rc->key);

    dictionary_del_advanced(host->rrdcalc_root_index, key, key_len);
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALC cleanup API functions
|
||||
|
||||
// Unlink and delete every alert linked to chart 'st', holding the write lock
// of the chart's alerts list for the whole operation.
//
// The head of the list is processed repeatedly; if the same head shows up
// twice in a row the list is considered malformed, an error is logged and
// the cleanup stops.
void rrdcalc_unlink_and_delete_all_rrdset_alerts(RRDSET *st) {
    RRDCALC *previous = NULL;

    rw_spinlock_write_lock(&st->alerts.spinlock);

    for(RRDCALC *rc = st->alerts.base; rc ; rc = st->alerts.base) {
        if(rc == previous) {
            netdata_log_error("RRDCALC: malformed list of alerts linked to chart - cannot cleanup - giving up.");
            break;
        }

        previous = rc;
        rrdcalc_unlink_and_delete(st->rrdhost, rc, true);
    }

    rw_spinlock_write_unlock(&st->alerts.spinlock);
}
|
||||
|
||||
// Flush the alert index of 'host'.
void rrdcalc_delete_all(RRDHOST *host) {
    DICTIONARY *index = host->rrdcalc_root_index;

    dictionary_flush(index);
}
|
||||
|
||||
void rrd_alert_match_cleanup(struct rrd_alert_match *am) {
|
||||
if(am->is_template)
|
||||
string_freez(am->on.context);
|
||||
else
|
||||
string_freez(am->on.chart);
|
||||
|
||||
string_freez(am->os);
|
||||
simple_pattern_free(am->os_pattern);
|
||||
|
||||
string_freez(am->host);
|
||||
simple_pattern_free(am->host_pattern);
|
||||
|
||||
string_freez(am->plugin);
|
||||
simple_pattern_free(am->plugin_pattern);
|
||||
|
||||
string_freez(am->module);
|
||||
simple_pattern_free(am->module_pattern);
|
||||
|
||||
string_freez(am->charts);
|
||||
simple_pattern_free(am->charts_pattern);
|
||||
|
||||
string_freez(am->host_labels);
|
||||
simple_pattern_free(am->host_labels_pattern);
|
||||
|
||||
string_freez(am->chart_labels);
|
||||
simple_pattern_free(am->chart_labels_pattern);
|
||||
}
|
||||
|
||||
void rrd_alert_config_cleanup(struct rrd_alert_config *ac) {
|
||||
string_freez(ac->name);
|
||||
|
||||
string_freez(ac->exec);
|
||||
string_freez(ac->recipient);
|
||||
|
||||
string_freez(ac->classification);
|
||||
string_freez(ac->component);
|
||||
string_freez(ac->type);
|
||||
|
||||
string_freez(ac->source);
|
||||
string_freez(ac->units);
|
||||
string_freez(ac->summary);
|
||||
string_freez(ac->info);
|
||||
string_freez(ac->lookup);
|
||||
|
||||
string_freez(ac->dimensions);
|
||||
|
||||
expression_free(ac->calculation);
|
||||
expression_free(ac->warning);
|
||||
expression_free(ac->critical);
|
||||
}
|
146
health/rrdcalc.h
Normal file
146
health/rrdcalc.h
Normal file
|
@ -0,0 +1,146 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "../database/rrd.h"
|
||||
#include "../web/api/queries/rrdr.h"
|
||||
#include "health_prototypes.h"
|
||||
|
||||
#ifndef NETDATA_RRDCALC_H
|
||||
#define NETDATA_RRDCALC_H 1
|
||||
|
||||
// calculated variables (defined in health configuration)
|
||||
// These aggregate time-series data at fixed intervals
|
||||
// (defined in their update_every member below)
|
||||
// They increase the overhead of netdata.
|
||||
//
|
||||
// These calculations are stored under RRDHOST.
|
||||
// They are also linked to RRDSET (of course only when a
|
||||
// matching chart is found).
|
||||
|
||||
// The status of an alert.
// The numeric values are relied upon elsewhere - DO NOT CHANGE THESE NUMBERS.
typedef enum rrdcalc_status {
    RRDCALC_STATUS_REMOVED       = -2,
    RRDCALC_STATUS_UNDEFINED     = -1,
    RRDCALC_STATUS_UNINITIALIZED = 0,
    RRDCALC_STATUS_CLEAR         = 1,
    RRDCALC_STATUS_RAISED        = 2, // DO NOT CHANGE THESE NUMBERS
    RRDCALC_STATUS_WARNING       = 3, // DO NOT CHANGE THESE NUMBERS
    RRDCALC_STATUS_CRITICAL      = 4, // DO NOT CHANGE THESE NUMBERS
} RRDCALC_STATUS;
|
||||
|
||||
// Runtime flags of an alert (bitmask, kept in RRDCALC.run_flags).
// Bit 2 is intentionally unused.
typedef enum {
    RRDCALC_FLAG_DB_ERROR   = (1 << 0),
    RRDCALC_FLAG_DB_NAN     = (1 << 1),
    // RRDCALC_FLAG_DB_STALE = (1 << 2),
    RRDCALC_FLAG_CALC_ERROR = (1 << 3),
    RRDCALC_FLAG_WARN_ERROR = (1 << 4),
    RRDCALC_FLAG_CRIT_ERROR = (1 << 5),
    RRDCALC_FLAG_RUNNABLE   = (1 << 6),
    RRDCALC_FLAG_DISABLED   = (1 << 7),
    RRDCALC_FLAG_SILENCED   = (1 << 8),
    RRDCALC_FLAG_RUN_ONCE   = (1 << 9),
} RRDCALC_FLAGS;
|
||||
void rrdcalc_flags_to_json_array(BUFFER *wb, const char *key, RRDCALC_FLAGS flags);
|
||||
|
||||
#define RRDCALC_ALL_OPTIONS_EXCLUDING_THE_RRDR_ONES (RRDCALC_OPTION_NO_CLEAR_NOTIFICATION)
|
||||
|
||||
struct rrdcalc {
|
||||
uint32_t id; // the unique id of this alarm
|
||||
uint32_t next_event_id; // the next event id that will be used for this alarm
|
||||
|
||||
STRING *key; // the unique key in the host's rrdcalc_root_index
|
||||
STRING *chart; // the chart id this should be linked to
|
||||
|
||||
struct rrd_alert_match match;
|
||||
struct rrd_alert_config config;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// runtime information
|
||||
|
||||
STRING *summary; // the original summary field before any variable replacement
|
||||
STRING *info; // the original info field before any variable replacement
|
||||
|
||||
RRDCALC_STATUS old_status; // the old status of the alarm
|
||||
RRDCALC_STATUS status; // the current status of the alarm
|
||||
|
||||
NETDATA_DOUBLE value; // the current value of the alarm
|
||||
NETDATA_DOUBLE old_value; // the previous value of the alarm
|
||||
NETDATA_DOUBLE last_status_change_value; // the value at the last status change
|
||||
|
||||
RRDCALC_FLAGS run_flags; // check RRDCALC_FLAG_*
|
||||
|
||||
time_t last_updated; // the last update timestamp of the alarm
|
||||
time_t next_update; // the next update timestamp of the alarm
|
||||
time_t last_status_change; // the timestamp of the last time this alarm changed status
|
||||
time_t last_repeat; // the last time the alarm got repeated
|
||||
uint32_t times_repeat; // number of times the alarm got repeated
|
||||
|
||||
time_t db_after; // the first timestamp evaluated by the db lookup
|
||||
time_t db_before; // the last timestamp evaluated by the db lookup
|
||||
|
||||
time_t delay_up_to_timestamp; // the timestamp up to which we should delay notifications
|
||||
int delay_up_current; // the current up notification delay duration
|
||||
int delay_down_current; // the current down notification delay duration
|
||||
int delay_last; // the last delay we used
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// the chart this alarm it is linked to
|
||||
|
||||
size_t labels_version;
|
||||
struct rrdset *rrdset;
|
||||
|
||||
struct rrdcalc *next;
|
||||
struct rrdcalc *prev;
|
||||
};
|
||||
|
||||
// accessors returning the alert's strings as const char *
#define rrdcalc_name(rc)           string2str((rc)->config.name)
#define rrdcalc_chart_name(rc)     string2str((rc)->chart)
#define rrdcalc_exec(rc)           string2str((rc)->config.exec)
#define rrdcalc_recipient(rc)      string2str((rc)->config.recipient)
#define rrdcalc_classification(rc) string2str((rc)->config.classification)
#define rrdcalc_component(rc)      string2str((rc)->config.component)
#define rrdcalc_type(rc)           string2str((rc)->config.type)
#define rrdcalc_source(rc)         string2str((rc)->config.source)
#define rrdcalc_units(rc)          string2str((rc)->config.units)
#define rrdcalc_dimensions(rc)     string2str((rc)->config.dimensions)

// iteration over all the alerts of a host;
// every foreach_*() has to be paired with foreach_rrdcalc_in_rrdhost_done()
#define foreach_rrdcalc_in_rrdhost_read(host, rc) \
    dfe_start_read((host)->rrdcalc_root_index, rc)

#define foreach_rrdcalc_in_rrdhost_reentrant(host, rc) \
    dfe_start_reentrant((host)->rrdcalc_root_index, rc)

#define foreach_rrdcalc_in_rrdhost_done(rc) \
    dfe_done(rc)

// non-zero when the alert is configured with a database lookup
#define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->config.after)
|
||||
|
||||
void rrdcalc_update_info_using_rrdset_labels(RRDCALC *rc);
|
||||
|
||||
const RRDCALC_ACQUIRED *rrdcalc_from_rrdset_get(RRDSET *st, const char *alert_name);
|
||||
void rrdcalc_from_rrdset_release(RRDSET *st, const RRDCALC_ACQUIRED *rca);
|
||||
RRDCALC *rrdcalc_acquired_to_rrdcalc(const RRDCALC_ACQUIRED *rca);
|
||||
|
||||
const char *rrdcalc_status2string(RRDCALC_STATUS status);
|
||||
|
||||
uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id);
|
||||
|
||||
// Returns 1 when the alert is configured to repeat its notifications while in
// warning or critical state (a positive repeat interval), 0 otherwise.
static inline int rrdcalc_isrepeating(RRDCALC *rc) {
    if (unlikely(rc->config.warn_repeat_every > 0 || rc->config.crit_repeat_every > 0))
        return 1;

    return 0;
}
|
||||
|
||||
void rrdcalc_unlink_and_delete_all_rrdset_alerts(RRDSET *st);
|
||||
void rrdcalc_delete_all(RRDHOST *host);
|
||||
|
||||
void rrdcalc_rrdhost_index_init(RRDHOST *host);
|
||||
void rrdcalc_rrdhost_index_destroy(RRDHOST *host);
|
||||
|
||||
void rrdcalc_unlink_and_delete(RRDHOST *host, RRDCALC *rc, bool having_ll_wrlock);
|
||||
|
||||
// variables that can be used in the info / summary text of alerts
#define RRDCALC_VAR_MAX       100           // size of the buffer a "${...}" token is copied into
#define RRDCALC_VAR_FAMILY    "${family}"   // replaced with the family of the chart
#define RRDCALC_VAR_LABEL     "${label:"    // prefix of "${label:KEY}", replaced with the value of chart label KEY
#define RRDCALC_VAR_LABEL_LEN (sizeof(RRDCALC_VAR_LABEL)-1)
|
||||
|
||||
#endif //NETDATA_RRDCALC_H
|
342
health/rrdvar.c
Normal file
342
health/rrdvar.c
Normal file
|
@ -0,0 +1,342 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "database/rrd.h"
|
||||
|
||||
typedef struct rrdvar {
|
||||
NETDATA_DOUBLE value;
|
||||
} RRDVAR;
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDVAR management
|
||||
|
||||
// Sanitize a variable name in place: every character that is not
// alphanumeric, '.' or '_' is replaced with '_'.
//
// variable: NUL terminated, writable string.
// Returns the number of characters that were replaced.
//
// The character is converted to unsigned char before it is classified:
// passing a negative char (any byte >= 0x80 where char is signed) to
// isalnum() is undefined behavior.
//
// NOTE(review): the bare 'inline' was dropped so that this definition always
// has external linkage; this assumes the project header declares the function
// without 'inline' - confirm.
int rrdvar_fix_name(char *variable) {
    int fixed = 0;

    for( ; *variable ; variable++) {
        unsigned char c = (unsigned char)*variable;

        if(isalnum(c) || c == '.' || c == '_')
            continue;

        *variable = '_';
        fixed++;
    }

    return fixed;
}
|
||||
|
||||
// Return a STRING holding 'name' after it has been sanitized with
// rrdvar_fix_name(). The input is not modified; the caller owns the result.
inline STRING *rrdvar_name_to_string(const char *name) {
    char *sanitized = strdupz(name);
    rrdvar_fix_name(sanitized);

    STRING *result = string_strdupz(sanitized);
    freez(sanitized);

    return result;
}
|
||||
|
||||
// Conflict callback of the variables dictionaries: the variable already
// exists, so the existing item takes the new value. Always returns false.
static bool rrdvar_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) {
    RRDVAR *existing = old_value;
    const RRDVAR *incoming = new_value;

    existing->value = incoming->value;

    return false;
}
|
||||
|
||||
DICTIONARY *rrdvariables_create(void) {
|
||||
DICTIONARY *dict = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
|
||||
&dictionary_stats_category_rrdhealth, sizeof(RRDVAR));
|
||||
dictionary_register_conflict_callback(dict, rrdvar_conflict_callback, NULL);
|
||||
return dict;
|
||||
}
|
||||
|
||||
// Destroy a variables dictionary created with rrdvariables_create().
void rrdvariables_destroy(DICTIONARY *dict) {
    dictionary_destroy(dict);
}
|
||||
|
||||
// Look up `name` in `dict` and return the acquired item (or whatever the
// dictionary returns when the name is not found). An acquired item has to
// be released with dictionary_acquired_item_release().
static inline const RRDVAR_ACQUIRED *rrdvar_get_and_acquire(DICTIONARY *dict, STRING *name) {
    const char *key = string2str(name);
    ssize_t key_len = (ssize_t)string_strlen(name);

    return (const RRDVAR_ACQUIRED *)dictionary_get_and_acquire_item_advanced(dict, key, key_len);
}
|
||||
|
||||
// Set the variable `name` in `dict` to `value` and return the acquired item.
//
// Returns NULL when either the dictionary or the name is missing.
// The returned item must be released with rrdvar_release().
inline const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(DICTIONARY *dict, STRING *name, NETDATA_DOUBLE value) {
    if(unlikely(!dict)) return NULL;
    if(unlikely(!name)) return NULL;

    // the dictionary is given a pointer to this template together with its size
    RRDVAR initial = { .value = value };

    return (const RRDVAR_ACQUIRED *)dictionary_set_and_acquire_item_advanced(
        dict,
        string2str(name), (ssize_t)string_strlen(name),
        &initial, sizeof(initial),
        NULL);
}
|
||||
|
||||
// Remove all the variables of a dictionary, by flushing it.
void rrdvar_delete_all(DICTIONARY *dict) {
    dictionary_flush(dict);
}
|
||||
|
||||
// Release a variable previously acquired from `dict`.
// Does nothing when the dictionary or the acquired item is missing
// (this is the case when health is not enabled).
void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva) {
    if(dict && rva)
        dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// CUSTOM HOST VARIABLES
|
||||
|
||||
// Run `callback` on the variables of `dict` via dictionary_walkthrough_read(),
// passing `data` through, and return its result.
// Returns 0 without calling anything when there is no dictionary
// (when health is not enabled).
inline int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data) {
    if(unlikely(!dict))
        return 0;

    int rc = dictionary_walkthrough_read(dict, callback, data);
    return rc;
}
|
||||
|
||||
// Add a custom host variable and return it acquired.
//
// The name is sanitized with rrdvar_name_to_string() and the variable is
// set to NAN. Returns NULL when the host has no variables dictionary
// (when health is not enabled).
const RRDVAR_ACQUIRED *rrdvar_host_variable_add_and_acquire(RRDHOST *host, const char *name) {
    if(unlikely(!host->rrdvars))
        return NULL; // when health is not enabled

    STRING *key = rrdvar_name_to_string(name);
    const RRDVAR_ACQUIRED *acquired = rrdvar_add_and_acquire(host->rrdvars, key, NAN);
    string_freez(key);

    return acquired;
}
|
||||
|
||||
// Update the value of an acquired custom host variable.
//
// Nothing happens when the host has no variables dictionary or `rva` is
// NULL (when health is not enabled), or when the stored value already
// compares equal to `value`.
void rrdvar_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value) {
    if(unlikely(!host->rrdvars || !rva))
        return; // when health is not enabled

    RRDVAR *var = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);

    // unchanged value: nothing to store, nothing to send
    if(var->value == value)
        return;

    var->value = value;

    // if the host is streaming, send this variable upstream immediately
    rrdpush_sender_send_this_host_variable_now(host, rva);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// CUSTOM CHART VARIABLES
|
||||
|
||||
// Add a custom chart variable and return it acquired.
//
// The name is sanitized with rrdvar_name_to_string() and the variable is
// set to NAN. Returns NULL when the chart has no variables dictionary.
const RRDVAR_ACQUIRED *rrdvar_chart_variable_add_and_acquire(RRDSET *st, const char *name) {
    if(unlikely(!st->rrdvars))
        return NULL;

    STRING *key = rrdvar_name_to_string(name);
    const RRDVAR_ACQUIRED *acquired = rrdvar_add_and_acquire(st->rrdvars, key, NAN);
    string_freez(key);

    return acquired;
}
|
||||
|
||||
// Update the value of an acquired custom chart variable.
//
// When the stored value differs from `value`, it is replaced and the chart
// is flagged with RRDSET_FLAG_UPSTREAM_SEND_VARIABLES (the flag is cleared
// by rrdvar_print_to_streaming_custom_chart_variables()).
// Nothing happens when the chart has no variables dictionary or `rva` is NULL.
void rrdvar_chart_variable_set(RRDSET *st, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value) {
    if(unlikely(!st->rrdvars || !rva))
        return;

    RRDVAR *var = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);

    // unchanged value: keep the flags as they are
    if(var->value == value)
        return;

    var->value = value;
    rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDVAR lookup
|
||||
|
||||
NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva) {
|
||||
if(unlikely(!rva)) return NAN;
|
||||
RRDVAR *rv = dictionary_acquired_item_value((const DICTIONARY_ITEM *)rva);
|
||||
return rv->value;
|
||||
}
|
||||
|
||||
// Look up `variable` in `dict` and copy its value to `*result`.
//
// dict:     the variables dictionary to search; may be NULL
//           (the dictionaries are NULL when health is not enabled)
// variable: the name to look up; may be NULL
// result:   receives the value; left untouched when nothing is found
// returns:  true when the variable was found, false otherwise
static inline bool rrdvar_get_value(DICTIONARY *dict, STRING *variable, NETDATA_DOUBLE *result) {
    // same guard as rrdvar_add_and_acquire(): without a dictionary or a
    // name there is nothing to look up
    if(unlikely(!dict || !variable))
        return false;

    const RRDVAR_ACQUIRED *rva = rrdvar_get_and_acquire(dict, variable);
    if(!rva)
        return false;

    *result = rrdvar2number(rva);

    // the item was acquired by the lookup above; release it before returning
    dictionary_acquired_item_release(dict, (const DICTIONARY_ITEM *)rva);

    return true;
}
|
||||
|
||||
// Get the value of a custom host variable.
// Returns true and sets `*result` when `variable` exists in the variables
// dictionary of the host.
bool rrdvar_get_custom_host_variable_value(RRDHOST *host, STRING *variable, NETDATA_DOUBLE *result) {
    bool found = rrdvar_get_value(host->rrdvars, variable, result);
    return found;
}
|
||||
|
||||
// Get the value of a custom chart variable.
// Returns true and sets `*result` when `variable` exists in the variables
// dictionary of the chart.
bool rrdvar_get_custom_chart_variable_value(RRDSET *st, STRING *variable, NETDATA_DOUBLE *result) {
    bool found = rrdvar_get_value(st->rrdvars, variable, result);
    return found;
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDVAR to JSON
|
||||
|
||||
// Append every variable of `dict` to `wb` as a JSON member:
// the member name is the variable name, the member value is its number.
// The caller is expected to have the enclosing JSON object already open.
void rrdvar_to_json_members(DICTIONARY *dict, BUFFER *wb) {
    RRDVAR *rv;

    dfe_start_read(dict, rv) {
        const char *member = rv_dfe.name;
        buffer_json_member_add_double(wb, member, rv->value);
    }
    dfe_done(rv);
}
|
||||
|
||||
// Append the custom variables of the chart to `buf`, as JSON members
// (see rrdvar_to_json_members()).
void health_api_v1_chart_custom_variables2json(RRDSET *st, BUFFER *buf) {
    DICTIONARY *chart_vars = st->rrdvars;
    rrdvar_to_json_members(chart_vars, buf);
}
|
||||
|
||||
// Generate in `wb` a complete JSON document with the variables available to
// alerts of the chart `st`. The members, in order, are:
//
//  - chart, chart_name, chart_context, family, host
//  - current_alert_values             placeholder values and status constants
//  - dimensions_last_stored_values    per dimension id (and name, when different)
//  - dimensions_last_collected_values per dimension, with the suffix "_raw"
//  - dimensions_last_collected_time   per dimension, with the suffix "_last_collected_t"
//  - chart_variables                  update_every, last_collected_t and the chart variables
//  - host_variables                   the variables of the host
//  - alerts                           one entry per alert name of the host; when several
//                                     alerts share a name, the one with the highest
//                                     rrdlabels_common_count() against `st` is kept
void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *wb) {

    // FIXME this list is incomplete
    // alerts can also access {context}.{dimension} from the entire host database

    RRDHOST *host = st->rrdhost;
    RRDDIM *rd;
    char name[RRD_ID_LENGTH_MAX + 1 + 100];

    buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);

    buffer_json_member_add_string(wb, "chart", rrdset_id(st));
    buffer_json_member_add_string(wb, "chart_name", rrdset_name(st));
    buffer_json_member_add_string(wb, "chart_context", rrdset_context(st));
    buffer_json_member_add_string(wb, "family", rrdset_family(st));
    buffer_json_member_add_string(wb, "host", rrdhost_hostname(host));

    const NETDATA_DOUBLE now = (NETDATA_DOUBLE)now_realtime_sec();

    buffer_json_member_add_object(wb, "current_alert_values");
    {
        // emitted in exactly this order
        const struct {
            const char *key;
            NETDATA_DOUBLE value;
        } members[] = {
            { "this",          NAN },
            { "after",         now - 1 },
            { "before",        now },
            { "now",           now },
            { "status",        (NETDATA_DOUBLE)RRDCALC_STATUS_REMOVED },
            { "REMOVED",       (NETDATA_DOUBLE)RRDCALC_STATUS_REMOVED },
            { "UNDEFINED",     (NETDATA_DOUBLE)RRDCALC_STATUS_UNDEFINED },
            { "UNINITIALIZED", (NETDATA_DOUBLE)RRDCALC_STATUS_UNINITIALIZED },
            { "CLEAR",         (NETDATA_DOUBLE)RRDCALC_STATUS_CLEAR },
            { "WARNING",       (NETDATA_DOUBLE)RRDCALC_STATUS_WARNING },
            { "CRITICAL",      (NETDATA_DOUBLE)RRDCALC_STATUS_CRITICAL },
            { "green",         NAN },
            { "red",           NAN },
        };

        for(size_t i = 0; i < sizeof(members) / sizeof(members[0]); i++)
            buffer_json_member_add_double(wb, members[i].key, members[i].value);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "dimensions_last_stored_values");
    {
        dfe_start_read(st->rrddim_root_index, rd) {
            buffer_json_member_add_double(wb, string2str(rd->id), rd->collector.last_stored_value);

            // the name is added too, when it is a different STRING than the id
            if(rd->name != rd->id)
                buffer_json_member_add_double(wb, string2str(rd->name), rd->collector.last_stored_value);
        }
        dfe_done(rd);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "dimensions_last_collected_values");
    {
        dfe_start_read(st->rrddim_root_index, rd) {
            snprintfz(name, sizeof(name), "%s_raw", string2str(rd->id));
            buffer_json_member_add_int64(wb, name, rd->collector.last_collected_value);

            if(rd->name != rd->id) {
                snprintfz(name, sizeof(name), "%s_raw", string2str(rd->name));
                buffer_json_member_add_int64(wb, name, rd->collector.last_collected_value);
            }
        }
        dfe_done(rd);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "dimensions_last_collected_time");
    {
        dfe_start_read(st->rrddim_root_index, rd) {
            snprintfz(name, sizeof(name), "%s_last_collected_t", string2str(rd->id));
            buffer_json_member_add_int64(wb, name, rd->collector.last_collected_time.tv_sec);

            if(rd->name != rd->id) {
                snprintfz(name, sizeof(name), "%s_last_collected_t", string2str(rd->name));
                buffer_json_member_add_int64(wb, name, rd->collector.last_collected_time.tv_sec);
            }
        }
        dfe_done(rd);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "chart_variables");
    {
        buffer_json_member_add_int64(wb, "update_every", st->update_every);
        buffer_json_member_add_uint64(wb, "last_collected_t", st->last_collected_time.tv_sec);

        rrdvar_to_json_members(st->rrdvars, wb);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "host_variables");
    {
        rrdvar_to_json_members(host->rrdvars, wb);
    }
    buffer_json_object_close(wb);

    buffer_json_member_add_object(wb, "alerts");
    {
        struct scored {
            bool existing;          // false only on a freshly prepared candidate
            STRING *chart;          // duplicated reference, released below
            STRING *context;        // duplicated reference, released below
            NETDATA_DOUBLE value;
            size_t score;
        } candidate, *z;

        // temporary index: alert name -> best scored entry seen so far
        DICTIONARY *best = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE);

        RRDCALC *rc;
        dfe_start_read(host->rrdcalc_root_index, rc) {
            candidate = (struct scored) {
                .existing = false,
                .chart = string_dup(rc->rrdset->id),
                .context = string_dup(rc->rrdset->context),
                .value = rc->value,
                .score = rrdlabels_common_count(rc->rrdset->rrdlabels, st->rrdlabels),
            };

            z = dictionary_set(best, string2str(rc->config.name), &candidate, sizeof(candidate));

            if(z->existing) {
                // this name has been seen before: keep the higher score
                if(candidate.score > z->score)
                    SWAP(*z, candidate);

                // whichever entry is now in the candidate is not needed
                string_freez(candidate.chart);
                string_freez(candidate.context);
            }

            // a first-time entry, or a candidate that has just been swapped in,
            // still carries existing == false
            z->existing = true;
        }
        dfe_done(rc);

        dfe_start_read(best, z) {
            buffer_json_member_add_object(wb, z_dfe.name);
            {
                buffer_json_member_add_double(wb, "value", z->value);
                buffer_json_member_add_string(wb, "instance", string2str(z->chart));
                buffer_json_member_add_string(wb, "context", string2str(z->context));
                buffer_json_member_add_uint64(wb, "score", z->score);
            }
            buffer_json_object_close(wb);

            // the entry has been printed; release the references it holds
            string_freez(z->chart);
            string_freez(z->context);
        }
        dfe_done(z);

        dictionary_destroy(best);
    }
    buffer_json_object_close(wb);

    buffer_json_finalize(wb);
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDVAR private members examination
|
||||
|
||||
const char *rrdvar_name(const RRDVAR_ACQUIRED *rva) {
|
||||
return dictionary_acquired_item_name((const DICTIONARY_ITEM *)rva);
|
||||
}
|
||||
|
||||
// Append to `wb` one "VARIABLE CHART name = value" line for each custom
// variable of the chart. RRDSET_FLAG_UPSTREAM_SEND_VARIABLES is cleared
// on the chart before the variables are printed.
void rrdvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb) {
    rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND_VARIABLES);

    // send the chart local custom variables
    RRDVAR *rv;
    dfe_start_read(st->rrdvars, rv) {
        buffer_sprintf(wb, "VARIABLE CHART %s = " NETDATA_DOUBLE_FORMAT "\n", rv_dfe.name, rv->value);
    }
    dfe_done(rv);
}
|
44
health/rrdvar.h
Normal file
44
health/rrdvar.h
Normal file
|
@ -0,0 +1,44 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_RRDVAR_H
|
||||
#define NETDATA_RRDVAR_H 1
|
||||
|
||||
#include "libnetdata/libnetdata.h"
|
||||
|
||||
#define RRDVAR_MAX_LENGTH 1024
|
||||
|
||||
int rrdvar_fix_name(char *variable);
|
||||
|
||||
#include "database/rrd.h"
|
||||
|
||||
STRING *rrdvar_name_to_string(const char *name);
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_host_variable_add_and_acquire(RRDHOST *host, const char *name);
|
||||
void rrdvar_host_variable_set(RRDHOST *host, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value);
|
||||
|
||||
int rrdvar_walkthrough_read(DICTIONARY *dict, int (*callback)(const DICTIONARY_ITEM *item, void *rrdvar, void *data), void *data);
|
||||
|
||||
#define rrdvar_host_variable_release(host, rva) rrdvar_release((host)->rrdvars, rva)
|
||||
#define rrdvar_chart_variable_release(st, rva) rrdvar_release((st)->rrdvars, rva)
|
||||
void rrdvar_release(DICTIONARY *dict, const RRDVAR_ACQUIRED *rva);
|
||||
|
||||
NETDATA_DOUBLE rrdvar2number(const RRDVAR_ACQUIRED *rva);
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_add_and_acquire(DICTIONARY *dict, STRING *name, NETDATA_DOUBLE value);
|
||||
|
||||
DICTIONARY *rrdvariables_create(void);
|
||||
void rrdvariables_destroy(DICTIONARY *dict);
|
||||
|
||||
void rrdvar_delete_all(DICTIONARY *dict);
|
||||
|
||||
const char *rrdvar_name(const RRDVAR_ACQUIRED *rva);
|
||||
|
||||
void rrdvar_print_to_streaming_custom_chart_variables(RRDSET *st, BUFFER *wb);
|
||||
|
||||
const RRDVAR_ACQUIRED *rrdvar_chart_variable_add_and_acquire(RRDSET *st, const char *name);
|
||||
void rrdvar_chart_variable_set(RRDSET *st, const RRDVAR_ACQUIRED *rva, NETDATA_DOUBLE value);
|
||||
|
||||
bool rrdvar_get_custom_host_variable_value(RRDHOST *host, STRING *variable, NETDATA_DOUBLE *result);
|
||||
bool rrdvar_get_custom_chart_variable_value(RRDSET *st, STRING *variable, NETDATA_DOUBLE *result);
|
||||
|
||||
#endif //NETDATA_RRDVAR_H
|
509
health/schema.d/health:alert:prototype.json
Normal file
509
health/schema.d/health:alert:prototype.json
Normal file
|
@ -0,0 +1,509 @@
|
|||
{
|
||||
"jsonSchema": {
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"matchInstance": {
|
||||
"type": "object",
|
||||
"title": "Matching rule for a specific instance",
|
||||
"description": "This rule will be applied to a specific instance on all nodes",
|
||||
"properties": {
|
||||
"on": {
|
||||
"type": "string",
|
||||
"default": "",
|
||||
"title": "The instance this rule should be applied to",
|
||||
"description": "You can find the instance names on all charts at the instances drop down menu. Do not include the host name in this field."
|
||||
},
|
||||
"plugin": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match data collection plugins",
|
||||
"description": "A simple pattern to match the data collection plugins that are collecting the data this rule is to be applied to."
|
||||
},
|
||||
"module": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match data collection plugin modules",
|
||||
"description": "A simple pattern to match the data collection plugin modules that are collecting the data this rule is to be applied to."
|
||||
},
|
||||
"instance_labels": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match instance labels",
|
||||
"description": "A simple pattern to match the instance labels of the instances this rule is to be applied to."
|
||||
},
|
||||
"host_labels": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match node labels",
|
||||
"description": "A simple pattern to match the node labels of the nodes this rule is to be applied to."
|
||||
},
|
||||
"os": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match operating system",
|
||||
"description": "A simple pattern to match the operating system name of the nodes this rule is to be applied to."
|
||||
},
|
||||
"host": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match node hostnames",
|
||||
"description": "A simple pattern to match the hostnames of the nodes this rule is to be applied to."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"on",
|
||||
"os",
|
||||
"host",
|
||||
"plugin",
|
||||
"module",
|
||||
"host_labels",
|
||||
"instance_labels"
|
||||
]
|
||||
},
|
||||
"matchTemplate": {
|
||||
"type": "object",
|
||||
"title": "Matching rule for applying the alert to multiple instances",
|
||||
"description": "This rule will be applied to all instances on all nodes.",
|
||||
"properties": {
|
||||
"on": {
|
||||
"type": "string",
|
||||
"default": "",
|
||||
"title": "The context of the instances this rule should be applied to",
|
||||
"description": "You can find the context at the title bar of all charts in the Metrics dashboard, between the chart title and the units (like system.cpu, or disk.io, etc)."
|
||||
},
|
||||
"plugin": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match data collection plugins",
|
||||
"description": "A simple pattern to match the data collection plugins that are collecting the data this rule is to be applied to."
|
||||
},
|
||||
"module": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match data collection plugin modules",
|
||||
"description": "A simple pattern to match the data collection plugin modules that are collecting the data this rule is to be applied to."
|
||||
},
|
||||
"instance_labels": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match instance labels",
|
||||
"description": "A simple pattern to match the instance labels of the instances this rule is to be applied to."
|
||||
},
|
||||
"instances": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match instance names",
|
||||
"description": "A simple pattern to match the instance names of the instances this rule is to be applied to."
|
||||
},
|
||||
"host_labels": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match node labels",
|
||||
"description": "A simple pattern to match the node labels of the nodes this rule is to be applied to."
|
||||
},
|
||||
"os": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match operating system",
|
||||
"description": "A simple pattern to match the operating system name of the nodes this rule is to be applied to."
|
||||
},
|
||||
"host": {
|
||||
"type": "string",
|
||||
"default": "*",
|
||||
"title": "Match node hostnames",
|
||||
"description": "A simple pattern to match the hostnames of the nodes this rule is to be applied to."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"on",
|
||||
"os",
|
||||
"host",
|
||||
"plugin",
|
||||
"module",
|
||||
"host_labels",
|
||||
"instances",
|
||||
"instance_labels"
|
||||
]
|
||||
},
|
||||
"config": {
|
||||
"type": "object",
|
||||
"title": "Alert Configuration",
|
||||
"description": "The properties that control the value the alert will get, the conditions it will trigger, the back-off for notifications, the auto-repeating of notifications, etc.",
|
||||
"properties": {
|
||||
"summary": {
|
||||
"type": "string",
|
||||
"title": "Short description of the alert",
|
||||
"description": "This field is used in notification as a short description of the alert. Variables, like ${label:key}, are replaced with the value of instance label called 'key'."
|
||||
},
|
||||
"info": {
|
||||
"type": "string",
|
||||
"title": "Long description of the alert",
|
||||
"description": "This field is used to provide enough information about the type and nature of the alert. Variables, like ${label:key}, are replaced with the value of instance label called 'key'."
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"title": "Alert Type",
|
||||
"description": "Use categories like: 'System', 'Containers', 'Web Servers', 'Message Brokers', etc."
|
||||
},
|
||||
"component": {
|
||||
"type": "string",
|
||||
"title": "Alert Component (sub-type)",
|
||||
"description": "Component is a sub-type of Alert Type. Examples: 'CPU', 'Memory', 'Network', 'Disk', 'Hardware', 'nginx', 'redis', 'postgresql', etc."
|
||||
},
|
||||
"classification": {
|
||||
"type": "string",
|
||||
"title": "Classification",
|
||||
"description": "Use 'Workload', 'Utilization', 'Latency', 'Availability', 'Errors', etc."
|
||||
},
|
||||
"value": {
|
||||
"type": "object",
|
||||
"title": "Alert Value",
|
||||
"description": "Each alert has a value. This section defines how this value is calculated.",
|
||||
"properties": {
|
||||
"database_lookup": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"after": {
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"title": "Time-Series Oldest Time",
|
||||
"description": "The oldest timestamp of the time-series data to be included in the query. Negative values define a duration in seconds in the past (so, -60 means a minute ago)."
|
||||
},
|
||||
"before": {
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"title": "Time-Series Newest Time",
|
||||
"description": "The newest timestamp of the time-series data to be included in the query. Negative values define a duration in seconds in the past (so, -60 means a minute ago). Zero means now."
|
||||
},
|
||||
"grouping": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"average",
|
||||
"median",
|
||||
"min",
|
||||
"max",
|
||||
"sum",
|
||||
"incremental_sum",
|
||||
"stddev",
|
||||
"cv",
|
||||
"trimmed-mean1",
|
||||
"trimmed-mean2",
|
||||
"trimmed-mean3",
|
||||
"trimmed-mean",
|
||||
"trimmed-mean10",
|
||||
"trimmed-mean15",
|
||||
"trimmed-mean20",
|
||||
"trimmed-mean25",
|
||||
"trimmed-median1",
|
||||
"trimmed-median2",
|
||||
"trimmed-median3",
|
||||
"trimmed-median",
|
||||
"trimmed-median10",
|
||||
"trimmed-median15",
|
||||
"trimmed-median20",
|
||||
"trimmed-median25",
|
||||
"percentile99",
|
||||
"percentile98",
|
||||
"percentile97",
|
||||
"percentile",
|
||||
"percentile90",
|
||||
"percentile80",
|
||||
"percentile75",
|
||||
"percentile50",
|
||||
"percentile25",
|
||||
"ses",
|
||||
"des",
|
||||
"countif"
|
||||
],
|
||||
"enumNames": [
|
||||
"The mean (average) value",
|
||||
"The median value",
|
||||
"The minimum value",
|
||||
"The maximum value",
|
||||
"The sum of all the values",
|
||||
"The delta of the latest and oldest values",
|
||||
"The standard deviation of the values",
|
||||
"The standard deviation expressed as a % of the mean value",
|
||||
"The mean after trimming 1% of the extreme values",
|
||||
"The mean after trimming 2% of the extreme values",
|
||||
"The mean after trimming 3% of the extreme values",
|
||||
"The mean after trimming 5% of the extreme values",
|
||||
"The mean after trimming 10% of the extreme values",
|
||||
"The mean after trimming 15% of the extreme values",
|
||||
"The mean after trimming 20% of the extreme values",
|
||||
"The mean after trimming 25% of the extreme values",
|
||||
"The median after trimming 1% of the extreme values",
|
||||
"The median after trimming 2% of the extreme values",
|
||||
"The median after trimming 3% of the extreme values",
|
||||
"The median after trimming 5% of the extreme values",
|
||||
"The median after trimming 10% of the extreme values",
|
||||
"The median after trimming 15% of the extreme values",
|
||||
"The median after trimming 20% of the extreme values",
|
||||
"The median after trimming 25% of the extreme values",
|
||||
"The 99th percentile of the values",
|
||||
"The 98th percentile of the values",
|
||||
"The 97th percentile of the values",
|
||||
"The 95th percentile of the values",
|
||||
"The 90th percentile of the values",
|
||||
"The 80th percentile of the values",
|
||||
"The 75th percentile of the values",
|
||||
"The 50th percentile of the values",
|
||||
"The 25th percentile of the values",
|
||||
"Single Exponential Smoothing",
|
||||
"Double Exponential Smoothing",
|
||||
"Count If zero"
|
||||
],
|
||||
"default": "average",
|
||||
"title": "Time Aggregation Function",
|
||||
"description": "When querying time-series data we need to come up with a single value. This function is used to aggregate all the values of the time-series data to a single value."
|
||||
},
|
||||
"options": {
|
||||
"type": "array",
|
||||
"title": "Time-Series Query options",
|
||||
"description": "Options affecting the way the value is calculated",
|
||||
"uniqueItems": true,
|
||||
"items": {
|
||||
"enum": [
|
||||
"unaligned",
|
||||
"abs",
|
||||
"min2max",
|
||||
"null2zero",
|
||||
"percentage",
|
||||
"anomaly-bit",
|
||||
"match_ids",
|
||||
"match_names"
|
||||
],
|
||||
"enumNames": [
|
||||
"Do not shift the time-frame for visual presentation",
|
||||
"Make all values positive before using them",
|
||||
"Use the delta of the minimum to the maximum value",
|
||||
"Treat gaps in the time-series as a zero value",
|
||||
"Calculate the percentage of the selected dimensions over the sum of all dimensions",
|
||||
"Query the anomaly rate of the samples collected",
|
||||
"Match only dimension IDs, not Names",
|
||||
"Match only dimension Names, not IDs"
|
||||
]
|
||||
},
|
||||
"default": [ "unaligned" ]
|
||||
},
|
||||
"dimensions": {
|
||||
"type": "string",
|
||||
"title": "Dimensions Selection Pattern",
|
||||
"description": "A simple pattern to match the dimensions that should be included in the query",
|
||||
"default": "*"
|
||||
}
|
||||
}
|
||||
},
|
||||
"calculation": {
|
||||
"type": "string",
|
||||
"title": "Calculation Expression",
|
||||
"description": "The database value is available as '$this'. This expression can utilize variables to transform the value of the alert."
|
||||
},
|
||||
"units": {
|
||||
"type": "string",
|
||||
"title": "Alert Unit of Measurement",
|
||||
"description": "The unit of measurement the alert value is expressed with. If unset, the units of the instance the alert is attached to will be used."
|
||||
}
|
||||
}
|
||||
},
|
||||
"conditions": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"green": {
|
||||
"type": [
|
||||
"integer",
|
||||
"null"
|
||||
],
|
||||
"title": "Healthy threshold ($green)",
|
||||
"description": "A threshold that indicates a healthy status. This threshold can be used as '$green' in the alert conditions."
|
||||
},
|
||||
"red": {
|
||||
"type": [
|
||||
"integer",
|
||||
"null"
|
||||
],
|
||||
"title": "Critical threshold ($red)",
|
||||
"description": "A threshold that indicates a critical status. This threshold can be used as '$red' in the alert conditions."
|
||||
},
|
||||
"warning_condition": {
|
||||
"type": "string",
|
||||
"title": "Warning Expression",
|
||||
"description": "The alert value is available as '$this'. If this expression evaluates to a non-zero value, the alert is considered to be in warning level."
|
||||
},
|
||||
"critical_condition": {
|
||||
"type": "string",
|
||||
"title": "Critical Expression",
|
||||
"description": "The alert value is available as '$this'. If this expression evaluates to a non-zero value, the alert is considered to be in critical level."
|
||||
}
|
||||
}
|
||||
},
|
||||
"action": {
|
||||
"type": "object",
|
||||
"title": "Alert Action",
|
||||
"description": "The action the alert should take when it transitions states",
|
||||
"properties": {
|
||||
"execute": {
|
||||
"type": "string",
|
||||
"title": "Command to execute when the alert transitions states",
|
||||
"description": "Leave this empty to get the default alert notification"
|
||||
},
|
||||
"recipient": {
|
||||
"type": "string",
|
||||
"title": "Notification Recipients",
|
||||
"description": "A space separated list of the recipients of the alert notifications. The special recipient 'silent' prevents this alert from taking any action (i.e. sending notifications)."
|
||||
},
|
||||
"options": {
|
||||
"type": "array",
|
||||
"title": "Action Options",
|
||||
"description": "Options related to the actions this alert will take.",
|
||||
"uniqueItems": true,
|
||||
"items": {
|
||||
"enum": [
|
||||
"no-clear-notification"
|
||||
],
|
||||
"enumNames": [
|
||||
"Do not perform any action when the alert is cleared"
|
||||
]
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"delay": {
|
||||
"type": "object",
|
||||
"title": "Delay the action (notification)",
|
||||
"description": "Rules to postpone the action, to avoid multiple notifications on flapping alerts.",
|
||||
"properties": {
|
||||
"up": {
|
||||
"type": "integer",
|
||||
"title": "Delay when raising",
|
||||
"description": "Delay the action (notification) that many seconds, when the alert is rising."
|
||||
},
|
||||
"down": {
|
||||
"type": "integer",
|
||||
"title": "Delay when going Down",
|
||||
"description": "Delay the action (notification) that many seconds, when the alert is recovering."
|
||||
},
|
||||
"max": {
|
||||
"type": "integer",
|
||||
"title": "Max Acceptable Delay",
|
||||
"description": "The maximum acceptable delay in seconds, for taking the action (notification)."
|
||||
},
|
||||
"multiplier": {
|
||||
"type": "number",
|
||||
"title": "Back-Off on Transitions",
|
||||
"description": "Multiply the delay by this number, every time the alert transitions to a new state, while the action (notification) is being delayed."
|
||||
}
|
||||
}
|
||||
},
|
||||
"repeat": {
|
||||
"type": "object",
|
||||
"title": "Action Auto-Repeat",
|
||||
"description": "Repeat the action while the alert is raised.",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"warning": {
|
||||
"type": "integer",
|
||||
"title": "Repeat on Warning",
|
||||
"description": "The number of seconds to repeat the action while the alert is in warning state"
|
||||
},
|
||||
"critical": {
|
||||
"type": "integer",
|
||||
"title": "Repeat on Critical",
|
||||
"description": "The number of seconds to repeat the action while the alert is in critical state"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
]
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"format_version": {
|
||||
"type": "integer",
|
||||
"default": 1
|
||||
},
|
||||
"rules": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"enabled": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
"title": "Enabled"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"instance",
|
||||
"template"
|
||||
],
|
||||
"enumNames": [
|
||||
"Apply this rule to a specific instance (deprecated)",
|
||||
"Apply this rule to all instances matching the rules"
|
||||
],
|
||||
"default": "template",
|
||||
"title": "Type of rule",
|
||||
"description": "Rules can be configured to match a specific instance (like a specific disk), or match all the instances (like all the disks). All rules are always checked against all nodes streamed to this Netdata, so the matching rules include patterns to match both instances and nodes."
|
||||
}
|
||||
},
|
||||
"required": [ "type", "enabled" ],
|
||||
"if": {
|
||||
"properties": {
|
||||
"type": { "const": "instance" }
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"match": { "$ref": "#/definitions/matchInstance" },
|
||||
"config": { "$ref": "#/definitions/config" }
|
||||
}
|
||||
},
|
||||
"else": {
|
||||
"properties": {
|
||||
"match": { "$ref": "#/definitions/matchTemplate" },
|
||||
"config": { "$ref": "#/definitions/config" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"rules"
|
||||
]
|
||||
},
|
||||
"uiSchema": {
|
||||
"format_version": {
|
||||
"ui:widget": "hidden"
|
||||
},
|
||||
"name": {
|
||||
"ui:widget": "hidden"
|
||||
},
|
||||
"rules": {
|
||||
"items": {
|
||||
"enabled": {
|
||||
"ui:widget": "checkbox"
|
||||
},
|
||||
"config": {
|
||||
"hash": {
|
||||
"ui:widget": "hidden"
|
||||
},
|
||||
"source_type": {
|
||||
"ui:widget": "hidden"
|
||||
},
|
||||
"source": {
|
||||
"ui:widget": "hidden"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -60,7 +60,7 @@ static usec_t get_clock_resolution(clockid_t clock) {
|
|||
|
||||
if(clock_getres(clock, &ts) == 0) {
|
||||
usec_t ret = (usec_t)ts.tv_sec * USEC_PER_SEC + (usec_t)ts.tv_nsec / NSEC_PER_USEC;
|
||||
if(!ret && ts.tv_nsec > 0 && ts.tv_nsec < NSEC_PER_USEC)
|
||||
if(!ret && ts.tv_nsec > 0 && ts.tv_nsec < (long int)NSEC_PER_USEC)
|
||||
return (usec_t)1;
|
||||
|
||||
else if(ret > MAX_CLOCK_RESOLUTION_UT) {
|
||||
|
|
|
@ -263,6 +263,7 @@ int dyncfg_node_find_and_call(DICTIONARY *dyncfg_nodes, const char *transaction,
|
|||
|
||||
const char *id = get_word(words, num_words, 1);
|
||||
const char *action = get_word(words, num_words, 2);
|
||||
const char *add_name = get_word(words, num_words, 3);
|
||||
|
||||
if(!id || !*id)
|
||||
return dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "dyncfg node: id is missing from the request");
|
||||
|
@ -283,7 +284,7 @@ int dyncfg_node_find_and_call(DICTIONARY *dyncfg_nodes, const char *transaction,
|
|||
buffer_flush(result);
|
||||
result->content_type = CT_APPLICATION_JSON;
|
||||
|
||||
int code = df->cb(transaction, id, cmd, payload, stop_monotonic_ut, cancelled, result, source, df->data);
|
||||
int code = df->cb(transaction, id, cmd, add_name, payload, stop_monotonic_ut, cancelled, result, source, df->data);
|
||||
|
||||
if(!result->expires)
|
||||
result->expires = now_realtime_sec();
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#define DYNCFG_RESP_SUCCESS(code) (code >= 200 && code <= 299)
|
||||
#define DYNCFG_RESP_RUNNING 200 // accepted and running
|
||||
#define DYNCFG_RESP_ACCEPTED 202 // accepted, but not running yet
|
||||
#define DYNCFG_RESP_ACCEPTED_DISABLED 298 // accepted, but is disabled
|
||||
#define DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED 299 // accepted, but restart is required to apply it
|
||||
|
||||
typedef enum __attribute__((packed)) {
|
||||
|
@ -65,7 +66,7 @@ char *dyncfg_escape_id_for_filename(const char *id);
|
|||
#include "../buffer/buffer.h"
|
||||
#include "../dictionary/dictionary.h"
|
||||
|
||||
typedef int (*dyncfg_cb_t)(const char *transaction, const char *id, DYNCFG_CMDS cmd, BUFFER *payload, usec_t *stop_monotonic_ut, bool *cancelled, BUFFER *result, const char *source, void *data);
|
||||
typedef int (*dyncfg_cb_t)(const char *transaction, const char *id, DYNCFG_CMDS cmd, const char *add_name, BUFFER *payload, usec_t *stop_monotonic_ut, bool *cancelled, BUFFER *result, const char *source, void *data);
|
||||
|
||||
struct dyncfg_node {
|
||||
DYNCFG_TYPE type;
|
||||
|
|
|
@ -5,6 +5,11 @@
|
|||
// ----------------------------------------------------------------------------
|
||||
// data structures for storing the parsed expression in memory
|
||||
|
||||
typedef struct eval_variable {
|
||||
STRING *name;
|
||||
struct eval_variable *next;
|
||||
} EVAL_VARIABLE;
|
||||
|
||||
typedef struct eval_value {
|
||||
int type;
|
||||
|
||||
|
@ -24,6 +29,21 @@ typedef struct eval_node {
|
|||
EVAL_VALUE ops[];
|
||||
} EVAL_NODE;
|
||||
|
||||
struct eval_expression {
|
||||
STRING *source;
|
||||
STRING *parsed_as;
|
||||
|
||||
NETDATA_DOUBLE result;
|
||||
|
||||
int error;
|
||||
BUFFER *error_msg;
|
||||
|
||||
EVAL_NODE *nodes;
|
||||
|
||||
void *variable_lookup_cb_data;
|
||||
eval_expression_variable_lookup_t variable_lookup_cb;
|
||||
};
|
||||
|
||||
// these are used for EVAL_NODE.operator
|
||||
// they are used as internal IDs to identify an operator
|
||||
// THEY ARE NOT USED FOR PARSING OPERATORS LIKE THAT
|
||||
|
@ -62,124 +82,9 @@ static inline void print_parsed_as_constant(BUFFER *out, NETDATA_DOUBLE n);
|
|||
// evaluation of expressions
|
||||
|
||||
static inline NETDATA_DOUBLE eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABLE *v, int *error) {
|
||||
static STRING
|
||||
*this_string = NULL,
|
||||
*now_string = NULL,
|
||||
*after_string = NULL,
|
||||
*before_string = NULL,
|
||||
*status_string = NULL,
|
||||
*removed_string = NULL,
|
||||
*uninitialized_string = NULL,
|
||||
*undefined_string = NULL,
|
||||
*clear_string = NULL,
|
||||
*warning_string = NULL,
|
||||
*critical_string = NULL;
|
||||
|
||||
NETDATA_DOUBLE n;
|
||||
|
||||
if(unlikely(this_string == NULL)) {
|
||||
this_string = string_strdupz("this");
|
||||
now_string = string_strdupz("now");
|
||||
after_string = string_strdupz("after");
|
||||
before_string = string_strdupz("before");
|
||||
status_string = string_strdupz("status");
|
||||
removed_string = string_strdupz("REMOVED");
|
||||
uninitialized_string = string_strdupz("UNINITIALIZED");
|
||||
undefined_string = string_strdupz("UNDEFINED");
|
||||
clear_string = string_strdupz("CLEAR");
|
||||
warning_string = string_strdupz("WARNING");
|
||||
critical_string = string_strdupz("CRITICAL");
|
||||
}
|
||||
|
||||
if(unlikely(v->name == this_string)) {
|
||||
n = (exp->myself)?*exp->myself:NAN;
|
||||
buffer_strcat(exp->error_msg, "[ $this = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == after_string)) {
|
||||
n = (exp->after && *exp->after)?*exp->after:NAN;
|
||||
buffer_strcat(exp->error_msg, "[ $after = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == before_string)) {
|
||||
n = (exp->before && *exp->before)?*exp->before:NAN;
|
||||
buffer_strcat(exp->error_msg, "[ $before = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == now_string)) {
|
||||
n = (NETDATA_DOUBLE)now_realtime_sec();
|
||||
buffer_strcat(exp->error_msg, "[ $now = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == status_string)) {
|
||||
n = (exp->status)?*exp->status:RRDCALC_STATUS_UNINITIALIZED;
|
||||
buffer_strcat(exp->error_msg, "[ $status = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == removed_string)) {
|
||||
n = RRDCALC_STATUS_REMOVED;
|
||||
buffer_strcat(exp->error_msg, "[ $REMOVED = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == uninitialized_string)) {
|
||||
n = RRDCALC_STATUS_UNINITIALIZED;
|
||||
buffer_strcat(exp->error_msg, "[ $UNINITIALIZED = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == undefined_string)) {
|
||||
n = RRDCALC_STATUS_UNDEFINED;
|
||||
buffer_strcat(exp->error_msg, "[ $UNDEFINED = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == clear_string)) {
|
||||
n = RRDCALC_STATUS_CLEAR;
|
||||
buffer_strcat(exp->error_msg, "[ $CLEAR = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == warning_string)) {
|
||||
n = RRDCALC_STATUS_WARNING;
|
||||
buffer_strcat(exp->error_msg, "[ $WARNING = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(unlikely(v->name == critical_string)) {
|
||||
n = RRDCALC_STATUS_CRITICAL;
|
||||
buffer_strcat(exp->error_msg, "[ $CRITICAL = ");
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
return n;
|
||||
}
|
||||
|
||||
if(exp->rrdcalc && health_variable_lookup(v->name, exp->rrdcalc, &n)) {
|
||||
if(exp->variable_lookup_cb && exp->variable_lookup_cb(v->name, exp->variable_lookup_cb_data, &n)) {
|
||||
buffer_sprintf(exp->error_msg, "[ ${%s} = ", string2str(v->name));
|
||||
print_parsed_as_constant(exp->error_msg, n);
|
||||
buffer_strcat(exp->error_msg, " ] ");
|
||||
|
@ -1074,7 +979,7 @@ int expression_evaluate(EVAL_EXPRESSION *expression) {
|
|||
expression->error = EVAL_ERROR_OK;
|
||||
|
||||
buffer_reset(expression->error_msg);
|
||||
expression->result = eval_node(expression, (EVAL_NODE *)expression->nodes, &expression->error);
|
||||
expression->result = eval_node(expression, expression->nodes, &expression->error);
|
||||
|
||||
if(unlikely(isnan(expression->result))) {
|
||||
if(expression->error == EVAL_ERROR_OK)
|
||||
|
@ -1104,6 +1009,9 @@ int expression_evaluate(EVAL_EXPRESSION *expression) {
|
|||
}
|
||||
|
||||
EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, int *error) {
|
||||
if(!string || !*string)
|
||||
return NULL;
|
||||
|
||||
const char *s = string;
|
||||
int err = EVAL_ERROR_OK;
|
||||
|
||||
|
@ -1137,12 +1045,12 @@ EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, in
|
|||
|
||||
EVAL_EXPRESSION *exp = callocz(1, sizeof(EVAL_EXPRESSION));
|
||||
|
||||
exp->source = strdupz(string);
|
||||
exp->parsed_as = strdupz(buffer_tostring(out));
|
||||
exp->source = string_strdupz(string);
|
||||
exp->parsed_as = string_strdupz(buffer_tostring(out));
|
||||
buffer_free(out);
|
||||
|
||||
exp->error_msg = buffer_create(100, NULL);
|
||||
exp->nodes = (void *)op;
|
||||
exp->nodes = op;
|
||||
|
||||
return exp;
|
||||
}
|
||||
|
@ -1150,9 +1058,9 @@ EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, in
|
|||
void expression_free(EVAL_EXPRESSION *expression) {
|
||||
if(!expression) return;
|
||||
|
||||
if(expression->nodes) eval_node_free((EVAL_NODE *)expression->nodes);
|
||||
freez((void *)expression->source);
|
||||
freez((void *)expression->parsed_as);
|
||||
if(expression->nodes) eval_node_free(expression->nodes);
|
||||
string_freez((void *)expression->source);
|
||||
string_freez((void *)expression->parsed_as);
|
||||
buffer_free(expression->error_msg);
|
||||
freez(expression);
|
||||
}
|
||||
|
@ -1199,3 +1107,39 @@ const char *expression_strerror(int error) {
|
|||
return "unknown error";
|
||||
}
|
||||
}
|
||||
|
||||
// Return the source text of the expression as a C string.
// A NULL expression is handled by passing NULL to string2str().
const char *expression_source(EVAL_EXPRESSION *expression) {
    return string2str(expression ? expression->source : NULL);
}
|
||||
|
||||
// Return the "parsed as" text of the expression as a C string.
// A NULL expression is handled by passing NULL to string2str().
const char *expression_parsed_as(EVAL_EXPRESSION *expression) {
    return string2str(expression ? expression->parsed_as : NULL);
}
|
||||
|
||||
// Return the contents of the expression's error message buffer, or an empty
// string when there is no expression or no buffer.
const char *expression_error_msg(EVAL_EXPRESSION *expression) {
    if(expression && expression->error_msg)
        return buffer_tostring(expression->error_msg);

    return "";
}
|
||||
|
||||
// Return the stored result of the expression, or NAN when expression is NULL.
NETDATA_DOUBLE expression_result(EVAL_EXPRESSION *expression) {
    return expression ? expression->result : NAN;
}
|
||||
|
||||
// Attach the variable lookup callback, and the opaque data pointer passed
// back to it, to the expression. Does nothing when expression is NULL.
void expression_set_variable_lookup_callback(EVAL_EXPRESSION *expression, eval_expression_variable_lookup_t cb, void *data) {
    if(expression) {
        expression->variable_lookup_cb_data = data;
        expression->variable_lookup_cb = cb;
    }
}
|
||||
|
|
|
@ -7,41 +7,9 @@
|
|||
|
||||
#define EVAL_MAX_VARIABLE_NAME_LENGTH 300
|
||||
|
||||
typedef enum rrdcalc_status {
|
||||
RRDCALC_STATUS_REMOVED = -2,
|
||||
RRDCALC_STATUS_UNDEFINED = -1,
|
||||
RRDCALC_STATUS_UNINITIALIZED = 0,
|
||||
RRDCALC_STATUS_CLEAR = 1,
|
||||
RRDCALC_STATUS_RAISED = 2, // DO NOT CHANGE THESE NUMBERS
|
||||
RRDCALC_STATUS_WARNING = 3, // DO NOT CHANGE THESE NUMBERS
|
||||
RRDCALC_STATUS_CRITICAL = 4, // DO NOT CHANGE THESE NUMBERS
|
||||
} RRDCALC_STATUS;
|
||||
|
||||
typedef struct eval_variable {
|
||||
STRING *name;
|
||||
struct eval_variable *next;
|
||||
} EVAL_VARIABLE;
|
||||
|
||||
typedef struct eval_expression {
|
||||
const char *source;
|
||||
const char *parsed_as;
|
||||
|
||||
RRDCALC_STATUS *status;
|
||||
NETDATA_DOUBLE *myself;
|
||||
time_t *after;
|
||||
time_t *before;
|
||||
|
||||
NETDATA_DOUBLE result;
|
||||
|
||||
int error;
|
||||
BUFFER *error_msg;
|
||||
|
||||
// hidden EVAL_NODE *
|
||||
void *nodes;
|
||||
|
||||
// custom data to be used for looking up variables
|
||||
struct rrdcalc *rrdcalc;
|
||||
} EVAL_EXPRESSION;
|
||||
struct eval_expression;
|
||||
typedef struct eval_expression EVAL_EXPRESSION;
|
||||
typedef bool (*eval_expression_variable_lookup_t)(STRING *variable, void *data, NETDATA_DOUBLE *result);
|
||||
|
||||
#define EVAL_VALUE_INVALID 0
|
||||
#define EVAL_VALUE_NUMBER 1
|
||||
|
@ -82,6 +50,10 @@ const char *expression_strerror(int error);
|
|||
// 2 = FAILED, the error message is in: buffer_tostring(expression->error_msg)
|
||||
int expression_evaluate(EVAL_EXPRESSION *expression);
|
||||
|
||||
int health_variable_lookup(STRING *variable, struct rrdcalc *rc, NETDATA_DOUBLE *result);
|
||||
const char *expression_source(EVAL_EXPRESSION *expression);
|
||||
const char *expression_parsed_as(EVAL_EXPRESSION *expression);
|
||||
const char *expression_error_msg(EVAL_EXPRESSION *expression);
|
||||
NETDATA_DOUBLE expression_result(EVAL_EXPRESSION *expression);
|
||||
void expression_set_variable_lookup_callback(EVAL_EXPRESSION *expression, eval_expression_variable_lookup_t cb, void *data);
|
||||
|
||||
#endif //NETDATA_EVAL_H
|
||||
|
|
|
@ -1,169 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#include "health.h"
|
||||
|
||||
SILENCERS *silencers;
|
||||
|
||||
/**
|
||||
* Create Silencer
|
||||
*
|
||||
* Allocate a new silencer to Netdata.
|
||||
*
|
||||
* @return It returns the address off the silencer on success and NULL otherwise
|
||||
*/
|
||||
SILENCER *create_silencer(void) {
|
||||
SILENCER *t = callocz(1, sizeof(SILENCER));
|
||||
netdata_log_debug(D_HEALTH, "HEALTH command API: Created empty silencer");
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Health Silencers add
|
||||
*
|
||||
* Add more one silencer to the list of silencers.
|
||||
*
|
||||
* @param silencer
|
||||
*/
|
||||
void health_silencers_add(SILENCER *silencer) {
|
||||
// Add the created instance to the linked list in silencers
|
||||
silencer->next = silencers->silencers;
|
||||
silencers->silencers = silencer;
|
||||
netdata_log_debug(
|
||||
D_HEALTH,
|
||||
"HEALTH command API: Added silencer %s:%s:%s:%s",
|
||||
silencer->alarms,
|
||||
silencer->charts,
|
||||
silencer->contexts,
|
||||
silencer->hosts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Silencers Add Parameter
|
||||
*
|
||||
* Create a new silencer and adjust the variables
|
||||
*
|
||||
* @param silencer a pointer to the silencer that will be adjusted
|
||||
* @param key the key value sent by client
|
||||
* @param value the value sent to the key
|
||||
*
|
||||
* @return It returns the silencer configured on success and NULL otherwise
|
||||
*/
|
||||
SILENCER *health_silencers_addparam(SILENCER *silencer, char *key, char *value) {
|
||||
static uint32_t
|
||||
hash_alarm = 0,
|
||||
hash_template = 0,
|
||||
hash_chart = 0,
|
||||
hash_context = 0,
|
||||
hash_host = 0;
|
||||
|
||||
if (unlikely(!hash_alarm)) {
|
||||
hash_alarm = simple_uhash(HEALTH_ALARM_KEY);
|
||||
hash_template = simple_uhash(HEALTH_TEMPLATE_KEY);
|
||||
hash_chart = simple_uhash(HEALTH_CHART_KEY);
|
||||
hash_context = simple_uhash(HEALTH_CONTEXT_KEY);
|
||||
hash_host = simple_uhash(HEALTH_HOST_KEY);
|
||||
}
|
||||
|
||||
uint32_t hash = simple_uhash(key);
|
||||
if (unlikely(silencer == NULL)) {
|
||||
if (
|
||||
(hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) ||
|
||||
(hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) ||
|
||||
(hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) ||
|
||||
(hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) ||
|
||||
(hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY))
|
||||
) {
|
||||
silencer = create_silencer();
|
||||
}
|
||||
}
|
||||
|
||||
if (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) {
|
||||
silencer->alarms = strdupz(value);
|
||||
silencer->alarms_pattern = simple_pattern_create(silencer->alarms, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
} else if (hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) {
|
||||
silencer->charts = strdupz(value);
|
||||
silencer->charts_pattern = simple_pattern_create(silencer->charts, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
} else if (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) {
|
||||
silencer->contexts = strdupz(value);
|
||||
silencer->contexts_pattern = simple_pattern_create(silencer->contexts, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
} else if (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) {
|
||||
silencer->hosts = strdupz(value);
|
||||
silencer->hosts_pattern = simple_pattern_create(silencer->hosts, NULL, SIMPLE_PATTERN_EXACT, true);
|
||||
}
|
||||
|
||||
return silencer;
|
||||
}
|
||||
|
||||
/**
|
||||
* JSON Read Callback
|
||||
*
|
||||
* Callback called by netdata to create the silencer.
|
||||
*
|
||||
* @param e the main json structure
|
||||
*
|
||||
* @return It always return 0.
|
||||
*/
|
||||
int health_silencers_json_read_callback(JSON_ENTRY *e)
|
||||
{
|
||||
switch(e->type) {
|
||||
case JSON_OBJECT:
|
||||
#ifndef ENABLE_JSONC
|
||||
e->callback_function = health_silencers_json_read_callback;
|
||||
if(strcmp(e->name,"")) {
|
||||
// init silencer
|
||||
netdata_log_debug(D_HEALTH, "JSON: Got object with a name, initializing new silencer for %s",e->name);
|
||||
#endif
|
||||
e->callback_data = create_silencer();
|
||||
if(e->callback_data) {
|
||||
health_silencers_add(e->callback_data);
|
||||
}
|
||||
#ifndef ENABLE_JSONC
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
|
||||
case JSON_ARRAY:
|
||||
e->callback_function = health_silencers_json_read_callback;
|
||||
break;
|
||||
|
||||
case JSON_STRING:
|
||||
if(!strcmp(e->name,"type")) {
|
||||
netdata_log_debug(D_HEALTH, "JSON: Processing type=%s",e->data.string);
|
||||
if (!strcmp(e->data.string,"SILENCE")) silencers->stype = STYPE_SILENCE_NOTIFICATIONS;
|
||||
else if (!strcmp(e->data.string,"DISABLE")) silencers->stype = STYPE_DISABLE_ALARMS;
|
||||
} else {
|
||||
netdata_log_debug(D_HEALTH, "JSON: Adding %s=%s", e->name, e->data.string);
|
||||
if (e->callback_data)
|
||||
(void)health_silencers_addparam(e->callback_data, e->name, e->data.string);
|
||||
}
|
||||
break;
|
||||
|
||||
case JSON_BOOLEAN:
|
||||
netdata_log_debug(D_HEALTH, "JSON: Processing all_alarms");
|
||||
silencers->all_alarms=e->data.boolean?1:0;
|
||||
break;
|
||||
|
||||
case JSON_NUMBER:
|
||||
case JSON_NULL:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize Global Silencers
|
||||
*
|
||||
* Initialize the silencer for the whole netdata system.
|
||||
*
|
||||
* @return It returns 0 on success and -1 otherwise
|
||||
*/
|
||||
int health_initialize_global_silencers() {
|
||||
silencers = mallocz(sizeof(SILENCERS));
|
||||
silencers->all_alarms = 0;
|
||||
silencers->stype = STYPE_NONE;
|
||||
silencers->silencers = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -633,4 +633,14 @@ static inline bool strendswith(const char *string, const char *suffix) {
|
|||
return strcmp(string + string_len - suffix_len, suffix) == 0;
|
||||
}
|
||||
|
||||
/*
 * Check whether the first string_len bytes of string end with the first
 * suffix_len bytes of suffix.
 *
 * Only the given lengths are trusted: exactly suffix_len bytes are compared,
 * so the function also works on a length-delimited prefix of a longer buffer
 * and never reads past string + string_len.
 *
 * Returns false when either pointer is NULL or the suffix is longer than the
 * string. An empty suffix (suffix_len == 0) always matches.
 */
static inline bool strendswith_lengths(const char *string, size_t string_len, const char *suffix, size_t suffix_len) {
    if (string == NULL || suffix == NULL)
        return false;

    if (suffix_len > string_len)
        return false;

    // memcmp, not strcmp: the comparison must stop at the lengths given by the caller
    return memcmp(string + string_len - suffix_len, suffix, suffix_len) == 0;
}
|
||||
|
||||
#endif //NETDATA_INLINED_H
|
||||
|
|
154
libnetdata/json/json-c-parser-inline.h
Normal file
154
libnetdata/json/json-c-parser-inline.h
Normal file
|
@ -0,0 +1,154 @@
|
|||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
#ifndef NETDATA_JSON_C_PARSER_INLINE_H
|
||||
#define NETDATA_JSON_C_PARSER_INLINE_H
|
||||
|
||||
// Read the boolean member `member` of `jobj` into `dst`.
// When the member is missing or is not a boolean, an error is written to the
// `error` buffer and the enclosing function returns false.
#define JSONC_PARSE_BOOL_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \
    json_object *_jbool; \
    if (!json_object_object_get_ex(jobj, member, &_jbool) || \
        !json_object_is_type(_jbool, json_type_boolean)) { \
        buffer_sprintf(error, "missing or invalid type for '%s.%s' boolean", path, member); \
        return false; \
    } \
    dst = json_object_get_boolean(_jbool); \
} while(0)
|
||||
|
||||
// Read the string member `member` of `jobj` into the STRING `dst`, releasing
// the previous value of `dst` first.
// When the member is missing or is not a string: if `required` is true an
// error is written to `error` and the enclosing function returns false,
// otherwise `dst` is left untouched.
#define JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, member, dst, error, required) do { \
    json_object *_jstr; \
    if (!(json_object_object_get_ex(jobj, member, &_jstr) && \
          json_object_is_type(_jstr, json_type_string))) { \
        if(required) { \
            buffer_sprintf(error, "missing or invalid type for '%s.%s' string", path, member); \
            return false; \
        } \
    } \
    else { \
        string_freez(dst); \
        dst = string_strdupz(json_object_get_string(_jstr)); \
    } \
} while(0)
|
||||
|
||||
// Read the string member `member` of `jobj` into the STRING `dst`, releasing
// the previous value of `dst` first. The text "*" is stored as NULL.
// When the member is missing or is not a string, an error is written to
// `error` and the enclosing function returns false.
#define JSONC_PARSE_TXT2PATTERN_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \
    json_object *_jpattern; \
    if (!json_object_object_get_ex(jobj, member, &_jpattern) || \
        !json_object_is_type(_jpattern, json_type_string)) { \
        buffer_sprintf(error, "missing or invalid type for '%s.%s' string", path, member); \
        return false; \
    } \
    const char *_text = json_object_get_string(_jpattern); \
    string_freez(dst); \
    dst = (strcmp(_text, "*") == 0) ? NULL : string_strdupz(_text); \
} while(0)
|
||||
|
||||
// Read the string member `member` of `jobj` and parse it into the expression
// `dst`, releasing the previous value of `dst` first.
// An empty text, or the text "*", leaves `dst` untouched.
// When the member is missing, is not a string, or cannot be parsed, an error
// is written to `error` and the enclosing function returns false.
#define JSONC_PARSE_TXT2EXPRESSION_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \
    json_object *_jexpr; \
    if (!json_object_object_get_ex(jobj, member, &_jexpr) || \
        !json_object_is_type(_jexpr, json_type_string)) { \
        buffer_sprintf(error, "missing or invalid type for '%s.%s' expression", path, member); \
        return false; \
    } \
    const char *_text = json_object_get_string(_jexpr); \
    if(_text && *_text && strcmp(_text, "*") != 0) { \
        const char *_failed_at = NULL; \
        int _err = 0; \
        expression_free(dst); \
        dst = expression_parse(_text, &_failed_at, &_err); \
        if(!dst) { \
            buffer_sprintf(error, "expression '%s.%s' has a non-parseable expression '%s': %s at '%s'", \
                           path, member, _text, expression_strerror(_err), _failed_at); \
            return false; \
        } \
    } \
} while(0)
|
||||
|
||||
// Read the array member `member` of `jobj`, convert every string item with
// `converter` and OR the results into `dst` (which is cleared first).
// When the member is missing or is not an array, when an item is not a
// string, or when `converter` returns 0 for an item, an error is written to
// `error` and the enclosing function returns false.
#define JSONC_PARSE_ARRAY_OF_TXT2BITMAP_OR_ERROR_AND_RETURN(jobj, path, member, converter, dst, error) do { \
    json_object *_jarray; \
    if (!json_object_object_get_ex(jobj, member, &_jarray) || \
        !json_object_is_type(_jarray, json_type_array)) { \
        buffer_sprintf(error, "missing or invalid type for '%s.%s' array", path, member); \
        return false; \
    } \
    size_t _num_options = json_object_array_length(_jarray); \
    dst = 0; \
    size_t _i = 0; \
    while (_i < _num_options) { \
        json_object *_joption = json_object_array_get_idx(_jarray, _i); \
        if (!json_object_is_type(_joption, json_type_string)) { \
            buffer_sprintf(error, "invalid type for '%s.%s' at index %zu", path, member, _i); \
            return false; \
        } \
        const char *_option_str = json_object_get_string(_joption); \
        typeof(dst) _bit = converter(_option_str); \
        if (_bit == 0) { \
            buffer_sprintf(error, "unknown option '%s' in '%s.%s' at index %zu", _option_str, path, member, _i); \
            return false; \
        } \
        dst |= _bit; \
        ++_i; \
    } \
} while(0)
|
||||
|
||||
|
||||
// Read the string member `member` of `jobj` and store `converter(text)` in `dst`.
// When the member is missing or is not a string, an error is written to
// `error` and the enclosing function returns false.
#define JSONC_PARSE_TXT2ENUM_OR_ERROR_AND_RETURN(jobj, path, member, converter, dst, error) do { \
    json_object *_jenum; \
    if (!json_object_object_get_ex(jobj, member, &_jenum) || \
        !json_object_is_type(_jenum, json_type_string)) { \
        buffer_sprintf(error, "missing or invalid type (expected text value) for '%s.%s' enum", path, member); \
        return false; \
    } \
    dst = converter(json_object_get_string(_jenum)); \
} while(0)
|
||||
|
||||
// Read the numeric member `member` of `jobj` into the integer `dst`.
// An int is stored as is, a double is cast to the type of `dst`, and a JSON
// null is stored as 0.
// When the member is missing or has any other type, an error is written to
// `error` and the enclosing function returns false.
#define JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \
    json_object *_j; \
    if (json_object_object_get_ex(jobj, member, &_j)) { \
        if (_j != NULL && json_object_is_type(_j, json_type_int)) \
            dst = json_object_get_int(_j); \
        else if (_j != NULL && json_object_is_type(_j, json_type_double)) \
            dst = (typeof(dst))json_object_get_double(_j); \
        else if (_j == NULL) \
            dst = 0; \
        else { \
            buffer_sprintf(error, "not supported type (expected int) for '%s.%s'", path, member); \
            return false; \
        } \
    } else { \
        buffer_sprintf(error, "missing or invalid type (expected int value or null) for '%s.%s'", path, member); \
        return false; \
    } \
} while(0)
|
||||
|
||||
// Read the numeric member `member` of `jobj` into the floating point `dst`.
// A double is stored as is, an int is cast to the type of `dst`, and a JSON
// null is stored as NAN.
// When the member is missing or has any other type, an error is written to
// `error` and the enclosing function returns false.
#define JSONC_PARSE_DOUBLE_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \
    json_object *_jnum; \
    if (!json_object_object_get_ex(jobj, member, &_jnum)) { \
        buffer_sprintf(error, "missing or invalid type (expected double value or null) for '%s.%s'", path, member); \
        return false; \
    } \
    if (_jnum == NULL) \
        dst = NAN; \
    else if (json_object_is_type(_jnum, json_type_double)) \
        dst = json_object_get_double(_jnum); \
    else if (json_object_is_type(_jnum, json_type_int)) \
        dst = (typeof(dst))json_object_get_int(_jnum); \
    else { \
        buffer_sprintf(error, "not supported type (expected double) for '%s.%s'", path, member); \
        return false; \
    } \
} while(0)
|
||||
|
||||
// Descend into the member `member` of `jobj`: build the child path
// ("path.member", or just "member" when `path` is empty) and call
// `callback(child, child_path, dst, error)`.
// When the member is missing an error is written to `error`; in that case,
// and when the callback returns false, the enclosing function returns false.
#define JSONC_PARSE_SUBOBJECT(jobj, path, member, dst, callback, error) do { \
    json_object *_jchild; \
    if (!json_object_object_get_ex(jobj, member, &_jchild)) { \
        buffer_sprintf(error, "missing '%s.%s' object", path, member); \
        return false; \
    } \
    char _new_path[strlen(path) + strlen(member) + 2]; \
    snprintfz(_new_path, sizeof(_new_path), "%s%s%s", path, *path?".":"", member); \
    if (!callback(_jchild, _new_path, dst, error)) \
        return false; \
} while(0)
|
||||
|
||||
#endif //NETDATA_JSON_C_PARSER_INLINE_H
|
|
@ -1503,12 +1503,15 @@ int path_is_file(const char *path, const char *subpath) {
|
|||
return is_file;
|
||||
}
|
||||
|
||||
void recursive_config_double_dir_load(const char *user_path, const char *stock_path, const char *subpath, int (*callback)(const char *filename, void *data), void *data, size_t depth) {
|
||||
void recursive_config_double_dir_load(const char *user_path, const char *stock_path, const char *subpath, int (*callback)(const char *filename, void *data, bool stock_config), void *data, size_t depth) {
|
||||
if(depth > 3) {
|
||||
netdata_log_error("CONFIG: Max directory depth reached while reading user path '%s', stock path '%s', subpath '%s'", user_path, stock_path, subpath);
|
||||
return;
|
||||
}
|
||||
|
||||
if(!stock_path)
|
||||
stock_path = user_path;
|
||||
|
||||
char *udir = strdupz_path_subpath(user_path, subpath);
|
||||
char *sdir = strdupz_path_subpath(stock_path, subpath);
|
||||
|
||||
|
@ -1542,7 +1545,7 @@ void recursive_config_double_dir_load(const char *user_path, const char *stock_p
|
|||
len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) {
|
||||
char *filename = strdupz_path_subpath(udir, de->d_name);
|
||||
netdata_log_debug(D_HEALTH, "CONFIG calling callback for user file '%s'", filename);
|
||||
callback(filename, data);
|
||||
callback(filename, data, false);
|
||||
freez(filename);
|
||||
continue;
|
||||
}
|
||||
|
@ -1590,7 +1593,7 @@ void recursive_config_double_dir_load(const char *user_path, const char *stock_p
|
|||
len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) {
|
||||
char *filename = strdupz_path_subpath(sdir, de->d_name);
|
||||
netdata_log_debug(D_HEALTH, "CONFIG calling callback for stock file '%s'", filename);
|
||||
callback(filename, data);
|
||||
callback(filename, data, true);
|
||||
freez(filename);
|
||||
continue;
|
||||
}
|
||||
|
@ -1997,7 +2000,7 @@ bool rrdr_relative_window_to_absolute(time_t *after, time_t *before, time_t now)
|
|||
}
|
||||
|
||||
// Returns 1 if an absolute period was requested or 0 if it was a relative period
|
||||
bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_t *now_ptr, bool unittest_running) {
|
||||
bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_t *now_ptr, bool unittest) {
|
||||
time_t now = now_realtime_sec() - 1;
|
||||
|
||||
if(now_ptr)
|
||||
|
@ -2011,16 +2014,16 @@ bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_
|
|||
time_t absolute_minimum_time = now - (10 * 365 * 86400);
|
||||
time_t absolute_maximum_time = now + (1 * 365 * 86400);
|
||||
|
||||
if (after_requested < absolute_minimum_time && !unittest_running)
|
||||
if (after_requested < absolute_minimum_time && !unittest)
|
||||
after_requested = absolute_minimum_time;
|
||||
|
||||
if (after_requested > absolute_maximum_time && !unittest_running)
|
||||
if (after_requested > absolute_maximum_time && !unittest)
|
||||
after_requested = absolute_maximum_time;
|
||||
|
||||
if (before_requested < absolute_minimum_time && !unittest_running)
|
||||
if (before_requested < absolute_minimum_time && !unittest)
|
||||
before_requested = absolute_minimum_time;
|
||||
|
||||
if (before_requested > absolute_maximum_time && !unittest_running)
|
||||
if (before_requested > absolute_maximum_time && !unittest)
|
||||
before_requested = absolute_maximum_time;
|
||||
|
||||
*before = before_requested;
|
||||
|
|
|
@ -572,7 +572,7 @@ void recursive_config_double_dir_load(
|
|||
const char *user_path
|
||||
, const char *stock_path
|
||||
, const char *subpath
|
||||
, int (*callback)(const char *filename, void *data)
|
||||
, int (*callback)(const char *filename, void *data, bool stock_config)
|
||||
, void *data
|
||||
, size_t depth
|
||||
);
|
||||
|
@ -744,7 +744,7 @@ extern char *netdata_configured_host_prefix;
|
|||
#include "adaptive_resortable_list/adaptive_resortable_list.h"
|
||||
#include "url/url.h"
|
||||
#include "json/json.h"
|
||||
#include "health/health.h"
|
||||
#include "json/json-c-parser-inline.h"
|
||||
#include "string/utf8.h"
|
||||
#include "libnetdata/aral/aral.h"
|
||||
#include "onewayalloc/onewayalloc.h"
|
||||
|
@ -902,7 +902,7 @@ extern bool unittest_running;
|
|||
#define API_RELATIVE_TIME_MAX (3 * 365 * 86400)
|
||||
|
||||
bool rrdr_relative_window_to_absolute(time_t *after, time_t *before, time_t now);
|
||||
bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_t *now_ptr, bool unittest_running);
|
||||
bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_t *now_ptr, bool unittest);
|
||||
|
||||
int netdata_base64_decode(const char *encoded, char *decoded, size_t decoded_size);
|
||||
|
||||
|
|
|
@ -56,14 +56,23 @@ inline int group_by_label_isspace(char c) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Separator test used to fill isspace_dyncfg_id_map:
// returns 1 for ':' and 0 for every other character.
inline int dyncfg_id_isspace(char c) {
    return (c == ':') ? 1 : 0;
}
|
||||
|
||||
bool isspace_map_pluginsd[256] = {};
|
||||
bool isspace_map_config[256] = {};
|
||||
bool isspace_map_group_by_label[256] = {};
|
||||
bool isspace_dyncfg_id_map[256] = {};
|
||||
|
||||
__attribute__((constructor)) void initialize_is_space_arrays(void) {
|
||||
for(int c = 0; c < 256 ; c++) {
|
||||
isspace_map_pluginsd[c] = pluginsd_isspace((char) c);
|
||||
isspace_map_config[c] = config_isspace((char) c);
|
||||
isspace_map_group_by_label[c] = group_by_label_isspace((char) c);
|
||||
isspace_dyncfg_id_map[c] = dyncfg_id_isspace((char)c);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,10 +22,12 @@ static inline void line_splitter_reset(struct line_splitter *line) {
|
|||
int pluginsd_isspace(char c);
|
||||
int config_isspace(char c);
|
||||
int group_by_label_isspace(char c);
|
||||
int dyncfg_id_isspace(char c);
|
||||
|
||||
extern bool isspace_map_pluginsd[256];
|
||||
extern bool isspace_map_config[256];
|
||||
extern bool isspace_map_group_by_label[256];
|
||||
extern bool isspace_dyncfg_id_map[256];
|
||||
|
||||
static inline size_t quoted_strings_splitter(char *str, char **words, size_t max_words, bool *isspace_map) {
|
||||
char *s = str, quote = 0;
|
||||
|
@ -110,6 +112,9 @@ static inline size_t quoted_strings_splitter(char *str, char **words, size_t max
|
|||
#define quoted_strings_splitter_pluginsd(str, words, max_words) \
|
||||
quoted_strings_splitter(str, words, max_words, isspace_map_pluginsd)
|
||||
|
||||
#define quoted_strings_splitter_dyncfg_id(str, words, max_words) \
|
||||
quoted_strings_splitter(str, words, max_words, isspace_dyncfg_id_map)
|
||||
|
||||
static inline char *get_word(char **words, size_t num_words, size_t index) {
|
||||
if (unlikely(index >= num_words))
|
||||
return NULL;
|
||||
|
|
|
@ -18,17 +18,6 @@ void signals_block(void){}
|
|||
void signals_unblock(void){}
|
||||
void signals_reset(void){}
|
||||
|
||||
#ifndef UNIT_TESTING
|
||||
// callback required by eval()
|
||||
int health_variable_lookup(STRING *variable, struct rrdcalc *rc, NETDATA_DOUBLE *result)
|
||||
{
|
||||
(void)variable;
|
||||
(void)rc;
|
||||
(void)result;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
void rrdset_thread_rda_free(void){}
|
||||
void sender_thread_buffer_free(void){}
|
||||
void query_target_free(void){}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue