diff --git a/database/contexts/api_v2.c b/database/contexts/api_v2.c
index 785e3b41b4..4894af4798 100644
--- a/database/contexts/api_v2.c
+++ b/database/contexts/api_v2.c
@@ -98,7 +98,11 @@ struct rrdcontext_to_json_v2_data {
     time_t now;
 
     BUFFER *wb;
-    struct api_v2_contexts_request *request;
+    union {                                             // both request kinds share this slot
+        struct api_v2_contexts_request *request;
+        struct api_v2_alerts_request *alerts_request;   // the member alerts_to_json_v2() sets
+    };
+
     DICTIONARY *ctx;
 
     CONTEXTS_V2_OPTIONS options;
@@ -110,6 +114,16 @@ struct rrdcontext_to_json_v2_data {
         size_t ni;
     } nodes;
 
+    struct {
+        Pvoid_t JudyHS;                 // config_hash_id (uuid bytes) -> position in the "alerts" array
+//        ALERT_OPTIONS alert_options;
+        SIMPLE_PATTERN *scope_pattern;
+        SIMPLE_PATTERN *pattern;
+//        time_t after;
+//        time_t before;
+        size_t li;                      // next position to hand out
+    } alerts;
+
     struct {
         SIMPLE_PATTERN *scope_pattern;
         SIMPLE_PATTERN *pattern;
@@ -136,6 +150,24 @@ struct rrdcontext_to_json_v2_data {
     struct query_timings timings;
 };
 
+// Remember 'idx' as the position of alert configuration 'uuid'; an insert failure is ignored.
+static void add_alert_index(Pvoid_t *JudyHS, uuid_t *uuid, ssize_t idx)
+{
+    Pvoid_t *PValue = JudyHSIns(JudyHS, uuid, sizeof(*uuid), PJE0);
+    if (!PValue || PValue == PJERR)
+        return;
+    *((Word_t *) PValue) = (Word_t) idx;
+}
+
+// Return the position stored by add_alert_index() for 'uuid', or -1 when it is not indexed.
+ssize_t get_alert_index(Pvoid_t JudyHS, uuid_t *uuid)
+{
+    Pvoid_t *PValue = JudyHSGet(JudyHS, uuid, sizeof(*uuid));
+    if (!PValue)
+        return -1;
+    return (ssize_t) *((Word_t *) PValue);
+}
+
 static FTS_MATCH rrdcontext_to_json_v2_full_text_search(struct rrdcontext_to_json_v2_data *ctl, RRDCONTEXT *rc, SIMPLE_PATTERN *q) {
     if(unlikely(full_text_search_string(&ctl->q.fts, q, rc->id) ||
                 full_text_search_string(&ctl->q.fts, q, rc->family)))
@@ -250,6 +282,12 @@ void buffer_json_agent_status_id(BUFFER *wb, size_t ai, usec_t duration_ut) {
     buffer_json_object_close(wb);
 }
 
+// Context callback of the alerts walks; forwards unchanged to rrdcontext_to_json_v2_add_context().
+static ssize_t alert_to_json_v2_add_context(void *data, RRDCONTEXT_ACQUIRED *rca, bool queryable_context __maybe_unused)
+{
+    return rrdcontext_to_json_v2_add_context(data, rca, queryable_context);
+}
+
 void buffer_json_node_add_v2(BUFFER *wb, RRDHOST *host, size_t ni, usec_t duration_ut, bool status) {
     buffer_json_member_add_string(wb, "mg", host->machine_guid);
 
@@ -595,6 +633,233 @@ static ssize_t rrdcontext_to_json_v2_add_host(void *data, RRDHOST *host, bool qu
     return 1;
 }
 
+// Host callback for the "nodes" array: when CONTEXTS_V2_NODES is set and the host matched, adds the
+// node object, plus an "instances" array (via health_alert2json()) if transitions were requested.
+// Returns 1 if the host matched, 0 if it was skipped, -1 if the context walk returned -1.
+static ssize_t alert_to_json_v2_add_host(void *data, RRDHOST *host, bool queryable_host) {
+    if(!queryable_host || !host->rrdctx.contexts)
+        // the host matches the 'scope_host' but does not match the 'host' patterns
+        // or the host does not have any contexts
+        return 0;
+
+    struct rrdcontext_to_json_v2_data *ctl = data;
+    BUFFER *wb = ctl->wb;
+
+    bool host_matched = (ctl->options & CONTEXTS_V2_NODES);
+    bool do_contexts = (ctl->options & (CONTEXTS_V2_CONTEXTS));
+
+    ctl->q.host_match = FTS_MATCHED_NONE;
+    if((ctl->options & CONTEXTS_V2_SEARCH)) {
+        // check if we match the host itself
+        if(ctl->q.pattern && (
+                full_text_search_string(&ctl->q.fts, ctl->q.pattern, host->hostname) ||
+                full_text_search_char(&ctl->q.fts, ctl->q.pattern, host->machine_guid) ||
+                (ctl->q.pattern && full_text_search_char(&ctl->q.fts, ctl->q.pattern, ctl->q.host_node_id_str)))) {
+            ctl->q.host_match = FTS_MATCHED_HOST;
+            do_contexts = true;
+        }
+    }
+
+    if(do_contexts) {
+        // save it
+        SIMPLE_PATTERN *old_q = ctl->q.pattern;
+
+        if(ctl->q.host_match == FTS_MATCHED_HOST)
+            // do not do pattern matching on contexts - we matched the host itself
+            ctl->q.pattern = NULL;
+
+        ssize_t added = query_scope_foreach_context(
+                host, ctl->alerts_request->scope_contexts,
+                ctl->contexts.scope_pattern, ctl->contexts.pattern,
+                alert_to_json_v2_add_context, queryable_host, ctl);
+
+        // restore it
+        ctl->q.pattern = old_q;
+
+        if(added == -1)
+            return -1;
+
+        if(added)
+            host_matched = true;
+    }
+
+    if(host_matched && (ctl->options & (CONTEXTS_V2_NODES))) {
+        buffer_json_add_array_item_object(wb);
+        buffer_json_node_add_v2(wb, host, ctl->nodes.ni++, 0, false);
+
+        // NOTE(review): "instances" is gated by ALERT_OPTION_TRANSITIONS here, while health_json.c
+        // gates the nested "transitions" by ALERT_OPTION_INSTANCES - the two look swapped, confirm
+        if (ctl->alerts_request->options & ALERT_OPTION_TRANSITIONS) {
+            if (rrdhost_flag_check(host, RRDHOST_FLAG_INITIALIZED_HEALTH)) {
+                buffer_json_member_add_array(wb, "instances");
+                health_alert2json(host, wb, ctl->alerts_request->options, ctl->alerts.JudyHS, ctl->alerts_request->after, ctl->alerts_request->before, ctl->alerts_request->last);
+                buffer_json_array_close(wb);
+            }
+        }
+
+        buffer_json_object_close(wb);
+    }
+
+    return host_matched ? 1 : 0;
+}
+
+// Decide whether 'rc' passes the request filters: alert_id alone when it is set,
+// otherwise the config hash pattern and then the alert name pattern (unset patterns pass).
+static inline bool alert_is_matched( struct api_v2_alerts_request *alerts_request, RRDCALC *rc)
+{
+    if (alerts_request->alert_id)
+        return (rc->id == alerts_request->alert_id);
+
+    if (alerts_request->config_hash_pattern) {
+        char hash_id[UUID_STR_LEN];
+        uuid_unparse_lower(rc->config_hash_id, hash_id);
+
+        // only a positive match passes, so that negative patterns really exclude
+        if (simple_pattern_matches_extract(alerts_request->config_hash_pattern, hash_id, NULL, 0) != SP_MATCHED_POSITIVE)
+            return false;
+    }
+
+    if (alerts_request->alert_name_pattern &&
+        simple_pattern_matches_string_extract(alerts_request->alert_name_pattern, rc->name, NULL, 0) != SP_MATCHED_POSITIVE)
+        return false;
+
+    return true;
+}
+
+// Host callback for the "alerts" array: adds one object per distinct config_hash_id among the
+// host's running alerts that pass the request filters, and indexes it in ctl->alerts.JudyHS.
+// Returns 1 if the host matched, 0 if it was skipped, -1 if the context walk returned -1.
+static ssize_t alert_to_json_v2_add_alert(void *data, RRDHOST *host, bool queryable_host) {
+    if(!queryable_host || !host->rrdctx.contexts)
+        // the host matches the 'scope_host' but does not match the 'host' patterns
+        // or the host does not have any contexts
+        return 0;
+
+    struct rrdcontext_to_json_v2_data *ctl = data;
+    BUFFER *wb = ctl->wb;
+
+    bool host_matched = (ctl->options & CONTEXTS_V2_NODES);
+    bool do_contexts = (ctl->options & (CONTEXTS_V2_CONTEXTS));
+
+    if(do_contexts) {
+        ssize_t added = query_scope_foreach_context(
+            host, ctl->alerts_request->scope_contexts,
+            ctl->contexts.scope_pattern, ctl->contexts.pattern,
+            alert_to_json_v2_add_context, queryable_host, ctl);
+
+        if(added == -1)
+            return -1;
+
+        if(added)
+            host_matched = true;
+    }
+
+    if(host_matched && (ctl->options & (CONTEXTS_V2_NODES))) {
+        if (rrdhost_flag_check(host, RRDHOST_FLAG_INITIALIZED_HEALTH)) {
+
+            RRDCALC *rc;
+            foreach_rrdcalc_in_rrdhost_read(host, rc) {
+                if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
+                    continue;
+
+                if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
+                    continue;
+
+                if ((ctl->alerts_request->options & ALERT_OPTION_ACTIVE) &&
+                    !(rc->status == RRDCALC_STATUS_WARNING || rc->status == RRDCALC_STATUS_CRITICAL))
+                    continue;
+
+                if (!alert_is_matched(ctl->alerts_request, rc))
+                    continue;
+
+                // a configuration already listed (same config_hash_id) is not listed again
+                ssize_t idx = get_alert_index(ctl->alerts.JudyHS, &rc->config_hash_id);
+                if (idx >= 0)
+                    continue;
+
+                buffer_json_add_array_item_object(wb);
+                add_alert_index(&ctl->alerts.JudyHS, &rc->config_hash_id, (ssize_t)ctl->alerts.li++);
+
+                char hash_id[GUID_LEN + 1];
+                uuid_unparse_lower(rc->config_hash_id, hash_id);
+                buffer_json_member_add_string(wb, "config_hash_id", hash_id);
+                buffer_json_member_add_string(wb, "name", rrdcalc_name(rc));
+                buffer_json_member_add_string(wb, "chart", rrdcalc_chart_name(rc));
+                buffer_json_member_add_string(wb, "family", (rc->rrdset) ? rrdset_family(rc->rrdset) : "");
+                buffer_json_member_add_string(wb, "class", rc->classification ? rrdcalc_classification(rc) : "Unknown");
+                buffer_json_member_add_string(wb, "component", rc->component ? rrdcalc_component(rc) : "Unknown");
+                buffer_json_member_add_string(wb, "type", rc->type ? rrdcalc_type(rc) : "Unknown");
+                buffer_json_member_add_string(wb, "units", rrdcalc_units(rc));
+                buffer_json_member_add_boolean(wb, "enabled", host->health.health_enabled);
+
+                if (ctl->alerts_request->options & ALERT_OPTION_CONFIG) {
+                    buffer_json_member_add_object(wb, "config");
+                    {
+                        buffer_json_member_add_boolean(wb, "active", (rc->rrdset));
+                        buffer_json_member_add_boolean(wb, "disabled", (rc->run_flags & RRDCALC_FLAG_DISABLED));
+                        buffer_json_member_add_boolean(wb, "silenced", (rc->run_flags & RRDCALC_FLAG_SILENCED));
+                        buffer_json_member_add_string(
+                            wb, "exec", rc->exec ? rrdcalc_exec(rc) : string2str(host->health.health_default_exec));
+                        buffer_json_member_add_string(
+                            wb,
+                            "recipient",
+                            rc->recipient ? rrdcalc_recipient(rc) : string2str(host->health.health_default_recipient));
+                        buffer_json_member_add_string(wb, "info", rrdcalc_info(rc));
+                        buffer_json_member_add_string(wb, "source", rrdcalc_source(rc));
+                        buffer_json_member_add_time_t(wb, "update_every", rc->update_every);
+                        buffer_json_member_add_time_t(wb, "delay_up_duration", rc->delay_up_duration);
+                        buffer_json_member_add_time_t(wb, "delay_down_duration", rc->delay_down_duration);
+                        buffer_json_member_add_time_t(wb, "delay_max_duration", rc->delay_max_duration);
+                        buffer_json_member_add_double(wb, "delay_multiplier", rc->delay_multiplier);
+                        buffer_json_member_add_time_t(wb, "delay", rc->delay_last);
+                        buffer_json_member_add_time_t(wb, "warn_repeat_every", rc->warn_repeat_every);
+                        buffer_json_member_add_time_t(wb, "crit_repeat_every", rc->crit_repeat_every);
+                        if (unlikely(rc->options & RRDCALC_OPTION_NO_CLEAR_NOTIFICATION)) {
+                            buffer_json_member_add_boolean(wb, "no_clear_notification", true);
+                        }
+
+                        if (rc->calculation) {
+                            buffer_json_member_add_string(wb, "calc", rc->calculation->source);
+                            buffer_json_member_add_string(wb, "calc_parsed", rc->calculation->parsed_as);
+                        }
+
+                        if (rc->warning) {
+                            buffer_json_member_add_string(wb, "warn", rc->warning->source);
+                            buffer_json_member_add_string(wb, "warn_parsed", rc->warning->parsed_as);
+                        }
+
+                        if (rc->critical) {
+                            buffer_json_member_add_string(wb, "crit", rc->critical->source);
+                            buffer_json_member_add_string(wb, "crit_parsed", rc->critical->parsed_as);
+                        }
+
+                        if (RRDCALC_HAS_DB_LOOKUP(rc)) {
+                            if (rc->dimensions)
+                                buffer_json_member_add_string(wb, "lookup_dimensions", rrdcalc_dimensions(rc));
+
+                            buffer_json_member_add_string(wb, "lookup_method", time_grouping_method2string(rc->group));
+                            buffer_json_member_add_time_t(wb, "lookup_after", rc->after);
+                            buffer_json_member_add_time_t(wb, "lookup_before", rc->before);
+
+                            BUFFER *temp_id = buffer_create(1, NULL);
+                            buffer_data_options2string(temp_id, rc->options);
+
+                            buffer_json_member_add_string(wb, "lookup_options", buffer_tostring(temp_id));
+
+                            buffer_free(temp_id);
+                        }
+                    }
+                    buffer_json_object_close(wb); // config
+                }
+                buffer_json_object_close(wb); // Alert
+            }
+            foreach_rrdcalc_in_rrdhost_done(rc);
+        }
+    }
+
+    return host_matched ? 1 : 0;
+}
+
 static void buffer_json_contexts_v2_options_to_array(BUFFER *wb, CONTEXTS_V2_OPTIONS options) {
     if(options & CONTEXTS_V2_DEBUG)
         buffer_json_add_array_item_string(wb, "debug");
@@ -991,3 +1256,122 @@ cleanup:
     return resp;
 }
 
+// Render the v2 alerts response into 'wb': the request echo, the "alerts" array (one entry per
+// config_hash_id), then the "nodes" array. Returns an HTTP response code; when a host walk fails,
+// 'wb' is reset to a plain-text reason. Frees req->config_hash_pattern and req->alert_name_pattern.
+int alerts_to_json_v2(BUFFER *wb, struct api_v2_alerts_request *req, CONTEXTS_V2_OPTIONS options)
+{
+    int resp = HTTP_RESP_OK;
+
+//    ALERT_OPTIONS alert_options = req->options;
+    struct rrdcontext_to_json_v2_data ctl = {
+        .wb = wb,
+        .alerts_request = req,
+        .ctx = NULL,
+        .options = options,
+        .versions = { 0 },
+        .nodes.scope_pattern = string_to_simple_pattern(req->scope_nodes),
+        .nodes.pattern = string_to_simple_pattern(req->nodes),
+        .contexts.pattern = string_to_simple_pattern(req->contexts),
+        .contexts.scope_pattern = string_to_simple_pattern(req->scope_contexts),
+        .timings = {
+            .received_ut = now_monotonic_usec(),
+        }
+    };
+
+    if(options & CONTEXTS_V2_CONTEXTS)
+    {
+        ctl.ctx = dictionary_create_advanced(
+            DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, NULL,
+            sizeof(struct rrdcontext_to_json_v2_entry));
+
+        dictionary_register_delete_callback(ctl.ctx, contexts_delete_callback, NULL);
+    }
+
+    time_t now_s = now_realtime_sec();
+    buffer_json_member_add_uint64(wb, "api", 2);
+
+    {
+        buffer_json_member_add_object(wb, "request");
+
+        buffer_json_member_add_object(wb, "scope");
+        buffer_json_member_add_string(wb, "scope_nodes", req->scope_nodes);
+        buffer_json_member_add_string(wb, "scope_contexts", req->scope_contexts);
+        buffer_json_object_close(wb);
+
+        buffer_json_member_add_object(wb, "selectors");
+        buffer_json_member_add_string(wb, "nodes", req->nodes);
+        buffer_json_member_add_string(wb, "contexts", req->contexts);
+        buffer_json_object_close(wb);
+
+        buffer_json_member_add_array(wb, "options");
+        buffer_json_contexts_v2_options_to_array(wb, options);
+        buffer_json_array_close(wb);
+
+        buffer_json_object_close(wb);
+    }
+
+    // Alert configuration
+    buffer_json_member_add_array(wb, "alerts");
+
+    ssize_t ret = query_scope_foreach_host(ctl.nodes.scope_pattern, ctl.nodes.pattern,
+                                           alert_to_json_v2_add_alert, &ctl, &ctl.versions, NULL);
+
+    if(unlikely(ret < 0)) {
+        buffer_flush(wb);
+
+        if(ret == -2) {
+            buffer_strcat(wb, "query timeout");
+            resp = HTTP_RESP_GATEWAY_TIMEOUT;
+        }
+        else {
+            buffer_strcat(wb, "query interrupted");
+            resp = HTTP_RESP_BACKEND_FETCH_FAILED;
+        }
+        goto cleanup;
+    }
+
+    buffer_json_array_close(wb); // alerts
+
+    buffer_json_member_add_array(wb, "nodes");
+
+    ret = query_scope_foreach_host(ctl.nodes.scope_pattern, ctl.nodes.pattern,
+                                   alert_to_json_v2_add_host, &ctl,
+                                   &ctl.versions, NULL);
+
+    if(unlikely(ret < 0)) {
+        buffer_flush(wb);
+
+        if(ret == -2) {
+            buffer_strcat(wb, "query timeout");
+            resp = HTTP_RESP_GATEWAY_TIMEOUT;
+        }
+        else {
+            buffer_strcat(wb, "query interrupted");
+            resp = HTTP_RESP_BACKEND_FETCH_FAILED;
+        }
+        goto cleanup;
+    }
+
+    buffer_json_array_close(wb); // Nodes
+
+    ctl.timings.executed_ut = now_monotonic_usec();
+    version_hashes_api_v2(wb, &ctl.versions);
+
+    buffer_json_agents_array_v2(wb, &ctl.timings, now_s, false);
+    buffer_json_cloud_timings(wb, "timings", &ctl.timings);
+    buffer_json_finalize(wb);
+
+cleanup:
+    // also reached from the error paths above, so the alert index is released on every exit
+    JudyHSFreeArray(&ctl.alerts.JudyHS, PJE0);
+//    dictionary_destroy(ctl.ctx);  // NOTE(review): disabled, so ctl.ctx is never released - confirm this is intended
+    simple_pattern_free(ctl.nodes.scope_pattern);
+    simple_pattern_free(ctl.nodes.pattern);
+    simple_pattern_free(ctl.contexts.pattern);
+    simple_pattern_free(ctl.contexts.scope_pattern);
+    simple_pattern_free(req->config_hash_pattern);
+    simple_pattern_free(req->alert_name_pattern);
+
+    return resp;
+}
diff --git
a/database/contexts/rrdcontext.h b/database/contexts/rrdcontext.h
index 540f45075f..644aec9068 100644
--- a/database/contexts/rrdcontext.h
+++ b/database/contexts/rrdcontext.h
@@ -475,6 +475,27 @@ struct api_v2_contexts_request {
     void *interrupt_callback_data;
 };
 
+struct api_v2_alerts_request {                  // input of alerts_to_json_v2()
+    char *scope_nodes;                          // compiled into the nodes scope pattern
+    char *scope_contexts;                       // compiled into the contexts scope pattern
+    char *nodes;                                // compiled into the nodes pattern
+    char *contexts;                             // compiled into the contexts pattern
+    char *config_hash;
+    char *state;
+    SIMPLE_PATTERN *config_hash_pattern;        // matched against the lowercase config_hash_id; freed by alerts_to_json_v2()
+    char *transition_id;
+    time_t alert_id;                            // when non-zero, the only filter: compared with rc->id (NOTE(review): time_t looks unintended for an id)
+    uint32_t last;                              // forwarded as the LIMIT of the transitions query
+    char *alert_name;
+    SIMPLE_PATTERN *alert_name_pattern;         // matched against rc->name; freed by alerts_to_json_v2()
+    ALERT_OPTIONS options;
+    time_t after;                               // 0 = no lower bound
+    time_t before;                              // 0 = no upper bound
+    char *q;
+};
+
+ssize_t get_alert_index(Pvoid_t JudyHS, uuid_t *uuid);  // -1 when 'uuid' is not indexed
+
 typedef enum __attribute__ ((__packed__)) {
     CONTEXTS_V2_DEBUG = (1 << 0),
     CONTEXTS_V2_MINIFY = (1 << 1),
@@ -490,6 +511,7 @@ typedef enum __attribute__ ((__packed__)) {
 } CONTEXTS_V2_OPTIONS;
 
 int rrdcontext_to_json_v2(BUFFER *wb, struct api_v2_contexts_request *req, CONTEXTS_V2_OPTIONS options);
+int alerts_to_json_v2(BUFFER *wb, struct api_v2_alerts_request *req, CONTEXTS_V2_OPTIONS options);  // returns an HTTP response code
 RRDCONTEXT_TO_JSON_OPTIONS rrdcontext_to_json_parse_options(char *o);
 void buffer_json_agents_array_v2(BUFFER *wb, struct query_timings *timings, time_t now_s, bool info);
 
diff --git a/database/rrd.h b/database/rrd.h
index 50f088d139..58a0edbf6d 100644
--- a/database/rrd.h
+++ b/database/rrd.h
@@ -1014,6 +1014,7 @@ struct alarm_entry {
     uint32_t unique_id;
     uint32_t alarm_id;
     uint32_t alarm_event_id;
+    usec_t global_id;   // stored in and loaded from health_log_detail.global_id
     uuid_t config_hash_id;
     uuid_t transition_id;
 
diff --git a/database/rrdcalc.c b/database/rrdcalc.c
index 69b749dd00..4abeeb4f51 100644
--- a/database/rrdcalc.c
+++ b/database/rrdcalc.c
@@ -281,6 +281,7 @@ static void rrdcalc_link_to_rrdset(RRDSET *st, RRDCALC *rc) {
             0,
             rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0);
 
+        rc->ae = ae;    // keep the entry reachable from the alert
         health_alarm_log_add_entry(host, ae);
     }
 
@@ -324,6 +325,7 @@ static void rrdcalc_unlink_from_rrdset(RRDCALC *rc, bool having_ll_wrlock) {
             0,
             0);
 
+        rc->ae = ae;    // keep the entry reachable from the alert
         health_alarm_log_add_entry(host, ae);
     }
 
diff --git a/database/rrdcalc.h b/database/rrdcalc.h
index b9a9f627dc..a2bd3c4458 100644
--- a/database/rrdcalc.h
+++ b/database/rrdcalc.h
@@ -136,6 +136,7 @@ struct rrdcalc {
     int delay_up_current;           // the current up notification delay duration
     int delay_down_current;         // the current down notification delay duration
     int delay_last;                 // the last delay we used
+    ALARM_ENTRY *ae;                // last alarm entry
 
     // ------------------------------------------------------------------------
     // variables this alarm exposes to the rest of the alarms
diff --git a/database/sqlite/sqlite_health.c b/database/sqlite/sqlite_health.c
index c51add0179..9f6cea2fde 100644
--- a/database/sqlite/sqlite_health.c
+++ b/database/sqlite/sqlite_health.c
@@ -90,7 +90,7 @@ failed:
 #define SQL_INSERT_HEALTH_LOG_DETAIL "INSERT INTO health_log_detail (health_log_id, unique_id, alarm_id, alarm_event_id, " \
     "updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, " \
     "info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, transition_id, global_id) " \
-    "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,now_usec(0)); "
+    "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,@global_id); "
 void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) {
     sqlite3_stmt *res = NULL;
     int rc;
@@ -318,6 +318,12 @@ void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) {
         goto failed;
     }
 
+    rc = sqlite3_bind_int64(res, 22, (sqlite3_int64) ae->global_id);    // @global_id follows the 21 '?' placeholders
+    if (unlikely(rc != SQLITE_OK)) {
+        error_report("Failed to bind global_id parameter for SQL_INSERT_HEALTH_LOG_DETAIL");
+        goto failed;
+    }
+
     rc = execute_insert(res);
     if (unlikely(rc != SQLITE_DONE)) {
         error_report("HEALTH [%s]: Failed to execute SQL_INSERT_HEALTH_LOG_DETAIL, rc = %d", rrdhost_hostname(host), rc);
@@ -740,7 +746,13 @@ void sql_check_removed_alerts_state(RRDHOST *host)
 /* Health related SQL queries
    Load from the health log table
 */
-#define SQL_LOAD_HEALTH_LOG "SELECT hld.unique_id, hld.alarm_id, hld.alarm_event_id, hl.config_hash_id, hld.updated_by_id, hld.updates_id, hld.when_key, hld.duration, hld.non_clear_duration, hld.flags, hld.exec_run_timestamp, hld.delay_up_to_timestamp, hl.name, hl.chart, hl.family, hl.exec, hl.recipient, ah.source, hl.units, hld.info, hld.exec_code, hld.new_status, hld.old_status, hld.delay, hld.new_value, hld.old_value, hld.last_repeat, ah.class, ah.component, ah.type, hl.chart_context, hld.transition_id FROM health_log hl, alert_hash ah, health_log_detail hld where hl.config_hash_id = ah.hash_id and hl.host_id = @host_id and hl.last_transition_id = hld.transition_id;"
+#define SQL_LOAD_HEALTH_LOG "SELECT hld.unique_id, hld.alarm_id, hld.alarm_event_id, hl.config_hash_id, hld.updated_by_id, " \
+            "hld.updates_id, hld.when_key, hld.duration, hld.non_clear_duration, hld.flags, hld.exec_run_timestamp, " \
+            "hld.delay_up_to_timestamp, hl.name, hl.chart, hl.family, hl.exec, hl.recipient, ah.source, hl.units, " \
+            "hld.info, hld.exec_code, hld.new_status, hld.old_status, hld.delay, hld.new_value, hld.old_value, " \
+            "hld.last_repeat, ah.class, ah.component, ah.type, hl.chart_context, hld.transition_id, hld.global_id " \
+            "FROM health_log hl, alert_hash ah, health_log_detail hld " \
+            "WHERE hl.config_hash_id = ah.hash_id and hl.host_id = @host_id and hl.last_transition_id = hld.transition_id;"
 void sql_health_alarm_log_load(RRDHOST *host) {
     sqlite3_stmt *res = NULL;
     int ret;
@@ -911,8 +923,11 @@ void sql_health_alarm_log_load(RRDHOST *host) {
         else
             ae->chart_context = NULL;
 
-         if (sqlite3_column_type(res, 31) != SQLITE_NULL)
-             uuid_copy(ae->transition_id, *((uuid_t *) sqlite3_column_blob(res, 31)));
+        if (sqlite3_column_type(res, 31) != SQLITE_NULL)
+            uuid_copy(ae->transition_id, *((uuid_t *)sqlite3_column_blob(res, 31)));
+
+        if (sqlite3_column_type(res, 32) != SQLITE_NULL)    // 32 = hld.global_id, the last selected column
+            ae->global_id = sqlite3_column_int64(res, 32);
 
         char
value_string[100 + 1];
         string_freez(ae->old_value_string);
@@ -1674,3 +1689,139 @@ uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *
 
     return alarm_id;
 }
+
+// Result columns of SQL_SELECT_HEALTH_LOG_V2 by index - keep sql_health_alarm_log2json_v2() in sync:
+//   0 unique_id       1 alarm_event_id  2 updated_by_id       3 updates_id   4 when_key   5 duration
+//   6 non_clear_duration  7 flags       8 exec_run_timestamp  9 delay_up_to_timestamp    10 exec
+//  11 recipient  12 info  13 exec_code  14 new_status  15 old_status  16 delay  17 new_value
+//  18 old_value  19 last_repeat  20 transition_id  21 units  22 global_id
+#define SQL_SELECT_HEALTH_LOG_V2 "SELECT unique_id, alarm_event_id, updated_by_id, " \
+    "updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, " \
+    "exec, recipient, info, exec_code, new_status, old_status, delay, new_value, " \
+    "old_value, last_repeat, transition_id, units, d.global_id FROM health_log_detail d, health_log h " \
+    "WHERE h.host_id = @host_id AND h.health_log_id = d.health_log_id "
+
+// Append to 'wb' one array item object per health_log_detail row of 'host', highest alarm_event_id
+// first, at most 'max' rows. Filters that are zero/NULL (alert_id, chart, after, before) are not
+// applied. On a prepare or bind failure the error is reported and nothing is appended.
+void sql_health_alarm_log2json_v2(RRDHOST *host, BUFFER *wb, uint32_t alert_id, char *chart, time_t after, time_t before, uint32_t max)
+{
+    sqlite3_stmt *res = NULL;
+    int rc;
+
+    BUFFER *command = buffer_create(MAX_HEALTH_SQL_SIZE, NULL);
+
+    buffer_sprintf(command, SQL_SELECT_HEALTH_LOG_V2);
+
+    if (alert_id)
+        buffer_sprintf(command, "AND d.alarm_id = %u ", alert_id);
+
+    // the chart name is bound below, it is never pasted into the SQL text
+    if (chart)
+        buffer_sprintf(command, "AND h.chart = @chart ");
+
+    if (after)
+        buffer_sprintf(command, "AND d.when_key >= %lld ", (long long)after);
+
+    if (before)
+        buffer_sprintf(command, "AND d.when_key < %lld ", (long long)before);
+
+    buffer_sprintf(command, " ORDER BY d.alarm_event_id DESC LIMIT %u", max);
+
+    rc = sqlite3_prepare_v2(db_meta, buffer_tostring(command), -1, &res, 0);
+    if (unlikely(rc != SQLITE_OK)) {
+        error_report("Failed to prepare statement SQL_SELECT_HEALTH_LOG_V2");
+        buffer_free(command);
+        return;
+    }
+
+    rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC);
+    if (unlikely(rc != SQLITE_OK)) {
+        error_report("Failed to bind host_id parameter for SQL_SELECT_HEALTH_LOG_V2");
+        goto done;
+    }
+
+    if (chart) {
+        // @chart is the second named parameter of the statement, so its index is 2
+        rc = sqlite3_bind_text(res, 2, chart, -1, SQLITE_STATIC);
+        if (unlikely(rc != SQLITE_OK)) {
+            error_report("Failed to bind chart parameter for SQL_SELECT_HEALTH_LOG_V2");
+            goto done;
+        }
+    }
+
+    while (sqlite3_step(res) == SQLITE_ROW) {
+        char old_value_string[100 + 1];
+        char new_value_string[100 + 1];
+
+        // One alarm
+        buffer_json_add_array_item_object(wb);
+
+        if (sqlite3_column_type(res, 20) != SQLITE_NULL) {
+            char transition_id[UUID_STR_LEN];
+            uuid_unparse_lower(*((uuid_t *)sqlite3_column_blob(res, 20)), transition_id);
+            buffer_json_member_add_string(wb, "transition_id", transition_id);
+        }
+        else
+            buffer_json_member_add_quoted_string(wb, "transition_id", "null");
+
+        buffer_json_member_add_uint64(wb, "gi", (uint64_t) sqlite3_column_int64(res, 22));
+
+        uint64_t flags = sqlite3_column_int64(res, 7);
+        buffer_json_member_add_uint64 (wb, "utc_offset", host->utc_offset);
+        buffer_json_member_add_string (wb, "timezone", rrdhost_abbrev_timezone(host));
+        buffer_json_member_add_uint64 (wb, "unique_id", (unsigned int) sqlite3_column_int64(res, 0));
+        buffer_json_member_add_uint64 (wb, "alarm_event_id", (unsigned int) sqlite3_column_int64(res, 1));
+        buffer_json_member_add_object(wb, "notif_status");
+        {
+            const char *exec = (const char *) sqlite3_column_text(res, 10);
+            const char *recipient = (const char *) sqlite3_column_text(res, 11);
+
+            buffer_json_member_add_boolean (wb, "processed", flags & HEALTH_ENTRY_FLAG_PROCESSED);
+            buffer_json_member_add_boolean(wb, "updated", flags & HEALTH_ENTRY_FLAG_UPDATED);
+            buffer_json_member_add_uint64 (wb, "exec_run", (long unsigned int)sqlite3_column_int64(res, 8));
+            buffer_json_member_add_boolean (wb, "exec_failed", flags & HEALTH_ENTRY_FLAG_EXEC_FAILED);
+            buffer_json_member_add_string (wb, "exec", exec ? (const char *) exec : string2str(host->health.health_default_exec));
+            buffer_json_member_add_string (wb, "recipient", recipient ? recipient : string2str(host->health.health_default_recipient));
+            buffer_json_member_add_uint64 (wb, "exec_code", sqlite3_column_int(res, 13));
+        }
+        buffer_json_object_close(wb); // notif_status
+        buffer_json_member_add_uint64 (wb, "when", (long unsigned int)sqlite3_column_int64(res, 4));
+        buffer_json_member_add_uint64 (wb, "duration", (long unsigned int)sqlite3_column_int64(res, 5));
+        buffer_json_member_add_uint64 (wb, "non_clear_duration", (long unsigned int)sqlite3_column_int64(res, 6));
+
+        buffer_json_member_add_string (wb, "status", rrdcalc_status2string(sqlite3_column_int(res, 14)));
+        buffer_json_member_add_string (wb, "old_status", rrdcalc_status2string(sqlite3_column_int(res, 15)));
+        buffer_json_member_add_uint64 (wb, "delay", sqlite3_column_int(res, 16));
+        buffer_json_member_add_uint64 (wb, "delay_up_to_timestamp", (long unsigned int)sqlite3_column_int64(res, 9));
+        buffer_json_member_add_uint64 (wb, "updated_by_id", (unsigned int)sqlite3_column_int64(res, 2));
+        buffer_json_member_add_uint64 (wb, "updates_id", (unsigned int)sqlite3_column_int64(res, 3));
+        buffer_json_member_add_uint64(wb, "last_repeat", (long unsigned int)sqlite3_column_int64(res, 19));
+        buffer_json_member_add_boolean (wb, "silenced", flags & HEALTH_ENTRY_FLAG_SILENCED);
+        buffer_json_member_add_string (wb, "info", (const char *)sqlite3_column_text(res, 12));
+
+        if(unlikely(flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION))
+            buffer_json_member_add_boolean (wb, "no_clear_notification", true);
+
+        buffer_json_member_add_string (wb, "value_string", sqlite3_column_type(res, 17) == SQLITE_NULL ? "-" : format_value_and_unit(new_value_string, 100, sqlite3_column_double(res, 17), (char *) sqlite3_column_text(res, 21), -1));
+        if (sqlite3_column_type(res, 17) == SQLITE_NULL) {
+            buffer_json_member_add_quoted_string(wb, "value", "null");
+        } else {
+            buffer_json_member_add_double(wb, "value", sqlite3_column_double(res, 17));
+        }
+
+        buffer_json_member_add_string (wb, "old_value_string", sqlite3_column_type(res, 18) == SQLITE_NULL ? "-" : format_value_and_unit(old_value_string, 100, sqlite3_column_double(res, 18), (char *) sqlite3_column_text(res, 21), -1));
+        if (sqlite3_column_type(res, 18) == SQLITE_NULL) {
+            buffer_json_member_add_quoted_string(wb, "old_value", "null");
+        } else
+            buffer_json_member_add_double(wb, "old_value", sqlite3_column_double(res, 18));
+        buffer_json_object_close(wb);
+    }
+
+done:
+    rc = sqlite3_finalize(res);
+    if (unlikely(rc != SQLITE_OK))
+        error_report("Failed to finalize statement for SQL_SELECT_HEALTH_LOG_V2");
+
+    buffer_free(command);
+}
diff --git a/database/sqlite/sqlite_health.h b/database/sqlite/sqlite_health.h
index a2ca56d45d..45460a267d 100644
--- a/database/sqlite/sqlite_health.h
+++ b/database/sqlite/sqlite_health.h
@@ -18,4 +18,5 @@ int sql_health_get_last_executed_event(RRDHOST *host, ALARM_ENTRY *ae, RRDCALC_S
 void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *chart);
 int health_migrate_old_health_log_table(char *table);
 uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id);
+void sql_health_alarm_log2json_v2(RRDHOST *host, BUFFER *wb, uint32_t alert_id, char *chart, time_t after, time_t before, uint32_t top);
 #endif //NETDATA_SQLITE_HEALTH_H
diff --git a/health/health.c b/health/health.c
index 3dd3f96162..97df34fcf8 100644
--- a/health/health.c
+++ b/health/health.c
@@ -1128,6 +1128,7 @@ void *health_main(void *ptr) {
                         rc->last_status_change = now;
                         rc->last_updated = now;
                         rc->value = NAN;
+                        rc->ae = ae;
 
 #ifdef ENABLE_ACLK
                         if (netdata_cloud_enabled)
@@
-1396,6 +1397,7 @@ void *health_main(void *ptr) {
                         rc->last_status_change = now;
                         rc->old_status = rc->status;
                         rc->status = status;
+                        rc->ae = ae;
                     }
 
                     rc->last_updated = now;
@@ -1473,6 +1475,7 @@ void *health_main(void *ptr) {
                             ae->flags |= HEALTH_ENTRY_RUN_ONCE;
                         }
                         rc->run_flags |= RRDCALC_FLAG_RUN_ONCE;
+                        rc->ae = ae;
                         health_process_notifications(host, ae);
                         debug(D_HEALTH, "Notification sent for the repeating alarm %u.", ae->alarm_id);
                         health_alarm_wait_for_execution(ae);
diff --git a/health/health.h b/health/health.h
index c36aabac7e..8b967c197b 100644
--- a/health/health.h
+++ b/health/health.h
@@ -40,6 +40,8 @@ void health_reload(void);
 
 void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* context, RRDCALC_STATUS status);
 void health_alarms2json(RRDHOST *host, BUFFER *wb, int all);
+void health_alert2json(RRDHOST *host, BUFFER *wb, ALERT_OPTIONS all, Pvoid_t JudyHS, time_t after, time_t before, uint32_t top);
+void health_alert2json_conf(RRDHOST *host, BUFFER *wb, ALERT_OPTIONS all);
 void health_alarms_values2json(RRDHOST *host, BUFFER *wb, int all);
 
 void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf);
diff --git a/health/health_json.c b/health/health_json.c
index 4f81998f07..6961b1dcb1 100644
--- a/health/health_json.c
+++ b/health/health_json.c
@@ -167,6 +167,64 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
     buffer_strcat(wb, "\t\t}");
 }
 
+
+// Append the live state of 'rc' to 'wb' as one array item object, "li" being its index position.
+// Nothing is emitted when 'rc' is not in the JudyHS index, or when rc->last_updated falls outside
+// the after/before window (0 = unbounded). 'top' caps the number of nested transitions.
+static inline void health_alerts_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb,
+        RRDCALC *rc, ALERT_OPTIONS options,
+        Pvoid_t JudyHS, time_t after, time_t before, uint32_t top)
+{
+    ssize_t idx = get_alert_index(JudyHS, &rc->config_hash_id);
+    // If not in index then skip it
+    if (idx < 0)
+        return;
+
+    // filter before opening the object, so alerts outside the window leave no empty {} item
+    if ((after && after > rc->last_updated) || (before && before < rc->last_updated))
+        return;
+
+    char value_string[100 + 1];
+    format_value_and_unit(value_string, 100, rc->value, rrdcalc_units(rc), -1);
+
+    buffer_json_add_array_item_object(wb);
+    buffer_json_member_add_uint64(wb, "li", (size_t) idx);
+
+    if (rc->ae) {   // NOTE(review): assumes the alarm entry is still alive while rc points to it - confirm
+        char trans_uuid_str[UUID_STR_LEN];
+        uuid_unparse_lower(rc->ae->transition_id, trans_uuid_str);
+        buffer_json_member_add_string(wb, "transition_id", trans_uuid_str);
+        buffer_json_member_add_uint64(wb, "gi", rc->ae->global_id);
+    }
+    else {
+        buffer_json_member_add_quoted_string(wb, "transition_id", "NULL");
+        buffer_json_member_add_quoted_string(wb, "gi", "NULL");
+    }
+
+    buffer_json_member_add_string(wb, "status", rrdcalc_status2string(rc->status));
+    buffer_json_member_add_uint64(wb, "last_status_change", (unsigned long)rc->last_status_change);
+    buffer_json_member_add_uint64(wb, "last_updated", (unsigned long)rc->last_updated);
+    buffer_json_member_add_uint64(wb, "next_update", (unsigned long)rc->next_update);
+    buffer_json_member_add_uint64(wb, "delay_up_to_timestamp", (unsigned long)rc->delay_up_to_timestamp);
+
+    buffer_json_member_add_string(wb, "value_string", value_string);
+    buffer_json_member_add_uint64(wb, "last_repeat", (unsigned long)rc->last_repeat);
+    buffer_json_member_add_uint64(wb, "times_repeat", (unsigned long)rc->times_repeat);
+    buffer_json_member_add_uint64(wb, "db_after", (unsigned long)rc->db_after);
+    buffer_json_member_add_uint64(wb, "db_before", (unsigned long)rc->db_before);
+
+    buffer_json_member_add_double(wb, "green", rc->green);
+    buffer_json_member_add_double(wb, "red", rc->red);
+    buffer_json_member_add_double(wb, "value", rc->value);
+
+    if (options & ALERT_OPTION_INSTANCES) {  // NOTE(review): this flag gates "transitions" - looks swapped with ALERT_OPTION_TRANSITIONS, confirm
+        buffer_json_member_add_array(wb, "transitions");
+        sql_health_alarm_log2json_v2(host, wb, rc->id, NULL, after, before, top);
+        buffer_json_array_close(wb);
+    }
+    buffer_json_object_close(wb); // array entry
+}
+
 //void health_rrdcalctemplate2json_nolock(BUFFER *wb, RRDCALCTEMPLATE *rt) {
 //
 //}
@@ -234,6 +292,37 @@ static void health_alarms2json_fill_alarms(RRDHOST *host, BUFFER *wb, int
all, v
     foreach_rrdcalc_in_rrdhost_done(rc);
 }
 
+static void health_alerts2json_fill_alarms(     // calls 'fp' for every alert of 'host' that passes the filters below
+    RRDHOST *host,
+    BUFFER *wb,
+    ALERT_OPTIONS all,
+    Pvoid_t JudyHS,
+    time_t after,
+    time_t before,
+    uint32_t top,
+    void (*fp)(RRDHOST *, BUFFER *, RRDCALC *, ALERT_OPTIONS, Pvoid_t , time_t, time_t, uint32_t))
+{
+    RRDCALC *rc;
+    foreach_rrdcalc_in_rrdhost_read(host, rc) {
+        if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))     // not linked to a chart, or chart never collected
+            continue;
+
+        if (unlikely(!rrdset_is_available_for_exporting_and_alarms(rc->rrdset)))
+            continue;
+
+        if(likely((all & ALERT_OPTION_ACTIVE) && !(rc->status == RRDCALC_STATUS_WARNING || rc->status == RRDCALC_STATUS_CRITICAL)))  // "active" keeps only WARNING/CRITICAL
+            continue;
+
+        fp(host, wb, rc, all, JudyHS, after, before, top);
+    }
+    foreach_rrdcalc_in_rrdhost_done(rc);
+}
+
+void health_alert2json(RRDHOST *host, BUFFER *wb, ALERT_OPTIONS options, Pvoid_t JudyHS, time_t after, time_t before, uint32_t top)   // emits one item per filtered alert via health_alerts_rrdcalc2json_nolock()
+{
+    health_alerts2json_fill_alarms(host, wb, options, JudyHS, after, before, top, health_alerts_rrdcalc2json_nolock);
+}
+
 void health_alarms2json(RRDHOST *host, BUFFER *wb, int all) {
     buffer_sprintf(wb, "{\n\t\"hostname\": \"%s\","
                    "\n\t\"latest_alarm_log_unique_id\": %u,"
diff --git a/web/api/queries/rrdr.h b/web/api/queries/rrdr.h
index c57be67f53..05e9d6daaa 100644
--- a/web/api/queries/rrdr.h
+++ b/web/api/queries/rrdr.h
@@ -51,6 +51,14 @@ typedef enum rrdr_options {
     RRDR_OPTION_INTERNAL_AR      = (1 << 31), // internal use only, to let the formatters know we want to render the anomaly rate
 } RRDR_OPTIONS;
 
+typedef enum alert_options {
+    ALERT_OPTION_MINIFY           = (1 << 0), // remove JSON spaces and newlines from JSON output
+    ALERT_OPTION_ACTIVE           = (1 << 1), // Only active alerts
+    ALERT_OPTION_CONFIG           = (1 << 2), // Include config
+    ALERT_OPTION_TRANSITIONS      = (1 << 3), // Include transitions
+    ALERT_OPTION_INSTANCES        = (1 << 4), // Include alert instances
+} ALERT_OPTIONS;
+
 typedef enum __attribute__ ((__packed__)) rrdr_value_flag {
 
     // IMPORTANT:
diff --git a/web/api/web_api_v1.c b/web/api/web_api_v1.c
index d4b65c17f1..8284a04e18 100644
--- a/web/api/web_api_v1.c
+++ b/web/api/web_api_v1.c
@@ -50,6 +50,19 @@ static struct {
         , {NULL                , 0    , 0}
 };
 
+static struct {                 // option name -> ALERT_OPTIONS bit, terminated by the NULL name
+    const char *name;
+    uint32_t hash;              // simple_hash(name), filled in by web_client_api_v1_init()
+    ALERT_OPTIONS value;
+} api_v2_alert_options[] = {
+      {"minify"           , 0    , ALERT_OPTION_MINIFY}
+    , {"active"           , 0    , ALERT_OPTION_ACTIVE}
+    , {"config"           , 0    , ALERT_OPTION_CONFIG}
+    , {"transitions"      , 0    , ALERT_OPTION_TRANSITIONS}
+    , {"instances"        , 0    , ALERT_OPTION_INSTANCES}
+    , {NULL               , 0    , 0}
+};
+
 static struct {
     const char *name;
     uint32_t hash;
@@ -94,6 +107,9 @@ void web_client_api_v1_init(void) {
     for(i = 0; api_v1_data_options[i].name ; i++)
         api_v1_data_options[i].hash = simple_hash(api_v1_data_options[i].name);
 
+    for(i = 0; api_v2_alert_options[i].name ; i++)
+        api_v2_alert_options[i].hash = simple_hash(api_v2_alert_options[i].name);
+
     for(i = 0; api_v1_data_formats[i].name ; i++)
         api_v1_data_formats[i].hash = simple_hash(api_v1_data_formats[i].name);
 
@@ -194,6 +210,26 @@ inline RRDR_OPTIONS web_client_api_request_v1_data_options(char *o) {
     return ret;
 }
 
+inline ALERT_OPTIONS web_client_api_request_v2_alert_options(char *o) {
+    ALERT_OPTIONS ret = 0x00000000;
+    char *tok;
+
+    while(o && *o && (tok = strsep_skip_consecutive_separators(&o, ", |"))) {
+        if(!*tok) continue;
+
+        uint32_t hash = simple_hash(tok);
+        int i;
+        for(i = 0; api_v2_alert_options[i].name ; i++) {
+            if (unlikely(hash == api_v2_alert_options[i].hash && !strcmp(tok, api_v2_alert_options[i].name))) {   // compare hashes first, then confirm with strcmp
+                ret |= api_v2_alert_options[i].value;
+
break; + } + } + } + + return ret; +} + void web_client_api_request_v1_data_options_to_buffer_json_array(BUFFER *wb, const char *key, RRDR_OPTIONS options) { buffer_json_member_add_array(wb, key); diff --git a/web/api/web_api_v1.h b/web/api/web_api_v1.h index 6fa8de017f..7565bb532a 100644 --- a/web/api/web_api_v1.h +++ b/web/api/web_api_v1.h @@ -8,6 +8,7 @@ struct web_client; RRDR_OPTIONS web_client_api_request_v1_data_options(char *o); +ALERT_OPTIONS web_client_api_request_v2_alert_options(char *o); void web_client_api_request_v1_data_options_to_buffer_json_array(BUFFER *wb, const char *key, RRDR_OPTIONS options); void web_client_api_request_v1_data_options_to_string(char *buf, size_t size, RRDR_OPTIONS options); diff --git a/web/api/web_api_v2.c b/web/api/web_api_v2.c index 79ed02546b..4477932378 100644 --- a/web/api/web_api_v2.c +++ b/web/api/web_api_v2.c @@ -358,6 +358,84 @@ cleanup: return ret; } +static int web_client_api_request_v2_alerts(RRDHOST *host __maybe_unused, struct web_client *w, char *url) +{ + time_t after = 0; + time_t before = 0; + struct api_v2_alerts_request req = { 0 }; + + CONTEXTS_V2_OPTIONS options = CONTEXTS_V2_CONTEXTS | CONTEXTS_V2_NODES; + ALERT_OPTIONS alert_options = 0; + + while(url) { + char *value = strsep_skip_consecutive_separators(&url, "&"); + if(!value || !*value) continue; + + char *name = strsep_skip_consecutive_separators(&value, "="); + if(!name || !*name) continue; + if(!value || !*value) continue; + + // name and value are now the parameters + // they are not null and not empty + + if (!strcmp(name, "scope_nodes")) { + req.scope_nodes = value; + options |= CONTEXTS_V2_NODES; + } + else if (!strcmp(name, "nodes")) { + req.nodes = value; + options |= CONTEXTS_V2_NODES; + } + else if (!strcmp(name, "scope_contexts")) { + req.scope_contexts = value; + options |= CONTEXTS_V2_CONTEXTS; + } + else if (!strcmp(name, "instance_id")) + req.alert_id = (time_t)strtoul(value, NULL, 0); + else if (!strcmp(name, "last")) + req.last = 
strtoul(value, NULL, 0); + else if (!strcmp(name, "transition_id")) { + req.transition_id = value; + } else if (!strcmp(name, "alert_name")) { + req.alert_name = value; + req.alert_name_pattern = string_to_simple_pattern(value); + } else if (!strcmp(name, "config_hash")) { + req.config_hash = value; + req.config_hash_pattern = string_to_simple_pattern(value); + } else if (!strcmp(name, "contexts")) { + req.contexts = value; + options |= CONTEXTS_V2_CONTEXTS; + } else if (!strcmp(name, "state")) { + req.state = value; // all | warning | critical, clear, undefined, uninitialized + } else if (!strcmp(name, "q")) + req.q = value; + else if (!strcmp(name, "after")) + after = (time_t)strtoul(value, NULL, 0); + else if (!strcmp(name, "before")) + before = (time_t)strtoul(value, NULL, 0); + else if (!strcmp(name, "output")) { + // config, instances, transitions + alert_options |= web_client_api_request_v2_alert_options(value); + } + + // TODO: All states and special "ACTIVE" + } + + buffer_flush(w->response.data); + buffer_no_cacheable(w->response.data); + w->response.data->content_type = CT_APPLICATION_JSON; + buffer_json_initialize( w->response.data, "\"", "\"", 0, true, alert_options & ALERT_OPTION_MINIFY); + + if (alert_options & ALERT_OPTION_INSTANCES && !req.last) + req.last = 1; + + req.options = alert_options; + req.after = after; + req.before = before; + + return alerts_to_json_v2(w->response.data, &req, options); +} + static int web_client_api_request_v2_webrtc(RRDHOST *host __maybe_unused, struct web_client *w, char *url __maybe_unused) { return webrtc_new_connection(w->post_payload, w->response.data); } @@ -373,6 +451,7 @@ static struct web_api_command api_commands_v2[] = { {"q", 0, WEB_CLIENT_ACL_DASHBOARD_ACLK_WEBRTC, web_client_api_request_v2_q}, {"rtc_offer", 0, WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_ACLK, web_client_api_request_v2_webrtc}, + {"alerts", 0, WEB_CLIENT_ACL_DASHBOARD_ACLK_WEBRTC, web_client_api_request_v2_alerts}, // terminator {NULL, 0,
WEB_CLIENT_ACL_NONE, NULL},