0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-17 11:12:42 +00:00

Store alert log entries even if the alert is repeating.

* Store alarm log entries even if the alarm is repeating

* Log the number of times an alert has been repeated
This commit is contained in:
Emmanuel Vasilakis 2022-04-20 14:32:59 +03:00 committed by GitHub
parent fce09f9c89
commit 6caab18c2e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 71 additions and 75 deletions

View file

@ -81,35 +81,32 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
if(!rc->units) rc->units = strdupz(st->units);
if(!rrdcalc_isrepeating(rc)) {
time_t now = now_realtime_sec();
ALARM_ENTRY *ae = health_create_alarm_entry(
host,
rc->id,
rc->next_event_id++,
rc->config_hash_id,
now,
rc->name,
rc->rrdset->id,
rc->rrdset->family,
rc->classification,
rc->component,
rc->type,
rc->exec,
rc->recipient,
now - rc->last_status_change,
rc->old_value,
rc->value,
rc->status,
RRDCALC_STATUS_UNINITIALIZED,
rc->source,
rc->units,
rc->info,
0,
0
);
health_alarm_log(host, ae);
}
time_t now = now_realtime_sec();
ALARM_ENTRY *ae = health_create_alarm_entry(
host,
rc->id,
rc->next_event_id++,
rc->config_hash_id,
now,
rc->name,
rc->rrdset->id,
rc->rrdset->family,
rc->classification,
rc->component,
rc->type,
rc->exec,
rc->recipient,
now - rc->last_status_change,
rc->old_value,
rc->value,
rc->status,
RRDCALC_STATUS_UNINITIALIZED,
rc->source,
rc->units,
rc->info,
0,
0);
health_alarm_log(host, ae);
}
static inline int rrdcalc_test_additional_restriction(RRDCALC *rc, RRDSET *st){
@ -159,35 +156,32 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) {
RRDHOST *host = st->rrdhost;
if(!rrdcalc_isrepeating(rc)) {
time_t now = now_realtime_sec();
ALARM_ENTRY *ae = health_create_alarm_entry(
host,
rc->id,
rc->next_event_id++,
rc->config_hash_id,
now,
rc->name,
rc->rrdset->id,
rc->rrdset->family,
rc->classification,
rc->component,
rc->type,
rc->exec,
rc->recipient,
now - rc->last_status_change,
rc->old_value,
rc->value,
rc->status,
RRDCALC_STATUS_REMOVED,
rc->source,
rc->units,
rc->info,
0,
0
);
health_alarm_log(host, ae);
}
time_t now = now_realtime_sec();
ALARM_ENTRY *ae = health_create_alarm_entry(
host,
rc->id,
rc->next_event_id++,
rc->config_hash_id,
now,
rc->name,
rc->rrdset->id,
rc->rrdset->family,
rc->classification,
rc->component,
rc->type,
rc->exec,
rc->recipient,
now - rc->last_status_change,
rc->old_value,
rc->value,
rc->status,
RRDCALC_STATUS_REMOVED,
rc->source,
rc->units,
rc->info,
0,
0);
health_alarm_log(host, ae);
debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname);
@ -422,6 +416,7 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
rc->delay_multiplier = rt->delay_multiplier;
rc->last_repeat = 0;
rc->times_repeat = 0;
rc->warn_repeat_every = rt->warn_repeat_every;
rc->crit_repeat_every = rt->crit_repeat_every;
@ -534,6 +529,7 @@ inline RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const ch
newrc->delay_multiplier = rc->delay_multiplier;
newrc->last_repeat = 0;
newrc->times_repeat = 0;
newrc->warn_repeat_every = rc->warn_repeat_every;
newrc->crit_repeat_every = rc->crit_repeat_every;

View file

@ -121,6 +121,7 @@ struct rrdcalc {
time_t next_update; // the next update timestamp of the alarm
time_t last_status_change; // the timestamp of the last time this alarm changed status
time_t last_repeat; // the last time the alarm got repeated
uint32_t times_repeat; // number of times the alarm got repeated
time_t db_after; // the first timestamp evaluated by the db lookup
time_t db_before; // the last timestamp evaluated by the db lookup

View file

@ -1041,19 +1041,19 @@ void *health_main(void *ptr) {
rc->delay_last = delay;
rc->delay_up_to_timestamp = now + delay;
if(likely(!rrdcalc_isrepeating(rc))) {
ALARM_ENTRY *ae = health_create_alarm_entry(
host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info,
rc->delay_last,
(
((rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) |
((rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)? HEALTH_ENTRY_FLAG_SILENCED : 0)
)
);
health_alarm_log(host, ae);
}
ALARM_ENTRY *ae = health_create_alarm_entry(
host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info,
rc->delay_last,
(
((rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) |
((rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)? HEALTH_ENTRY_FLAG_SILENCED : 0)
)
);
health_alarm_log(host, ae);
rc->last_status_change = now;
rc->old_status = rc->status;
rc->status = status;
@ -1092,6 +1092,7 @@ void *health_main(void *ptr) {
if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) {
rc->last_repeat = now;
if (likely(rc->times_repeat < UINT32_MAX)) rc->times_repeat++;
ALARM_ENTRY *ae = health_create_alarm_entry(
host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,

View file

@ -227,6 +227,7 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
"\t\t\t\"crit_repeat_every\": \"%u\",\n"
"\t\t\t\"value_string\": \"%s\",\n"
"\t\t\t\"last_repeat\": \"%lu\",\n"
"\t\t\t\"times_repeat\": %lu,\n"
, rc->chart, rc->name
, (unsigned long)rc->id
, hash_id
@ -259,6 +260,7 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
, rc->crit_repeat_every
, value_string
, (unsigned long)rc->last_repeat
, (unsigned long)rc->times_repeat
);
if(unlikely(rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)) {

View file

@ -560,10 +560,6 @@ inline void health_alarm_log(
) {
debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id);
if(unlikely(alarm_entry_isrepeating(host, ae))) {
error("Repeating alarms cannot be added to host's alarm log entries. It seems somewhere in the logic, API is being misused. Alarm id: %u", ae->alarm_id);
return;
}
// link it
netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
ae->next = host->health_log.alarms;