mirror of
https://github.com/netdata/netdata.git
synced 2025-04-17 11:12:42 +00:00
Store alert log entries even if alert is repeating. (#12226)
* Store alarm log entries even if the alarm is repeating
* Log the number of times an alert has been repeated
This commit is contained in:
parent
fce09f9c89
commit
6caab18c2e
5 changed files with 71 additions and 75 deletions
|
@ -81,35 +81,32 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
|
|||
|
||||
if(!rc->units) rc->units = strdupz(st->units);
|
||||
|
||||
if(!rrdcalc_isrepeating(rc)) {
|
||||
time_t now = now_realtime_sec();
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
host,
|
||||
rc->id,
|
||||
rc->next_event_id++,
|
||||
rc->config_hash_id,
|
||||
now,
|
||||
rc->name,
|
||||
rc->rrdset->id,
|
||||
rc->rrdset->family,
|
||||
rc->classification,
|
||||
rc->component,
|
||||
rc->type,
|
||||
rc->exec,
|
||||
rc->recipient,
|
||||
now - rc->last_status_change,
|
||||
rc->old_value,
|
||||
rc->value,
|
||||
rc->status,
|
||||
RRDCALC_STATUS_UNINITIALIZED,
|
||||
rc->source,
|
||||
rc->units,
|
||||
rc->info,
|
||||
0,
|
||||
0
|
||||
);
|
||||
health_alarm_log(host, ae);
|
||||
}
|
||||
time_t now = now_realtime_sec();
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
host,
|
||||
rc->id,
|
||||
rc->next_event_id++,
|
||||
rc->config_hash_id,
|
||||
now,
|
||||
rc->name,
|
||||
rc->rrdset->id,
|
||||
rc->rrdset->family,
|
||||
rc->classification,
|
||||
rc->component,
|
||||
rc->type,
|
||||
rc->exec,
|
||||
rc->recipient,
|
||||
now - rc->last_status_change,
|
||||
rc->old_value,
|
||||
rc->value,
|
||||
rc->status,
|
||||
RRDCALC_STATUS_UNINITIALIZED,
|
||||
rc->source,
|
||||
rc->units,
|
||||
rc->info,
|
||||
0,
|
||||
0);
|
||||
health_alarm_log(host, ae);
|
||||
}
|
||||
|
||||
static inline int rrdcalc_test_additional_restriction(RRDCALC *rc, RRDSET *st){
|
||||
|
@ -159,35 +156,32 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) {
|
|||
|
||||
RRDHOST *host = st->rrdhost;
|
||||
|
||||
if(!rrdcalc_isrepeating(rc)) {
|
||||
time_t now = now_realtime_sec();
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
host,
|
||||
rc->id,
|
||||
rc->next_event_id++,
|
||||
rc->config_hash_id,
|
||||
now,
|
||||
rc->name,
|
||||
rc->rrdset->id,
|
||||
rc->rrdset->family,
|
||||
rc->classification,
|
||||
rc->component,
|
||||
rc->type,
|
||||
rc->exec,
|
||||
rc->recipient,
|
||||
now - rc->last_status_change,
|
||||
rc->old_value,
|
||||
rc->value,
|
||||
rc->status,
|
||||
RRDCALC_STATUS_REMOVED,
|
||||
rc->source,
|
||||
rc->units,
|
||||
rc->info,
|
||||
0,
|
||||
0
|
||||
);
|
||||
health_alarm_log(host, ae);
|
||||
}
|
||||
time_t now = now_realtime_sec();
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
host,
|
||||
rc->id,
|
||||
rc->next_event_id++,
|
||||
rc->config_hash_id,
|
||||
now,
|
||||
rc->name,
|
||||
rc->rrdset->id,
|
||||
rc->rrdset->family,
|
||||
rc->classification,
|
||||
rc->component,
|
||||
rc->type,
|
||||
rc->exec,
|
||||
rc->recipient,
|
||||
now - rc->last_status_change,
|
||||
rc->old_value,
|
||||
rc->value,
|
||||
rc->status,
|
||||
RRDCALC_STATUS_REMOVED,
|
||||
rc->source,
|
||||
rc->units,
|
||||
rc->info,
|
||||
0,
|
||||
0);
|
||||
health_alarm_log(host, ae);
|
||||
|
||||
debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname);
|
||||
|
||||
|
@ -422,6 +416,7 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
|
|||
rc->delay_multiplier = rt->delay_multiplier;
|
||||
|
||||
rc->last_repeat = 0;
|
||||
rc->times_repeat = 0;
|
||||
rc->warn_repeat_every = rt->warn_repeat_every;
|
||||
rc->crit_repeat_every = rt->crit_repeat_every;
|
||||
|
||||
|
@ -534,6 +529,7 @@ inline RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const ch
|
|||
newrc->delay_multiplier = rc->delay_multiplier;
|
||||
|
||||
newrc->last_repeat = 0;
|
||||
newrc->times_repeat = 0;
|
||||
newrc->warn_repeat_every = rc->warn_repeat_every;
|
||||
newrc->crit_repeat_every = rc->crit_repeat_every;
|
||||
|
||||
|
|
|
@ -121,6 +121,7 @@ struct rrdcalc {
|
|||
time_t next_update; // the next update timestamp of the alarm
|
||||
time_t last_status_change; // the timestamp of the last time this alarm changed status
|
||||
time_t last_repeat; // the last time the alarm got repeated
|
||||
uint32_t times_repeat; // number of times the alarm got repeated
|
||||
|
||||
time_t db_after; // the first timestamp evaluated by the db lookup
|
||||
time_t db_before; // the last timestamp evaluated by the db lookup
|
||||
|
|
|
@ -1041,19 +1041,19 @@ void *health_main(void *ptr) {
|
|||
rc->delay_last = delay;
|
||||
rc->delay_up_to_timestamp = now + delay;
|
||||
|
||||
if(likely(!rrdcalc_isrepeating(rc))) {
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
|
||||
rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
|
||||
rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info,
|
||||
rc->delay_last,
|
||||
(
|
||||
((rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) |
|
||||
((rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)? HEALTH_ENTRY_FLAG_SILENCED : 0)
|
||||
)
|
||||
);
|
||||
health_alarm_log(host, ae);
|
||||
}
|
||||
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
|
||||
rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
|
||||
rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info,
|
||||
rc->delay_last,
|
||||
(
|
||||
((rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)? HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION : 0) |
|
||||
((rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)? HEALTH_ENTRY_FLAG_SILENCED : 0)
|
||||
)
|
||||
);
|
||||
health_alarm_log(host, ae);
|
||||
|
||||
rc->last_status_change = now;
|
||||
rc->old_status = rc->status;
|
||||
rc->status = status;
|
||||
|
@ -1092,6 +1092,7 @@ void *health_main(void *ptr) {
|
|||
|
||||
if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) {
|
||||
rc->last_repeat = now;
|
||||
if (likely(rc->times_repeat < UINT32_MAX)) rc->times_repeat++;
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
|
||||
rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
|
||||
|
|
|
@ -227,6 +227,7 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
|
|||
"\t\t\t\"crit_repeat_every\": \"%u\",\n"
|
||||
"\t\t\t\"value_string\": \"%s\",\n"
|
||||
"\t\t\t\"last_repeat\": \"%lu\",\n"
|
||||
"\t\t\t\"times_repeat\": %lu,\n"
|
||||
, rc->chart, rc->name
|
||||
, (unsigned long)rc->id
|
||||
, hash_id
|
||||
|
@ -259,6 +260,7 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
|
|||
, rc->crit_repeat_every
|
||||
, value_string
|
||||
, (unsigned long)rc->last_repeat
|
||||
, (unsigned long)rc->times_repeat
|
||||
);
|
||||
|
||||
if(unlikely(rc->options & RRDCALC_FLAG_NO_CLEAR_NOTIFICATION)) {
|
||||
|
|
|
@ -560,10 +560,6 @@ inline void health_alarm_log(
|
|||
) {
|
||||
debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id);
|
||||
|
||||
if(unlikely(alarm_entry_isrepeating(host, ae))) {
|
||||
error("Repeating alarms cannot be added to host's alarm log entries. It seems somewhere in the logic, API is being misused. Alarm id: %u", ae->alarm_id);
|
||||
return;
|
||||
}
|
||||
// link it
|
||||
netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
|
||||
ae->next = host->health_log.alarms;
|
||||
|
|
Loading…
Add table
Reference in a new issue