mirror of
https://github.com/netdata/netdata.git
synced 2025-04-17 19:22:40 +00:00
Create a template for all dimensions (#6560)
* health_connection: Comments inside Health Config To try to understand better what is necessary to change and where it is necessary to change anything inside the health, I commented the functions inside this file" " * health_connection: Comments about Health in other files This commit brings the rest of the comments that were missed for health" * health_connection: Comments on health_log I had to append more comments on health_log * health_connection: Create a new variable New variable is created to work with foreach * health_connection: Fix new option and doc The first implementation of the 'foreach' had a problem, this fixes the error. This commit also brings the updates for the documentation * health_connection: Understanding health This commit is to save the place that I am working, it has the map to understand all the alam process * health_connection: Update map I changed the position of the error message to identify the correct place to add new alarms * health_connection: End of simple alarm This commit finishes what is necessary to bring the same lookup for different dimensions in one unique line * health_connection: Documentation and template steps This commit brings the documentation missed for template and comments to help in the next step of apply a template to create an alarm. * health_connection: Restoring After some tests, it was detected that the alarms were not working as expected * health_connection: Fix bug and bring dimension to template This commit brings a fix for an old Netdata bug, before this the Netdata always tried to create a new entry in an index with the same id raising an error. 
It also brings the possibility to use 'foreach' in template * health_connection: Fix cmake compilation There was a problem with cmake compilation fixed by this commit * health_connection: shell script Finilize the shell script to test the PR * health_connection: Remove debug message During the development, I used some messages to understand the code this commit removes the last message * health_connection: Fix bugs This commits fix bugs reported by tests * health_connection: Alarm working This commit brings the necessary change for the alarms work, but it is missing the unlink from the newest list * health_connection: Template code written This commit finishes the creation of alarm from template, but it was not tested yet. * health_connection: Remove comments I am removing the comments from this PR to bring back late * health_connection: Remove lines Another commit to restore the files before they to be commented * health_connection: New alarm and remove messages I am bringing a new alarm to test template with SP and removing comments used during the development * health_connection: Functional test review After to review the functional test script, it was necessary to small adjust to test all the features available with the new version * health_connection: Free structure I am moving the free list for the correct place, the previous place was not safe * health_connection: ShellCheck This commit fixes the problems with shellcheck * health_connection: FIx hash This commit fix the hash calculation that was using wrong input * health_connection: Fix message error The system was showing a wronge message, because when we have foreach the alarm created with templated is added in a second stage to the index * health_connection: Fix documentation In this commit I am fixing the grammar of the previous doc and bringing two examples * health_connection: Fix examples This commit fix the last two examples that was brought in this PR * health_connection: Fix example doc When I 
brought the correct grammar in the last commit, I lost a mark * health_connection: Grammar fix Fixing grammar of the documentation * health_connection: Memory leak This commit fixes the memory leak that was present in the PR * health_connection: Reload This commit fix the problem that the alarms were not linked after to receive a SIGUSR2 * health_connection: False Positive from codacy Codacy was given a false positive, I changed the function to avoid it. * health_connection: dead code Remove dead code from the code. * health_connection: Memory Leak Remove memory leak when clean simple pattern * health_connection: Script format With this commit I am formatting the last message to return for the default color on terminal * health_connection: Script format 2 With this commit I am formatting the last message to return for the default color on terminal * health_connection: Script format 3 With this commit I am formatting the error message to return for the default color on terminal
This commit is contained in:
parent
a8b28bfbd2
commit
e3471fa572
20 changed files with 637 additions and 92 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -170,6 +170,7 @@ sitespeed-result/
|
|||
tests/acls/acl.sh
|
||||
tests/urls/request.sh
|
||||
tests/alarm_repetition/alarm.sh
|
||||
tests/template_dimension/template_dim.sh
|
||||
|
||||
# tests and temp files
|
||||
python.d/python-modules-installer.sh
|
||||
|
|
|
@ -697,6 +697,7 @@ struct rrdhost {
|
|||
// RRDCALCs may be linked to charts at any point
|
||||
// (charts may or may not exist when these are loaded)
|
||||
RRDCALC *alarms;
|
||||
RRDCALC *alarms_with_foreach;
|
||||
avl_tree_lock alarms_idx_health_log;
|
||||
avl_tree_lock alarms_idx_name;
|
||||
|
||||
|
@ -709,6 +710,7 @@ struct rrdhost {
|
|||
// these are used to create alarms when charts
|
||||
// are created or renamed, that match them
|
||||
RRDCALCTEMPLATE *templates;
|
||||
RRDCALCTEMPLATE *alarms_template_with_foreach;
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -1008,6 +1010,7 @@ static inline time_t rrdset_slot2time(RRDSET *st, size_t slot) {
|
|||
// ----------------------------------------------------------------------------
|
||||
// RRD DIMENSION functions
|
||||
|
||||
extern void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host);
|
||||
extern RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode);
|
||||
#define rrddim_add(st, id, name, multiplier, divisor, algorithm) rrddim_add_custom(st, id, name, multiplier, divisor, algorithm, (st)->rrd_memory_mode)
|
||||
|
||||
|
|
|
@ -255,6 +255,53 @@ inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const ch
|
|||
return host->health_log.next_alarm_id++;
|
||||
}
|
||||
|
||||
/**
 * Alarm name with dimension
 *
 * Build a new alarm name of the form "<name>_<dim>" in freshly allocated memory.
 * Neither input is modified; exactly namelen bytes of name and dimlen bytes of dim are copied.
 *
 * @param name the alarm name.
 * @param namelen the number of bytes of name to copy.
 * @param dim the dimension of the chart.
 * @param dimlen the number of bytes of dim to copy.
 *
 * @return a newly allocated, NUL-terminated string that the caller owns, or NULL when the
 *         allocation fails. The input name is never returned, so the result can always be
 *         freed or handed over as an owned string without aliasing the original name.
 */
char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen) {
    // name + '_' + dim + terminating NUL
    char *newname = malloc(namelen + dimlen + 2);
    if(!newname)
        return NULL;

    char *move = newname;

    memcpy(move, name, namelen);
    move += namelen;

    *move++ = '_';

    memcpy(move, dim, dimlen);
    move += dimlen;

    *move = '\0';

    return newname;
}
|
||||
|
||||
/**
 * Remove pipe comma
 *
 * Rewrite the string in place, turning every '|' and every ',' into a single space.
 * All other characters, and the length of the string, are left untouched.
 *
 * @param str the NUL-terminated string to change.
 */
void dimension_remove_pipe_comma(char *str) {
    for(char *cursor = str; *cursor != '\0'; cursor++) {
        switch(*cursor) {
            case '|':
            case ',':
                *cursor = ' ';
                break;

            default:
                break;
        }
    }
}
|
||||
|
||||
inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) {
|
||||
rrdhost_check_rdlock(host);
|
||||
|
||||
|
@ -282,24 +329,39 @@ inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) {
|
|||
rc->critical->rrdcalc = rc;
|
||||
}
|
||||
|
||||
// link it to the host
|
||||
if(likely(host->alarms)) {
|
||||
// append it
|
||||
RRDCALC *t;
|
||||
for(t = host->alarms; t && t->next ; t = t->next) ;
|
||||
t->next = rc;
|
||||
}
|
||||
else {
|
||||
host->alarms = rc;
|
||||
}
|
||||
|
||||
// link it to its chart
|
||||
RRDSET *st;
|
||||
rrdset_foreach_read(st, host) {
|
||||
if(rrdcalc_is_matching_this_rrdset(rc, st)) {
|
||||
rrdsetcalc_link(st, rc);
|
||||
break;
|
||||
if(!rc->foreachdim) {
|
||||
// link it to the host alarms list
|
||||
if(likely(host->alarms)) {
|
||||
// append it
|
||||
RRDCALC *t;
|
||||
for(t = host->alarms; t && t->next ; t = t->next) ;
|
||||
t->next = rc;
|
||||
}
|
||||
else {
|
||||
host->alarms = rc;
|
||||
}
|
||||
|
||||
// link it to its chart
|
||||
RRDSET *st;
|
||||
rrdset_foreach_read(st, host) {
|
||||
if(rrdcalc_is_matching_this_rrdset(rc, st)) {
|
||||
rrdsetcalc_link(st, rc);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
//link it case there is a foreach
|
||||
if(likely(host->alarms_with_foreach)) {
|
||||
// append it
|
||||
RRDCALC *t;
|
||||
for(t = host->alarms_with_foreach; t && t->next ; t = t->next) ;
|
||||
t->next = rc;
|
||||
}
|
||||
else {
|
||||
host->alarms_with_foreach = rc;
|
||||
}
|
||||
|
||||
//I am not linking this alarm direct to the host here, this will be done when the children is created
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -311,13 +373,19 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
|
|||
|
||||
RRDCALC *rc = callocz(1, sizeof(RRDCALC));
|
||||
rc->next_event_id = 1;
|
||||
rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id);
|
||||
rc->name = strdupz(rt->name);
|
||||
rc->hash = simple_hash(rc->name);
|
||||
rc->chart = strdupz(chart);
|
||||
rc->hash_chart = simple_hash(rc->chart);
|
||||
|
||||
rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id);
|
||||
|
||||
if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions);
|
||||
if(rt->foreachdim) {
|
||||
rc->foreachdim = strdupz(rt->foreachdim);
|
||||
rc->spdim = health_pattern_from_foreach(rc->foreachdim);
|
||||
}
|
||||
rc->foreachcounter = rt->foreachcounter;
|
||||
|
||||
rc->green = rt->green;
|
||||
rc->red = rt->red;
|
||||
|
@ -361,7 +429,7 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
|
|||
error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source);
|
||||
}
|
||||
|
||||
debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
|
||||
debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
|
||||
(rc->chart)?rc->chart:"NOCHART",
|
||||
rc->name,
|
||||
(rc->exec)?rc->exec:"DEFAULT",
|
||||
|
@ -373,6 +441,7 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
|
|||
rc->before,
|
||||
rc->options,
|
||||
(rc->dimensions)?rc->dimensions:"NONE",
|
||||
(rc->foreachdim)?rc->foreachdim:"NONE",
|
||||
rc->update_every,
|
||||
(rc->calculation)?rc->calculation->parsed_as:"NONE",
|
||||
(rc->warning)?rc->warning->parsed_as:"NONE",
|
||||
|
@ -387,18 +456,94 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
|
|||
);
|
||||
|
||||
rrdcalc_add_to_host(host, rc);
|
||||
RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)rc);
|
||||
if (rdcmp != rc) {
|
||||
error("Cannot insert the alarm index ID %s",rc->name);
|
||||
if(!rt->foreachdim) {
|
||||
RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)rc);
|
||||
if (rdcmp != rc) {
|
||||
error("Cannot insert the alarm index ID %s",rc->name);
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create from RRDCALC
|
||||
*
|
||||
* Create a new alarm using another alarm as template.
|
||||
*
|
||||
* @param rc is the alarm that will be used as source
|
||||
* @param host is the host structure.
|
||||
* @param name is the newest chart name.
|
||||
* @param dimension is the current dimension
|
||||
* @param foreachdim the whole list of dimension
|
||||
*
|
||||
* @return it returns the new alarm changed.
|
||||
*/
|
||||
inline RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const char *name, const char *dimension) {
|
||||
RRDCALC *newrc = callocz(1, sizeof(RRDCALC));
|
||||
|
||||
newrc->next_event_id = 1;
|
||||
newrc->id = rrdcalc_get_unique_id(host, rc->chart, name, &rc->next_event_id);
|
||||
newrc->name = (char *)name;
|
||||
newrc->hash = simple_hash(newrc->name);
|
||||
newrc->chart = strdupz(rc->chart);
|
||||
newrc->hash_chart = simple_hash(rc->chart);
|
||||
|
||||
newrc->dimensions = strdupz(dimension);
|
||||
newrc->foreachdim = NULL;
|
||||
rc->foreachcounter++;
|
||||
newrc->foreachcounter = rc->foreachcounter;
|
||||
|
||||
newrc->green = rc->green;
|
||||
newrc->red = rc->red;
|
||||
newrc->value = NAN;
|
||||
newrc->old_value = NAN;
|
||||
|
||||
newrc->delay_up_duration = rc->delay_up_duration;
|
||||
newrc->delay_down_duration = rc->delay_down_duration;
|
||||
newrc->delay_max_duration = rc->delay_max_duration;
|
||||
newrc->delay_multiplier = rc->delay_multiplier;
|
||||
|
||||
newrc->last_repeat = 0;
|
||||
newrc->warn_repeat_every = rc->warn_repeat_every;
|
||||
newrc->crit_repeat_every = rc->crit_repeat_every;
|
||||
|
||||
newrc->group = rc->group;
|
||||
newrc->after = rc->after;
|
||||
newrc->before = rc->before;
|
||||
newrc->update_every = rc->update_every;
|
||||
newrc->options = rc->options;
|
||||
|
||||
if(rc->exec) newrc->exec = strdupz(rc->exec);
|
||||
if(rc->recipient) newrc->recipient = strdupz(rc->recipient);
|
||||
if(rc->source) newrc->source = strdupz(rc->source);
|
||||
if(rc->units) newrc->units = strdupz(rc->units);
|
||||
if(rc->info) newrc->info = strdupz(rc->info);
|
||||
|
||||
if(rc->calculation) {
|
||||
newrc->calculation = expression_parse(rc->calculation->source, NULL, NULL);
|
||||
if(!newrc->calculation)
|
||||
error("Health alarm '%s.%s': failed to parse calculation expression '%s'", rc->chart, rc->name, rc->calculation->source);
|
||||
}
|
||||
|
||||
if(rc->warning) {
|
||||
newrc->warning = expression_parse(rc->warning->source, NULL, NULL);
|
||||
if(!newrc->warning)
|
||||
error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", rc->chart, rc->name, rc->warning->source);
|
||||
}
|
||||
|
||||
if(rc->critical) {
|
||||
newrc->critical = expression_parse(rc->critical->source, NULL, NULL);
|
||||
if(!newrc->critical)
|
||||
error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", rc->chart, rc->name, rc->critical->source);
|
||||
}
|
||||
|
||||
return newrc;
|
||||
}
|
||||
|
||||
void rrdcalc_free(RRDCALC *rc) {
|
||||
if(unlikely(!rc)) return;
|
||||
|
||||
|
||||
expression_free(rc->calculation);
|
||||
expression_free(rc->warning);
|
||||
expression_free(rc->critical);
|
||||
|
@ -407,11 +552,13 @@ void rrdcalc_free(RRDCALC *rc) {
|
|||
freez(rc->chart);
|
||||
freez(rc->family);
|
||||
freez(rc->dimensions);
|
||||
freez(rc->foreachdim);
|
||||
freez(rc->exec);
|
||||
freez(rc->recipient);
|
||||
freez(rc->source);
|
||||
freez(rc->units);
|
||||
freez(rc->info);
|
||||
simple_pattern_free(rc->spdim);
|
||||
freez(rc);
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ struct rrdcalc {
|
|||
uint32_t next_event_id; // the next event id that will be used for this alarm
|
||||
|
||||
char *name; // the name of this alarm
|
||||
uint32_t hash;
|
||||
uint32_t hash; // the hash of the alarm name
|
||||
|
||||
char *exec; // the command to execute when this alarm switches state
|
||||
char *recipient; // the recipient of the alarm (the first parameter to exec)
|
||||
|
@ -59,7 +59,11 @@ struct rrdcalc {
|
|||
// database lookup settings
|
||||
|
||||
char *dimensions; // the chart dimensions
|
||||
RRDR_GROUPING group; // grouping method: average, max, etc.
|
||||
char *foreachdim; // the group of dimensions that the `foreach` will be applied.
|
||||
SIMPLE_PATTERN *spdim; // used if and only if there is a simple pattern for the chart.
|
||||
int foreachcounter; // the number of alarms created with foreachdim, this also works as an id of the
|
||||
// children
|
||||
RRDR_GROUPING group; // grouping method: average, max, etc.
|
||||
int before; // ending point in time-series
|
||||
int after; // starting point in time-series
|
||||
uint32_t options; // calculation options
|
||||
|
@ -148,7 +152,10 @@ extern void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc);
|
|||
extern int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name);
|
||||
extern uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id);
|
||||
extern RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart);
|
||||
extern RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const char *name, const char *dimension);
|
||||
extern void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc);
|
||||
extern void dimension_remove_pipe_comma(char *str);
|
||||
extern char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen);
|
||||
|
||||
static inline int rrdcalc_isrepeating(RRDCALC *rc) {
|
||||
if (unlikely(rc->warn_repeat_every > 0 || rc->crit_repeat_every > 0)) {
|
||||
|
|
|
@ -5,23 +5,35 @@
|
|||
|
||||
// ----------------------------------------------------------------------------
|
||||
// RRDCALCTEMPLATE management
|
||||
/**
|
||||
* RRDCALC TEMPLATE LINK MATCHING
|
||||
*
|
||||
* @param rt is the template used to create the chart.
|
||||
* @param st is the chart where the alarm will be attached.
|
||||
*/
|
||||
void rrdcalctemplate_link_matching_test(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host ) {
|
||||
if(rt->hash_context == st->hash_context && !strcmp(rt->context, st->context)
|
||||
&& (!rt->family_pattern || simple_pattern_matches(rt->family_pattern, st->family))) {
|
||||
RRDCALC *rc = rrdcalc_create_from_template(host, rt, st->id);
|
||||
if(unlikely(!rc))
|
||||
info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname);
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
else if(rc->rrdset != st && !rc->foreachdim) //When we have a template with foreadhdim, the child will be added to the index late
|
||||
error("Health alarm '%s.%s' should be linked to chart '%s', but it is not", rc->chart?rc->chart:"NOCHART", rc->name, st->id);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void rrdcalctemplate_link_matching(RRDSET *st) {
|
||||
RRDHOST *host = st->rrdhost;
|
||||
RRDCALCTEMPLATE *rt;
|
||||
|
||||
for(rt = host->templates; rt ; rt = rt->next) {
|
||||
if(rt->hash_context == st->hash_context && !strcmp(rt->context, st->context)
|
||||
&& (!rt->family_pattern || simple_pattern_matches(rt->family_pattern, st->family))) {
|
||||
RRDCALC *rc = rrdcalc_create_from_template(host, rt, st->id);
|
||||
if(unlikely(!rc))
|
||||
info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname);
|
||||
rrdcalctemplate_link_matching_test(rt, st, host);
|
||||
}
|
||||
|
||||
#ifdef NETDATA_INTERNAL_CHECKS
|
||||
else if(rc->rrdset != st)
|
||||
error("Health alarm '%s.%s' should be linked to chart '%s', but it is not", rc->chart?rc->chart:"NOCHART", rc->name, st->id);
|
||||
#endif
|
||||
}
|
||||
for(rt = host->alarms_template_with_foreach; rt ; rt = rt->next) {
|
||||
rrdcalctemplate_link_matching_test(rt, st, host);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -43,6 +55,8 @@ inline void rrdcalctemplate_free(RRDCALCTEMPLATE *rt) {
|
|||
freez(rt->units);
|
||||
freez(rt->info);
|
||||
freez(rt->dimensions);
|
||||
freez(rt->foreachdim);
|
||||
simple_pattern_free(rt->spdim);
|
||||
freez(rt);
|
||||
}
|
||||
|
||||
|
@ -67,5 +81,3 @@ inline void rrdcalctemplate_unlink_and_free(RRDHOST *host, RRDCALCTEMPLATE *rt)
|
|||
|
||||
rrdcalctemplate_free(rt);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -35,7 +35,11 @@ struct rrdcalctemplate {
|
|||
// database lookup settings
|
||||
|
||||
char *dimensions; // the chart dimensions
|
||||
RRDR_GROUPING group; // grouping method: average, max, etc.
|
||||
char *foreachdim; // the group of dimensions that the lookup will be applied.
|
||||
SIMPLE_PATTERN *spdim; // used if and only if there is a simple pattern for the chart.
|
||||
int foreachcounter; // the number of alarms created with foreachdim, this also works as an id of the
|
||||
// children
|
||||
RRDR_GROUPING group; // grouping method: average, max, etc.
|
||||
int before; // ending point in time-series
|
||||
int after; // starting point in time-series
|
||||
uint32_t options; // calculation options
|
||||
|
@ -70,5 +74,5 @@ extern void rrdcalctemplate_link_matching(RRDSET *st);
|
|||
|
||||
extern void rrdcalctemplate_free(RRDCALCTEMPLATE *rt);
|
||||
extern void rrdcalctemplate_unlink_and_free(RRDHOST *host, RRDCALCTEMPLATE *rt);
|
||||
|
||||
extern void rrdcalctemplate_create_alarms(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st);
|
||||
#endif //NETDATA_RRDCALCTEMPLATE_H
|
||||
|
|
|
@ -156,6 +156,35 @@ static time_t rrddim_query_oldest_time(RRDDIM *rd) {
|
|||
// ----------------------------------------------------------------------------
|
||||
// RRDDIM create a dimension
|
||||
|
||||
/**
 * Link the foreach alarms to a dimension
 *
 * Walk host->alarms_with_foreach and, for every parent alarm whose dimension pattern matches
 * this dimension (by id or by name) and whose chart is equal to the chart name, create a child
 * alarm named "<parent name>_<dimension name>", add it to the host and insert it in the
 * alarms_idx_health_log index. A parent is skipped when an alarm with that name already exists
 * for the chart.
 *
 * @param rd is the dimension to link.
 * @param st is the chart that has the dimension.
 * @param host is the host structure whose foreach alarms are scanned.
 */
void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host) {
    RRDCALC *rrdc;

    for (rrdc = host->alarms_with_foreach; rrdc ; rrdc = rrdc->next) {
        if (!simple_pattern_matches(rrdc->spdim, rd->id) && !simple_pattern_matches(rrdc->spdim, rd->name))
            continue;

        // NOTE(review): only the chart name is compared here, while alarms created from
        // templates store the chart id in rc->chart - confirm whether st->id should match too.
        if (strcmp(rrdc->chart, st->name) != 0)
            continue;

        char *usename = alarm_name_with_dim(rrdc->name, strlen(rrdc->name), rd->name, strlen(rd->name));

        // Go on only with a freshly built string. rrdc->name belongs to the parent alarm, so it
        // must never be freed below nor given to a child as its own name.
        if (!usename || usename == rrdc->name)
            continue;

        if(rrdcalc_exists(host, st->name, usename, 0, 0)){
            freez(usename);
            continue;
        }

        RRDCALC *child = rrdcalc_create_from_rrdcalc(rrdc, host, usename, rd->name);
        if (!child) {
            error("Cannot allocate a new alarm.");
            rrdc->foreachcounter--;
            freez(usename); // no child took the name over, release it here
            continue;
        }

        // from here on the child owns usename
        rrdcalc_add_to_host(host, child);

        RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)child);
        if (rdcmp != child) {
            error("Cannot insert the alarm index ID %s",child->name);
        }
    }
}
|
||||
|
||||
RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode) {
|
||||
rrdset_wrlock(st);
|
||||
|
||||
|
@ -371,6 +400,12 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
|
|||
if(unlikely(rrddim_index_add(st, rd) != rd))
|
||||
error("RRDDIM: INTERNAL ERROR: attempt to index duplicate dimension '%s' on chart '%s'", rd->id, st->id);
|
||||
|
||||
if(host->alarms_with_foreach || host->alarms_template_with_foreach) {
|
||||
rrdhost_wrlock(host);
|
||||
rrdcalc_link_to_rrddim(rd, st, host);
|
||||
|
||||
rrdhost_unlock(host);
|
||||
}
|
||||
rrdset_unlock(st);
|
||||
return(rd);
|
||||
}
|
||||
|
|
|
@ -598,9 +598,23 @@ void rrdhost_free(RRDHOST *host) {
|
|||
while(host->alarms)
|
||||
rrdcalc_unlink_and_free(host, host->alarms);
|
||||
|
||||
RRDCALC *rc,*nc;
|
||||
for(rc = host->alarms_with_foreach; rc ; rc = nc) {
|
||||
nc = rc->next;
|
||||
rrdcalc_free(rc);
|
||||
}
|
||||
host->alarms_with_foreach = NULL;
|
||||
|
||||
while(host->templates)
|
||||
rrdcalctemplate_unlink_and_free(host, host->templates);
|
||||
|
||||
RRDCALCTEMPLATE *rt,*next;
|
||||
for(rt = host->alarms_template_with_foreach; rt ; rt = next) {
|
||||
next = rt->next;
|
||||
rrdcalctemplate_free(rt);
|
||||
}
|
||||
host->alarms_template_with_foreach = NULL;
|
||||
|
||||
debug(D_RRD_CALLS, "RRDHOST: Cleaning up remaining host variables for host '%s'", host->hostname);
|
||||
rrdvar_free_remaining_variables(host, &host->rrdvar_root_index);
|
||||
|
||||
|
|
|
@ -163,7 +163,7 @@ This line makes a database lookup to find a value. This result of this lookup is
|
|||
The format is:
|
||||
|
||||
```
|
||||
lookup: METHOD AFTER [at BEFORE] [every DURATION] [OPTIONS] [of DIMENSIONS]
|
||||
lookup: METHOD AFTER [at BEFORE] [every DURATION] [OPTIONS] [of DIMENSIONS] [foreach DIMENSIONS]
|
||||
```
|
||||
|
||||
Everything is the same with [badges](../web/api/badges/). In short:
|
||||
|
@ -190,6 +190,11 @@ Everything is the same with [badges](../web/api/badges/). In short:
|
|||
have spaces in their names). This accepts Netdata simple patterns and the `match-ids` and
|
||||
`match-names` options affect the searches for dimensions.
|
||||
|
||||
- `foreach DIMENSIONS` is optional, will always be the last parameter, and uses the same `,`/`|`
|
||||
rules as the `of` parameter. Each dimension you specify in `foreach` will use the same rule
|
||||
to trigger an alarm. If you set both `of` and `foreach`, Netdata will ignore the `of` parameter
|
||||
and replace it with one of the dimensions you gave to `foreach`.
|
||||
|
||||
The result of the lookup will be available as `$this` and `$NAME` in expressions.
|
||||
The timestamps of the timeframe evaluated by the database lookup is available as variables
|
||||
`$after` and `$before` (both are unix timestamps).
|
||||
|
@ -660,6 +665,43 @@ Note that the drops chart does not exist if a network interface has never droppe
|
|||
When Netdata detects a dropped packet, it will add the chart and it will automatically attach this
|
||||
alarm to it.
|
||||
|
||||
### Example 5
|
||||
|
||||
Check if user or system dimension is using more than 50% of cpu:
|
||||
|
||||
```
|
||||
alarm: dim_template
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: average -3s percentage foreach system,user
|
||||
units: %
|
||||
every: 10s
|
||||
warn: $this > 50
|
||||
crit: $this > 80
|
||||
```
|
||||
|
||||
The `lookup` line will calculate the average CPU usage from system and user in the last 3 seconds. Because we have
|
||||
the foreach in the `lookup` line, Netdata will create two independent alarms called `dim_template_system`
|
||||
and `dim_template_user` that will have all the other parameters shared among them.
|
||||
|
||||
### Example 6
|
||||
|
||||
Check if any dimension is using more than 50% of cpu:
|
||||
|
||||
```
|
||||
alarm: dim_template
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: average -3s percentage foreach *
|
||||
units: %
|
||||
every: 10s
|
||||
warn: $this > 50
|
||||
crit: $this > 80
|
||||
```
|
||||
|
||||
The `lookup` line will calculate the average CPU usage of each dimension over the last 3 seconds. In this case
|
||||
Netdata will create alarms for all dimensions of the chart.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
You can compile Netdata with [debugging](../daemon#debugging) and then set in `netdata.conf`:
|
||||
|
|
|
@ -113,9 +113,23 @@ void health_reload_host(RRDHOST *host) {
|
|||
while(host->templates)
|
||||
rrdcalctemplate_unlink_and_free(host, host->templates);
|
||||
|
||||
RRDCALCTEMPLATE *rt,*next;
|
||||
for(rt = host->alarms_template_with_foreach; rt ; rt = next) {
|
||||
next = rt->next;
|
||||
rrdcalctemplate_free(rt);
|
||||
}
|
||||
host->alarms_template_with_foreach = NULL;
|
||||
|
||||
while(host->alarms)
|
||||
rrdcalc_unlink_and_free(host, host->alarms);
|
||||
|
||||
RRDCALC *rc,*nc;
|
||||
for(rc = host->alarms_with_foreach; rc ; rc = nc) {
|
||||
nc = rc->next;
|
||||
rrdcalc_free(rc);
|
||||
}
|
||||
host->alarms_with_foreach = NULL;
|
||||
|
||||
rrdhost_unlock(host);
|
||||
|
||||
// invalidate all previous entries in the alarm log
|
||||
|
@ -139,9 +153,17 @@ void health_reload_host(RRDHOST *host) {
|
|||
health_readdir(host, user_path, stock_path, NULL);
|
||||
|
||||
// link the loaded alarms to their charts
|
||||
RRDDIM *rd;
|
||||
rrdset_foreach_write(st, host) {
|
||||
rrdsetcalc_link_matching(st);
|
||||
rrdcalctemplate_link_matching(st);
|
||||
|
||||
//This loop must be the last, because ` rrdcalctemplate_link_matching` will create alarms related to it.
|
||||
rrdset_rdlock(st);
|
||||
rrddim_foreach_read(rd, st) {
|
||||
rrdcalc_link_to_rrddim(rd, st, host);
|
||||
}
|
||||
rrdset_unlock(st);
|
||||
}
|
||||
|
||||
rrdhost_unlock(host);
|
||||
|
@ -888,6 +910,7 @@ void *health_main(void *ptr) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) {
|
||||
rc->last_repeat = now;
|
||||
ALARM_ENTRY *ae = health_create_alarm_entry(
|
||||
|
|
|
@ -48,6 +48,7 @@ extern unsigned int default_health_enabled;
|
|||
#define HEALTH_INFO_KEY "info"
|
||||
#define HEALTH_DELAY_KEY "delay"
|
||||
#define HEALTH_OPTIONS_KEY "options"
|
||||
#define HEALTH_FOREACH_KEY "foreach"
|
||||
|
||||
#define HEALTH_SILENCERS_MAX_FILE_LEN 10000
|
||||
|
||||
|
@ -106,4 +107,6 @@ extern void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae);
|
|||
|
||||
extern void *health_cmdapi_thread(void *ptr);
|
||||
|
||||
extern SIMPLE_PATTERN *health_pattern_from_foreach(char *s);
|
||||
|
||||
#endif //NETDATA_HEALTH_H
|
||||
|
|
|
@ -46,7 +46,7 @@ static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) {
|
|||
|
||||
rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id);
|
||||
|
||||
debug(D_HEALTH, "Health configuration adding alarm '%s.%s' (%u): exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
|
||||
debug(D_HEALTH, "Health configuration adding alarm '%s.%s' (%u): exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
|
||||
rc->chart?rc->chart:"NOCHART",
|
||||
rc->name,
|
||||
rc->id,
|
||||
|
@ -59,6 +59,7 @@ static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) {
|
|||
rc->before,
|
||||
rc->options,
|
||||
(rc->dimensions)?rc->dimensions:"NONE",
|
||||
(rc->foreachdim)?rc->foreachdim:"NONE",
|
||||
rc->update_every,
|
||||
(rc->calculation)?rc->calculation->parsed_as:"NONE",
|
||||
(rc->warning)?rc->warning->parsed_as:"NONE",
|
||||
|
@ -73,6 +74,7 @@ static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) {
|
|||
);
|
||||
|
||||
rrdcalc_add_to_host(host, rc);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -93,48 +95,70 @@ static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCAL
|
|||
}
|
||||
|
||||
RRDCALCTEMPLATE *t, *last = NULL;
|
||||
for (t = host->templates; t ; last = t, t = t->next) {
|
||||
if(unlikely(t->hash_name == rt->hash_name
|
||||
&& !strcmp(t->name, rt->name)
|
||||
&& !strcmp(t->family_match?t->family_match:"*", rt->family_match?rt->family_match:"*")
|
||||
)) {
|
||||
error("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname);
|
||||
return 0;
|
||||
if(!rt->foreachdim) {
|
||||
for (t = host->templates; t ; last = t, t = t->next) {
|
||||
if(unlikely(t->hash_name == rt->hash_name
|
||||
&& !strcmp(t->name, rt->name)
|
||||
&& !strcmp(t->family_match?t->family_match:"*", rt->family_match?rt->family_match:"*")
|
||||
)) {
|
||||
error("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if(likely(last)) {
|
||||
last->next = rt;
|
||||
}
|
||||
else {
|
||||
rt->next = host->templates;
|
||||
host->templates = rt;
|
||||
}
|
||||
} else {
|
||||
for (t = host->alarms_template_with_foreach; t ; last = t, t = t->next) {
|
||||
if(unlikely(t->hash_name == rt->hash_name
|
||||
&& !strcmp(t->name, rt->name)
|
||||
&& !strcmp(t->family_match?t->family_match:"*", rt->family_match?rt->family_match:"*")
|
||||
)) {
|
||||
error("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if(likely(last)) {
|
||||
last->next = rt;
|
||||
}
|
||||
else {
|
||||
rt->next = host->alarms_template_with_foreach;
|
||||
host->alarms_template_with_foreach = rt;
|
||||
}
|
||||
}
|
||||
|
||||
debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
|
||||
rt->name,
|
||||
(rt->context)?rt->context:"NONE",
|
||||
(rt->exec)?rt->exec:"DEFAULT",
|
||||
(rt->recipient)?rt->recipient:"DEFAULT",
|
||||
rt->green,
|
||||
rt->red,
|
||||
(int)rt->group,
|
||||
rt->after,
|
||||
rt->before,
|
||||
rt->options,
|
||||
(rt->dimensions)?rt->dimensions:"NONE",
|
||||
rt->update_every,
|
||||
(rt->calculation)?rt->calculation->parsed_as:"NONE",
|
||||
(rt->warning)?rt->warning->parsed_as:"NONE",
|
||||
(rt->critical)?rt->critical->parsed_as:"NONE",
|
||||
rt->source,
|
||||
rt->delay_up_duration,
|
||||
rt->delay_down_duration,
|
||||
rt->delay_max_duration,
|
||||
rt->delay_multiplier,
|
||||
rt->warn_repeat_every,
|
||||
rt->crit_repeat_every
|
||||
debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
|
||||
rt->name,
|
||||
(rt->context)?rt->context:"NONE",
|
||||
(rt->exec)?rt->exec:"DEFAULT",
|
||||
(rt->recipient)?rt->recipient:"DEFAULT",
|
||||
rt->green,
|
||||
rt->red,
|
||||
(int)rt->group,
|
||||
rt->after,
|
||||
rt->before,
|
||||
rt->options,
|
||||
(rt->dimensions)?rt->dimensions:"NONE",
|
||||
(rt->foreachdim)?rt->foreachdim:"NONE",
|
||||
rt->update_every,
|
||||
(rt->calculation)?rt->calculation->parsed_as:"NONE",
|
||||
(rt->warning)?rt->warning->parsed_as:"NONE",
|
||||
(rt->critical)?rt->critical->parsed_as:"NONE",
|
||||
rt->source,
|
||||
rt->delay_up_duration,
|
||||
rt->delay_down_duration,
|
||||
rt->delay_max_duration,
|
||||
rt->delay_multiplier,
|
||||
rt->warn_repeat_every,
|
||||
rt->crit_repeat_every
|
||||
);
|
||||
|
||||
if(likely(last)) {
|
||||
last->next = rt;
|
||||
}
|
||||
else {
|
||||
rt->next = host->templates;
|
||||
host->templates = rt;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -291,16 +315,37 @@ static inline int health_parse_repeat(
|
|||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Health pattern from Foreach
|
||||
*
|
||||
* Create a new simple pattern using the user input
|
||||
*
|
||||
* @param s the string that will be used to create the simple pattern.
|
||||
*/
|
||||
SIMPLE_PATTERN *health_pattern_from_foreach(char *s) {
|
||||
char *convert= strdupz(s);
|
||||
SIMPLE_PATTERN *val = NULL;
|
||||
if(convert) {
|
||||
dimension_remove_pipe_comma(convert);
|
||||
val = simple_pattern_create(convert, NULL, SIMPLE_PATTERN_EXACT);
|
||||
|
||||
freez(convert);
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline int health_parse_db_lookup(
|
||||
size_t line, const char *filename, char *string,
|
||||
RRDR_GROUPING *group_method, int *after, int *before, int *every,
|
||||
uint32_t *options, char **dimensions
|
||||
uint32_t *options, char **dimensions, char **foreachdim
|
||||
) {
|
||||
debug(D_HEALTH, "Health configuration parsing database lookup %zu@%s: %s", line, filename, string);
|
||||
|
||||
if(*dimensions) freez(*dimensions);
|
||||
if(*foreachdim) freez(*foreachdim);
|
||||
*dimensions = NULL;
|
||||
*foreachdim = NULL;
|
||||
*after = 0;
|
||||
*before = 0;
|
||||
*every = 0;
|
||||
|
@ -387,8 +432,22 @@ static inline int health_parse_db_lookup(
|
|||
*options |= RRDR_OPTION_MATCH_NAMES;
|
||||
}
|
||||
else if(!strcasecmp(key, "of")) {
|
||||
if(*s && strcasecmp(s, "all") != 0)
|
||||
char *find = NULL;
|
||||
if(*s && strcasecmp(s, "all") != 0) {
|
||||
find = strcasestr(s, " foreach");
|
||||
if(find) {
|
||||
*find = '\0';
|
||||
}
|
||||
*dimensions = strdupz(s);
|
||||
}
|
||||
|
||||
if(!find) {
|
||||
break;
|
||||
}
|
||||
s = ++find;
|
||||
}
|
||||
else if(!strcasecmp(key, HEALTH_FOREACH_KEY )) {
|
||||
*foreachdim = strdupz(s);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
|
@ -521,8 +580,12 @@ static int health_readfile(const char *filename, void *data) {
|
|||
uint32_t hash = simple_uhash(key);
|
||||
|
||||
if(hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) {
|
||||
if (rc && (ignore_this || !rrdcalc_add_alarm_from_config(host, rc)))
|
||||
rrdcalc_free(rc);
|
||||
if(rc) {
|
||||
if(ignore_this || !rrdcalc_add_alarm_from_config(host, rc)) {
|
||||
rrdcalc_free(rc);
|
||||
}
|
||||
// health_add_alarms_loop(host, rc, ignore_this) ;
|
||||
}
|
||||
|
||||
if(rt) {
|
||||
if (ignore_this || !rrdcalctemplate_add_template_from_config(host, rt))
|
||||
|
@ -552,14 +615,18 @@ static int health_readfile(const char *filename, void *data) {
|
|||
}
|
||||
else if(hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) {
|
||||
if(rc) {
|
||||
if(ignore_this || !rrdcalc_add_alarm_from_config(host, rc))
|
||||
// health_add_alarms_loop(host, rc, ignore_this) ;
|
||||
if(ignore_this || !rrdcalc_add_alarm_from_config(host, rc)) {
|
||||
rrdcalc_free(rc);
|
||||
}
|
||||
|
||||
rc = NULL;
|
||||
}
|
||||
|
||||
if(rt && (ignore_this || !rrdcalctemplate_add_template_from_config(host, rt)))
|
||||
rrdcalctemplate_free(rt);
|
||||
if(rt) {
|
||||
if(ignore_this || !rrdcalctemplate_add_template_from_config(host, rt))
|
||||
rrdcalctemplate_free(rt);
|
||||
}
|
||||
|
||||
rt = callocz(1, sizeof(RRDCALCTEMPLATE));
|
||||
rt->name = strdupz(value);
|
||||
|
@ -622,8 +689,10 @@ static int health_readfile(const char *filename, void *data) {
|
|||
}
|
||||
else if(hash == hash_lookup && !strcasecmp(key, HEALTH_LOOKUP_KEY)) {
|
||||
health_parse_db_lookup(line, filename, value, &rc->group, &rc->after, &rc->before,
|
||||
&rc->update_every,
|
||||
&rc->options, &rc->dimensions);
|
||||
&rc->update_every, &rc->options, &rc->dimensions, &rc->foreachdim);
|
||||
if(rc->foreachdim) {
|
||||
rc->spdim = health_pattern_from_foreach(rc->foreachdim);
|
||||
}
|
||||
}
|
||||
else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) {
|
||||
if(!config_parse_duration(value, &rc->update_every))
|
||||
|
@ -752,7 +821,10 @@ static int health_readfile(const char *filename, void *data) {
|
|||
}
|
||||
else if(hash == hash_lookup && !strcasecmp(key, HEALTH_LOOKUP_KEY)) {
|
||||
health_parse_db_lookup(line, filename, value, &rt->group, &rt->after, &rt->before,
|
||||
&rt->update_every, &rt->options, &rt->dimensions);
|
||||
&rt->update_every, &rt->options, &rt->dimensions, &rt->foreachdim);
|
||||
if(rt->foreachdim) {
|
||||
rt->spdim = health_pattern_from_foreach(rt->foreachdim);
|
||||
}
|
||||
}
|
||||
else if(hash == hash_every && !strcasecmp(key, HEALTH_EVERY_KEY)) {
|
||||
if(!config_parse_duration(value, &rt->update_every))
|
||||
|
@ -866,11 +938,17 @@ static int health_readfile(const char *filename, void *data) {
|
|||
}
|
||||
}
|
||||
|
||||
if(rc && (ignore_this || !rrdcalc_add_alarm_from_config(host, rc)))
|
||||
rrdcalc_free(rc);
|
||||
if(rc) {
|
||||
//health_add_alarms_loop(host, rc, ignore_this) ;
|
||||
if(ignore_this || !rrdcalc_add_alarm_from_config(host, rc)) {
|
||||
rrdcalc_free(rc);
|
||||
}
|
||||
}
|
||||
|
||||
if(rt && (ignore_this || !rrdcalctemplate_add_template_from_config(host, rt)))
|
||||
rrdcalctemplate_free(rt);
|
||||
if(rt) {
|
||||
if(ignore_this || !rrdcalctemplate_add_template_from_config(host, rt))
|
||||
rrdcalctemplate_free(rt);
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
return 1;
|
||||
|
@ -881,5 +959,6 @@ void health_readdir(RRDHOST *host, const char *user_path, const char *stock_path
|
|||
debug(D_HEALTH, "CONFIG health is not enabled for host '%s'", host->hostname);
|
||||
return;
|
||||
}
|
||||
|
||||
recursive_config_double_dir_load(user_path, stock_path, subpath, health_readfile, (void *) host, 0);
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ CLEANFILES = \
|
|||
acls/acl.sh \
|
||||
urls/request.sh \
|
||||
alarm_repetition/alarm.sh \
|
||||
template_dimension/template_dim.sh \
|
||||
$(NULL)
|
||||
|
||||
include $(top_srcdir)/build/subst.inc
|
||||
|
@ -28,6 +29,7 @@ dist_noinst_DATA = \
|
|||
acls/acl.sh.in \
|
||||
urls/request.sh.in \
|
||||
alarm_repetition/alarm.sh.in \
|
||||
template_dimension/template_dim.sh.in \
|
||||
$(NULL)
|
||||
|
||||
dist_plugins_SCRIPTS = \
|
||||
|
@ -35,6 +37,7 @@ dist_plugins_SCRIPTS = \
|
|||
acls/acl.sh \
|
||||
urls/request.sh \
|
||||
alarm_repetition/alarm.sh \
|
||||
template_dimension/template_dim.sh \
|
||||
$(NULL)
|
||||
|
||||
dist_noinst_SCRIPTS = \
|
||||
|
|
8
tests/template_dimension/system_cpu.conf.alarm_foreach
Normal file
8
tests/template_dimension/system_cpu.conf.alarm_foreach
Normal file
|
@ -0,0 +1,8 @@
|
|||
alarm: dev_dim_template
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage foreach system,user,nice
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
|
@ -0,0 +1,8 @@
|
|||
alarm: dev_dim_template
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage foreach *
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
26
tests/template_dimension/system_cpu.conf.template_alarm
Normal file
26
tests/template_dimension/system_cpu.conf.template_alarm
Normal file
|
@ -0,0 +1,26 @@
|
|||
template: dev_dim_template_system
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage of system
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
||||
|
||||
template: dev_dim_template_user
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage of user
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
||||
|
||||
template: dev_dim_template_nice
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage of nice
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
|
@ -0,0 +1,8 @@
|
|||
template: dev_dim_template
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage foreach system,user,nice
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
|
@ -0,0 +1,8 @@
|
|||
template: dev_dim_template
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage foreach *
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
26
tests/template_dimension/system_cpu.conf.unique_alarm
Normal file
26
tests/template_dimension/system_cpu.conf.unique_alarm
Normal file
|
@ -0,0 +1,26 @@
|
|||
alarm: dev_dim_template_system
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage of system
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
||||
|
||||
alarm: dev_dim_template_user
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage of user
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
||||
|
||||
alarm: dev_dim_template_nice
|
||||
on: system.cpu
|
||||
os: linux
|
||||
lookup: sum -3s at 0 every 3 percentage of nice
|
||||
units: %
|
||||
every: 1s
|
||||
warn: $this > 1
|
||||
crit: $this > 4
|
88
tests/template_dimension/template_dim.sh.in
Normal file
88
tests/template_dimension/template_dim.sh.in
Normal file
|
@ -0,0 +1,88 @@
|
|||
#!/bin/bash
|
||||
|
||||
#The health directory to put the alarms
|
||||
HEALTHDIR="@configdir_POST@/health.d/"
|
||||
|
||||
#the directory that stores the answers of the queries, and the query itself
|
||||
OUTDIR="alarms"
|
||||
QUERY="/api/v1/alarms?all"
|
||||
MURL="http://localhost:19999$QUERY"
|
||||
|
||||
#error messages
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
NOCOLOR='\033[0m'
|
||||
|
||||
ALARMTEST="dev_dim_template"
|
||||
|
||||
#Replace the health configuration under test.
#  $1 - configuration file of the previous scenario, removed when it exists
#  $2 - configuration file to install
#  $3 - destination of the installed file
change_alarm_file() {
    #nothing to remove on the first scenario
    [ ! -f "$1" ] || rm "$1"

    #copy keeping the permissions
    cp -a "$2" "$3"
}
|
||||
|
||||
#Validate one download made from the alarms API.
#  $1 - file with the verbose output written by curl
#  $2 - file with the body of the answer
#  $3 - number of alarms named after $ALARMTEST expected in the body
#The script exits with status 1 when the request was not answered with
#HTTP 200 or when the number of alarms found differs from the expected one.
netdata_test_download() {
    if ! grep -q "HTTP/1.1 200 OK" "$1" 2>/dev/null; then
        echo -e "${RED} Error to get the alarm log. ${NOCOLOR}"
        exit 1
    fi

    #count the lines having "name" whose second ':' separated field has the alarm name
    TOTALARM=$(grep "$ALARMTEST" "$2" | grep name | cut -d: -f2 | grep -c "$ALARMTEST")

    if [ "$TOTALARM" -ne "$3" ]; then
        #ALARMTEST is the name searched above, the variable used here before was never set
        echo -e "${RED} The number of actives alarms with the name $ALARMTEST is wrong ${NOCOLOR}"
        exit 1
    fi
}
|
||||
|
||||
#Download the list of alarms and validate it.
#  $1 - base name of the files written inside $OUTDIR (.out body, .err curl messages)
#  $2 - number of alarms expected, given to netdata_test_download
get_the_logs() {
    local body="$OUTDIR/$1.out"
    local messages="$OUTDIR/$1.err"

    curl -v -k --create-dirs -o "$body" "$MURL" 2> "$messages"
    netdata_test_download "$messages" "$body" "$2"
}
|
||||
|
||||
#Start Netdata, give it time to create the alarms, check them and stop it.
#  $1 - base name of the output files, given to get_the_logs
#  $2 - number of alarms expected, given to get_the_logs
#The script exits with the status of the check when the check fails.
process_data() {
    netdata -D &
    NETDATAPID=$!
    echo -e "${NOCOLOR}Sleeping during 15 seconds to create alarms"
    sleep 15

    #The alarms are requested while the process started above is still
    #running. The subshell keeps the "exit 1" of netdata_test_download from
    #finishing the script before the process is stopped.
    ( get_the_logs "$1" "$2" )
    RESULT=$?

    kill "$NETDATAPID"
    #do not return before the process is gone, the next scenario starts another one
    wait "$NETDATAPID" 2>/dev/null

    if [ "$RESULT" -ne 0 ]; then
        exit "$RESULT"
    fi
}
|
||||
|
||||
#Install one configuration file and check the alarms created from it.
#  $1 - configuration file of the previous scenario
#  $2 - configuration file to install
#  $3 - destination of the installed file
#  $4 - base name of the output files
#  $5 - number of alarms expected
run_scenario() {
    change_alarm_file "$1" "$2" "$3"
    process_data "$4" "$5" "$3"
}

#a failure to create the directory is only reported, the tests run anyway
if ! mkdir "$OUTDIR"; then
    echo -e "${RED}Cannot create the output directory, it already exists. The test will overwrite previous results. ${NOCOLOR}"
fi

#the first argument, when given, replaces the default address of Netdata
[ -z "$1" ] || MURL="$1$QUERY"

#each scenario removes the file installed by the scenario before it
run_scenario "./0" "system_cpu.conf.unique_alarm" "$HEALTHDIR/dim_double_without_template.conf" "double_without_template" 3

run_scenario "$HEALTHDIR/dim_double_without_template.conf" "system_cpu.conf.alarm_foreach" "$HEALTHDIR/dim_foreach_without_template.conf" "foreach_without_template" 3

run_scenario "$HEALTHDIR/dim_foreach_without_template.conf" "system_cpu.conf.alarm_foreach_sp" "$HEALTHDIR/dim_foreach_without_template_sp.conf" "foreach_without_template" 10

run_scenario "$HEALTHDIR/dim_foreach_without_template_sp.conf" "system_cpu.conf.template_alarm" "$HEALTHDIR/dim_double_with_template.conf" "double_with_template" 3

run_scenario "$HEALTHDIR/dim_double_with_template.conf" "system_cpu.conf.template_foreach" "$HEALTHDIR/dim_foreach_with_template.conf" "foreach_with_template" 3

run_scenario "$HEALTHDIR/dim_foreach_with_template.conf" "system_cpu.conf.template_foreach_sp" "$HEALTHDIR/dim_foreach_with_template_sp.conf" "foreach_with_template" 10

#remove the last configuration installed and the downloaded files
rm "$HEALTHDIR/dim_foreach_with_template_sp.conf"
rm -rf "$OUTDIR"

echo -e "${GREEN} all the tests were sucessful ${NOCOLOR}"
|
Loading…
Add table
Reference in a new issue