From 708efb41bdf952c84b60326d40c07cc69e58d19c Mon Sep 17 00:00:00 2001
From: Emmanuel Vasilakis <mrzammler@mm.st>
Date: Tue, 16 Aug 2022 10:33:08 +0300
Subject: [PATCH] Support chart labels in alerts (#13290)

* chart labels for alerts

* proper termination

* use strchr

* change if statement

* change label variable. add docs

* change doc

* assign buf to temp

* use new dictionary functions

* reduce variable scope

* reduce line length

* make sure rrdcalc updates labels after inserted

* reduce var scope

* add rrdcalc.c for cmocka tests

* Revert "add rrdcalc.c for cmocka tests"

This reverts commit 5fe122adcf7abcbe6d67fa2ebd7c4ff8620cf9c8.

* Fix cmocka unit tests

* valgrind errors

Co-authored-by: Vladimir Kobal <vlad@prokk.net>
---
 .../cgroups.plugin/tests/test_doubles.c       |  4 ++
 collectors/proc.plugin/proc_diskstats.c       |  2 +
 collectors/proc.plugin/proc_mdstat.c          |  1 +
 collectors/proc.plugin/proc_net_wireless.c    |  1 +
 .../proc.plugin/sys_class_power_supply.c      |  1 +
 collectors/proc.plugin/sys_fs_btrfs.c         |  1 +
 database/rrd.h                                |  1 +
 database/rrdcalc.c                            | 69 ++++++++++++++++++-
 database/rrdcalc.h                            |  7 ++
 database/rrdlabels.c                          | 15 +++-
 exporting/tests/netdata_doubles.c             |  4 ++
 health/REFERENCE.md                           | 38 +++++++++-
 health/health_config.c                        |  2 +
 health/health_json.c                          | 37 +---------
 health/health_log.c                           | 19 +----
 15 files changed, 147 insertions(+), 55 deletions(-)

diff --git a/collectors/cgroups.plugin/tests/test_doubles.c b/collectors/cgroups.plugin/tests/test_doubles.c
index 6203d444cd..6d85bc0f12 100644
--- a/collectors/cgroups.plugin/tests/test_doubles.c
+++ b/collectors/cgroups.plugin/tests/test_doubles.c
@@ -153,3 +153,7 @@ void sql_store_chart_label(uuid_t *chart_uuid, int source_type, char *label, cha
     UNUSED(label);
     UNUSED(value);
 }
+
+void rrdcalc_update_rrdlabels(RRDSET *st) {
+    (void)st;
+}
diff --git a/collectors/proc.plugin/proc_diskstats.c b/collectors/proc.plugin/proc_diskstats.c
index be4a481cdb..64dc5ae690 100644
--- a/collectors/proc.plugin/proc_diskstats.c
+++ b/collectors/proc.plugin/proc_diskstats.c
@@ -852,6 +852,8 @@ static void add_labels_to_disk(struct disk *d, RRDSET *st) {
             rrdlabels_add(st->state->chart_labels, "device_type", "virtual", RRDLABEL_SRC_AUTO);
             break;
     }
+
+    rrdcalc_update_rrdlabels(st);
 }
 
 int do_proc_diskstats(int update_every, usec_t dt) {
diff --git a/collectors/proc.plugin/proc_mdstat.c b/collectors/proc.plugin/proc_mdstat.c
index c8015827ee..c50c462adc 100644
--- a/collectors/proc.plugin/proc_mdstat.c
+++ b/collectors/proc.plugin/proc_mdstat.c
@@ -80,6 +80,7 @@ static inline void make_chart_obsolete(char *name, const char *id_modifier)
 static void add_labels_to_mdstat(struct raid *raid, RRDSET *st) {
     rrdlabels_add(st->state->chart_labels, "device", raid->name, RRDLABEL_SRC_AUTO);
     rrdlabels_add(st->state->chart_labels, "raid_level", raid->level, RRDLABEL_SRC_AUTO);
+    rrdcalc_update_rrdlabels(st);
 }
 
 int do_proc_mdstat(int update_every, usec_t dt)
diff --git a/collectors/proc.plugin/proc_net_wireless.c b/collectors/proc.plugin/proc_net_wireless.c
index c6ee4ff700..dac8344895 100644
--- a/collectors/proc.plugin/proc_net_wireless.c
+++ b/collectors/proc.plugin/proc_net_wireless.c
@@ -200,6 +200,7 @@ static void configure_device(int do_status, int do_quality, int do_discarded_pac
 
 static void add_labels_to_wireless(struct netwireless *w, RRDSET *st) {
     rrdlabels_add(st->state->chart_labels, "device", w->name, RRDLABEL_SRC_AUTO);
+    rrdcalc_update_rrdlabels(st);
 }
 
 int do_proc_net_wireless(int update_every, usec_t dt)
diff --git a/collectors/proc.plugin/sys_class_power_supply.c b/collectors/proc.plugin/sys_class_power_supply.c
index a80d46e937..e8a7924b09 100644
--- a/collectors/proc.plugin/sys_class_power_supply.c
+++ b/collectors/proc.plugin/sys_class_power_supply.c
@@ -114,6 +114,7 @@ void power_supply_free(struct power_supply *ps) {
 
 static void add_labels_to_power_supply(struct power_supply *ps, RRDSET *st) {
     rrdlabels_add(st->state->chart_labels, "device", ps->name, RRDLABEL_SRC_AUTO);
+    rrdcalc_update_rrdlabels(st);
 }
 
 int do_sys_class_power_supply(int update_every, usec_t dt) {
diff --git a/collectors/proc.plugin/sys_fs_btrfs.c b/collectors/proc.plugin/sys_fs_btrfs.c
index 158587a8f7..83a2dee376 100644
--- a/collectors/proc.plugin/sys_fs_btrfs.c
+++ b/collectors/proc.plugin/sys_fs_btrfs.c
@@ -451,6 +451,7 @@ static inline int find_all_btrfs_pools(const char *path) {
 static void add_labels_to_btrfs(BTRFS_NODE *n, RRDSET *st) {
     rrdlabels_add(st->state->chart_labels, "device", n->id, RRDLABEL_SRC_AUTO);
     rrdlabels_add(st->state->chart_labels, "device_label", n->label, RRDLABEL_SRC_AUTO);
+    rrdcalc_update_rrdlabels(st);
 }
 
 int do_sys_fs_btrfs(int update_every, usec_t dt) {
diff --git a/database/rrd.h b/database/rrd.h
index 605ff50bc2..f8919e83a5 100644
--- a/database/rrd.h
+++ b/database/rrd.h
@@ -216,6 +216,7 @@ extern void rrdlabels_destroy(DICTIONARY *labels_dict);
 extern void rrdlabels_add(DICTIONARY *dict, const char *name, const char *value, RRDLABEL_SRC ls);
 extern void rrdlabels_add_pair(DICTIONARY *dict, const char *string, RRDLABEL_SRC ls);
 extern void rrdlabels_get_value_to_buffer_or_null(DICTIONARY *labels, BUFFER *wb, const char *key, const char *quote, const char *null);
+extern void rrdlabels_get_value_to_char_or_null(DICTIONARY *labels, char **value, const char *key);
 
 extern void rrdlabels_unmark_all(DICTIONARY *labels);
 extern void rrdlabels_remove_all_unmarked(DICTIONARY *labels);
diff --git a/database/rrdcalc.c b/database/rrdcalc.c
index cab60468bb..ef0a918b03 100644
--- a/database/rrdcalc.c
+++ b/database/rrdcalc.c
@@ -35,6 +35,62 @@ inline const char *rrdcalc_status2string(RRDCALC_STATUS status) {
     }
 }
 
+char *rrdcalc_replace_variables(const char *line, RRDCALC *rc)
+{
+    if (!line)
+        return NULL;
+
+    size_t pos = 0;
+    char *temp = strdupz(line);
+    char var[RRDCALC_VAR_MAX];
+    char *m, *lbl_value = NULL;
+
+    while ((m = strchr(temp + pos, '$'))) {
+        int i=0;
+        char *e = m;
+        while (*e) {
+            if (*e == ' ' || i == RRDCALC_VAR_MAX - 1) {
+                break;
+            }
+            else
+                var[i]=*e;
+            e++;
+            i++;
+        }
+        var[i]='\0';
+        pos = m - temp + 1;
+        if (!strcmp(var, RRDCALC_VAR_FAMILY)) {
+            char *buf = find_and_replace(temp, var, (rc->rrdset && rc->rrdset->family) ? rc->rrdset->family : "", m);
+            freez(temp);
+            temp = buf;
+        } else if (!strncmp(var, RRDCALC_VAR_LABEL, RRDCALC_VAR_LABEL_LEN)) {
+            if(likely(rc->rrdset->state && rc->rrdset->state->chart_labels)) {
+                rrdlabels_get_value_to_char_or_null(rc->rrdset->state->chart_labels, &lbl_value, var+RRDCALC_VAR_LABEL_LEN);
+                if (lbl_value) {
+                    char *buf = find_and_replace(temp, var, lbl_value, m);
+                    freez(temp);
+                    temp = buf;
+                    freez(lbl_value);
+                }
+            }
+        }
+    }
+
+    return temp;
+}
+
+void rrdcalc_update_rrdlabels(RRDSET *st) {
+    RRDCALC *rc;
+    for( rc = st->alarms; rc ; rc = rc->rrdset_next ) {
+        if (rc->original_info) {
+            if (rc->info)
+                freez(rc->info);
+
+            rc->info = rrdcalc_replace_variables(rc->original_info, rc);
+        }
+    }
+}
+
 static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
     RRDHOST *host = st->rrdhost;
 
@@ -83,6 +139,12 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
 
     if(!rc->units) rc->units = strdupz(st->units);
 
+    if (rc->original_info) {
+        if (rc->info)
+            freez(rc->info);
+        rc->info = rrdcalc_replace_variables(rc->original_info, rc);
+    }
+
     time_t now = now_realtime_sec();
     ALARM_ENTRY *ae = health_create_alarm_entry(
         host,
@@ -427,7 +489,10 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
     if(rt->recipient) rc->recipient = strdupz(rt->recipient);
     if(rt->source) rc->source = strdupz(rt->source);
     if(rt->units) rc->units = strdupz(rt->units);
-    if(rt->info) rc->info = strdupz(rt->info);
+    if(rt->info) {
+        rc->info = strdupz(rt->info);
+        rc->original_info = strdupz(rt->info);
+    }
 
     if (rt->classification) rc->classification = strdupz(rt->classification);
     if (rt->component) rc->component = strdupz(rt->component);
@@ -543,6 +608,7 @@ inline RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const ch
     if(rc->source) newrc->source = strdupz(rc->source);
     if(rc->units) newrc->units = strdupz(rc->units);
     if(rc->info) newrc->info = strdupz(rc->info);
+    if(rc->original_info) newrc->original_info = strdupz(rc->original_info);
 
     if (rc->classification) newrc->classification = strdupz(rc->classification);
     if (rc->component) newrc->component = strdupz(rc->component);
@@ -586,6 +652,7 @@ void rrdcalc_free(RRDCALC *rc) {
     freez(rc->source);
     freez(rc->units);
     freez(rc->info);
+    freez(rc->original_info);
     freez(rc->classification);
     freez(rc->component);
     freez(rc->type);
diff --git a/database/rrdcalc.h b/database/rrdcalc.h
index 0dcd7ce698..66daf844fe 100644
--- a/database/rrdcalc.h
+++ b/database/rrdcalc.h
@@ -58,6 +58,7 @@ struct rrdcalc {
 
     char *source;                   // the source of this alarm
     char *units;                    // the units of the alarm
+    char *original_info;            // the original info field before any variable replacement
     char *info;                     // a short description of the alarm
 
     int update_every;               // update frequency for the alarm
@@ -210,6 +211,7 @@ extern RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const ch
 extern void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc);
 extern void dimension_remove_pipe_comma(char *str);
 extern char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen);
+extern void rrdcalc_update_rrdlabels(RRDSET *st);
 
 extern void rrdcalc_labels_unlink();
 extern void rrdcalc_labels_unlink_alarm_from_host(RRDHOST *host);
@@ -221,4 +223,9 @@ static inline int rrdcalc_isrepeating(RRDCALC *rc) {
     return 0;
 }
 
+#define RRDCALC_VAR_MAX 100
+#define RRDCALC_VAR_FAMILY "$family"
+#define RRDCALC_VAR_LABEL "$label:"
+#define RRDCALC_VAR_LABEL_LEN (sizeof(RRDCALC_VAR_LABEL)-1)
+
 #endif //NETDATA_RRDCALC_H
diff --git a/database/rrdlabels.c b/database/rrdlabels.c
index 5198cb4aae..afd738c3f3 100644
--- a/database/rrdlabels.c
+++ b/database/rrdlabels.c
@@ -620,7 +620,7 @@ void rrdlabels_add_pair(DICTIONARY *dict, const char *string, RRDLABEL_SRC ls) {
 }
 
 // ----------------------------------------------------------------------------
-// rrdlabels_get_to_buffer_or_null()
+// rrdlabels_get_value_to_buffer_or_null()
 
 void rrdlabels_get_value_to_buffer_or_null(DICTIONARY *labels, BUFFER *wb, const char *key, const char *quote, const char *null) {
     DICTIONARY_ITEM *acquired_item = dictionary_get_and_acquire_item(labels, key);
@@ -634,6 +634,17 @@ void rrdlabels_get_value_to_buffer_or_null(DICTIONARY *labels, BUFFER *wb, const
     dictionary_acquired_item_release(labels, acquired_item);
 }
 
+// ----------------------------------------------------------------------------
+// rrdlabels_get_value_to_char_or_null()
+
+void rrdlabels_get_value_to_char_or_null(DICTIONARY *labels, char **value, const char *key) {
+    DICTIONARY_ITEM *acquired_item = dictionary_get_and_acquire_item(labels, key);
+    RRDLABEL *lb = dictionary_acquired_item_value(acquired_item);
+
+    *value = (lb && lb->label_value) ? strdupz(string2str(lb->label_value)) : NULL;
+
+    dictionary_acquired_item_release(labels, acquired_item);
+}
 
 // ----------------------------------------------------------------------------
 // rrdlabels_unmark_all()
@@ -939,6 +950,8 @@ void rrdset_update_rrdlabels(RRDSET *st, DICTIONARY *new_rrdlabels) {
     if (new_rrdlabels)
         rrdlabels_migrate_to_these(st->state->chart_labels, new_rrdlabels);
 
+    rrdcalc_update_rrdlabels(st);
+
     // TODO - we should also cleanup sqlite from old new_rrdlabels that have been removed
     rrdlabels_walkthrough_read(st->state->chart_labels, chart_label_store_to_sql_callback, st);
 }
diff --git a/exporting/tests/netdata_doubles.c b/exporting/tests/netdata_doubles.c
index ee36e887a2..86707e16e3 100644
--- a/exporting/tests/netdata_doubles.c
+++ b/exporting/tests/netdata_doubles.c
@@ -255,3 +255,7 @@ void sql_store_chart_label(uuid_t *chart_uuid, int source_type, char *label, cha
     (void)label;
     (void)value;
 }
+
+void rrdcalc_update_rrdlabels(RRDSET *st) {
+    (void)st;
+}
diff --git a/health/REFERENCE.md b/health/REFERENCE.md
index d1af747676..90da4102a9 100644
--- a/health/REFERENCE.md
+++ b/health/REFERENCE.md
@@ -536,12 +536,48 @@ See our [simple patterns docs](/libnetdata/simple_pattern/README.md) for more ex
 
 #### Alarm line `info`
 
-The info field can contain a small piece of text describing the alarm or template. This will be rendered in notifications and UI elements whenever the specific alarm is in focus. An example for the `ram_available` alarm is:
+The info field can contain a small piece of text describing the alarm or template. This will be rendered in
+notifications and UI elements whenever the specific alarm is in focus. An example for the `ram_available` alarm is:
 
 ```yaml
 info: percentage of estimated amount of RAM available for userspace processes, without causing swapping
 ```
 
+info fields can contain special variables in their text that will be replaced during run-time to provide more specific
+alert information. Current variables supported are:
+
+| variable | description |
+| ---------| ----------- |
+| $family  | Will be replaced by the family instance for the alert (e.g. eth0) |
+| $label:  | Followed by a chart label name, this will replace the variable with the chart label's value |
+
+For example, an info field like the following:
+
+```yaml
+info: average inbound utilization for the network interface $family over the last minute
+```
+
+Will be rendered on the alert acting on interface `eth0` as:
+
+```yaml
+info: average inbound utilization for the network interface eth0 over the last minute
+```
+
+An alert acting on a chart that has a chart label named e.g. `target`, with a value of `https://netdata.cloud/`,
+can be enriched as follows:
+
+```yaml
+info: average ratio of HTTP responses with unexpected status over the last 5 minutes for the site $label:target
+```
+
+Will become:
+
+```yaml
+info: average ratio of HTTP responses with unexpected status over the last 5 minutes for the site https://netdata.cloud/
+```
+
+> Please note that variable names are case sensitive.
+
 ## Expressions
 
 Netdata has an internal [infix expression parser](/libnetdata/eval). This parses expressions and creates an internal
diff --git a/health/health_config.c b/health/health_config.c
index 7efa45486a..77b12fc183 100644
--- a/health/health_config.c
+++ b/health/health_config.c
@@ -942,6 +942,8 @@ static int health_readfile(const char *filename, void *data) {
                 }
                 rc->info = strdupz(value);
                 strip_quotes(rc->info);
+                rc->original_info = strdupz(value);
+                strip_quotes(rc->original_info);
             }
             else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) {
                 alert_cfg->delay = strdupz(value);
diff --git a/health/health_json.c b/health/health_json.c
index 4e8f437613..a0dc55b612 100644
--- a/health/health_json.c
+++ b/health/health_json.c
@@ -96,22 +96,7 @@ void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host)
                    , (ae->flags & HEALTH_ENTRY_FLAG_SILENCED)?"true":"false"
     );
 
-    char *replaced_info = NULL;
-    if (likely(ae->info)) {
-        char *m = NULL;
-        replaced_info = strdupz(ae->info);
-        size_t pos = 0;
-        while ((m = strstr(replaced_info + pos, "$family"))) {
-            char *buf = NULL;
-            pos = m - replaced_info;
-            buf = find_and_replace(replaced_info, "$family", ae->family ? ae->family : "", m);
-            freez(replaced_info);
-            replaced_info = strdupz(buf);
-            freez(buf);
-        }
-    }
-
-    health_string2json(wb, "\t\t", "info", replaced_info?replaced_info:"", ",\n");
+    health_string2json(wb, "\t\t", "info", ae->info ? ae->info : "", ",\n");
 
     if(unlikely(ae->flags & HEALTH_ENTRY_FLAG_NO_CLEAR_NOTIFICATION)) {
         buffer_strcat(wb, "\t\t\"no_clear_notification\": true,\n");
@@ -127,7 +112,6 @@ void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host)
 
     buffer_strcat(wb, "\t}");
 
-    freez(replaced_info);
     freez(edit_command);
 }
 
@@ -182,21 +166,6 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
     char value_string[100 + 1];
     format_value_and_unit(value_string, 100, rc->value, rc->units, -1);
 
-    char *replaced_info = NULL;
-    if (likely(rc->info)) {
-        char *m;
-        replaced_info = strdupz(rc->info);
-        size_t pos = 0;
-        while ((m = strstr(replaced_info + pos, "$family"))) {
-            char *buf = NULL;
-            pos = m - replaced_info;
-            buf = find_and_replace(replaced_info, "$family", (rc->rrdset && rc->rrdset->family) ? rc->rrdset->family : "", m);
-            freez(replaced_info);
-            replaced_info = strdupz(buf);
-            freez(buf);
-        }
-    }
-
     char hash_id[GUID_LEN + 1];
     uuid_unparse_lower(rc->config_hash_id, hash_id);
 
@@ -250,7 +219,7 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
                    , rc->recipient?rc->recipient:host->health_default_recipient
                    , rc->source
                    , rc->units?rc->units:""
-                   , replaced_info?replaced_info:""
+                   , rc->info?rc->info:""
                    , rrdcalc_status2string(rc->status)
                    , (unsigned long)rc->last_status_change
                    , (unsigned long)rc->last_updated
@@ -322,8 +291,6 @@ static inline void health_rrdcalc2json_nolock(RRDHOST *host, BUFFER *wb, RRDCALC
     buffer_strcat(wb, "\n");
 
     buffer_strcat(wb, "\t\t}");
-
-    freez(replaced_info);
 }
 
 //void health_rrdcalctemplate2json_nolock(BUFFER *wb, RRDCALCTEMPLATE *rt) {
diff --git a/health/health_log.c b/health/health_log.c
index f0a05531d3..d880625e05 100644
--- a/health/health_log.c
+++ b/health/health_log.c
@@ -512,23 +512,8 @@ inline ALARM_ENTRY* health_create_alarm_entry(
     ae->old_value_string = strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae->units, -1));
     ae->new_value_string = strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae->units, -1));
 
-    char *replaced_info = NULL;
-    if (likely(info)) {
-        char *m;
-        replaced_info = strdupz(info);
-        size_t pos = 0;
-        while ((m = strstr(replaced_info + pos, "$family"))) {
-            char *buf = NULL;
-            pos = m - replaced_info;
-            buf = find_and_replace(replaced_info, "$family", (ae->family) ? ae->family : "", m);
-            freez(replaced_info);
-            replaced_info = strdupz(buf);
-            freez(buf);
-        }
-    }
-
-    if(replaced_info) ae->info = strdupz(replaced_info);
-    freez(replaced_info);
+    if (info)
+        ae->info = strdupz(info);
 
     ae->old_status = old_status;
     ae->new_status = new_status;