mirror of
https://github.com/netdata/netdata.git
synced 2025-04-06 14:35:32 +00:00

* rrdset - in progress * rrdset optimal constructor; rrdset conflict * rrdset final touches * re-organization of rrdset object members * prevent use-after-free * dictionary dfe supports also counting of iterations * rrddim managed by dictionary * rrd.h cleanup * DICTIONARY_ITEM now is referencing actual dictionary items in the code * removed rrdset linked list * Revert "removed rrdset linked list" This reverts commit 690d6a588b4b99619c2c5e10f84e8f868ae6def5. * removed rrdset linked list * added comments * Switch chart uuid to static allocation in rrdset Remove unused functions * rrdset_archive() and friends... * always create rrdfamily * enable ml_free_dimension * rrddim_foreach done with dfe * most custom rrddim loops replaced with rrddim_foreach * removed accesses to rrddim->dimensions * removed locks that are no longer needed * rrdsetvar is now managed by the dictionary * set rrdset is rrdsetvar, fixes https://github.com/netdata/netdata/pull/13646#issuecomment-1242574853 * conflict callback of rrdsetvar now properly checks if it has to reset the variable * dictionary registered callbacks accept as first parameter the DICTIONARY_ITEM * dictionary dfe now uses internal counter to report; avoided excess variables defined with dfe * dictionary walkthrough callbacks get dictionary acquired items * dictionary reference counters that can be dupped from zero * added advanced functions for get and del * rrdvar managed by dictionaries * thread safety for rrdsetvar * faster rrdvar initialization * rrdvar string lengths should match in all add, del, get functions * rrdvar internals hidden from the rest of the world * rrdvar is now acquired throughout netdata * hide the internal structures of rrdsetvar * rrdsetvar is now acquired through out netdata * rrddimvar managed by dictionary; rrddimvar linked list removed; rrddimvar structures hidden from the rest of netdata * better error handling * dont create variables if not initialized for health * dont create variables if not 
initialized for health again * rrdfamily is now managed by dictionaries; references of it are acquired dictionary items * type checking on acquired objects * rrdcalc renaming of functions * type checking for rrdfamily_acquired * rrdcalc managed by dictionaries * rrdcalc double free fix * host rrdvars is always needed * attempt to fix deadlock 1 * attempt to fix deadlock 2 * Remove unused variable * attempt to fix deadlock 3 * snprintfz * rrdcalc index in rrdset fix * Stop storing active charts and computing chart hashes * Remove store active chart function * Remove compute chart hash function * Remove sql_store_chart_hash function * Remove store_active_dimension function * dictionary delayed destruction * formatting and cleanup * zero dictionary base on rrdsetvar * added internal error to log delayed destructions of dictionaries * typo in rrddimvar * added debugging info to dictionary * debug info * fix for rrdcalc keys being empty * remove forgotten unlock * remove deadlock * Switch to metadata version 5 and drop chart_hash chart_hash_map chart_active dimension_active v_chart_hash * SQL cosmetic changes * do not busy wait while destroying a referenced dictionary * remove deadlock * code cleanup; re-organization; * fast cleanup and flushing of dictionaries * number formatting fixes * do not delete configured alerts when archiving a chart * rrddim obsolete linked list management outside dictionaries * removed duplicate contexts call * fix crash when rrdfamily is not initialized * dont keep rrddimvar referenced * properly cleanup rrdvar * removed some locks * Do not attempt to cleanup chart_hash / chart_hash_map * rrdcalctemplate managed by dictionary * register callbacks on the right dictionary * removed some more locks * rrdcalc secondary index replaced with linked-list; rrdcalc labels updates are now executed by health thread * when looking up for an alarm look using both chart id and chart name * host initialization a bit more modular * init rrdlabels on host 
update * preparation for dictionary views * improved comment * unused variables without internal checks * service threads isolation and worker info * more worker info in service thread * thread cancelability debugging with internal checks * strings data races addressed; fixes https://github.com/netdata/netdata/issues/13647 * dictionary modularization * Remove unused SQL statement definition * unit-tested thread safety of dictionaries; removed data race conditions on dictionaries and strings; dictionaries now can detect if the caller is holds a write lock and automatically all the calls become their unsafe versions; all direct calls to unsafe version is eliminated * remove worker_is_idle() from the exit of service functions, because we lose the lock time between loops * rewritten dictionary to have 2 separate locks, one for indexing and another for traversal * Update collectors/cgroups.plugin/sys_fs_cgroup.c Co-authored-by: Vladimir Kobal <vlad@prokk.net> * Update collectors/cgroups.plugin/sys_fs_cgroup.c Co-authored-by: Vladimir Kobal <vlad@prokk.net> * Update collectors/proc.plugin/proc_net_dev.c Co-authored-by: Vladimir Kobal <vlad@prokk.net> * fix memory leak in rrdset cache_dir * minor dictionary changes * dont use index locks in single threaded * obsolete dict option * rrddim options and flags separation; rrdset_done() optimization to keep array of reference pointers to rrddim; * fix jump on uninitialized value in dictionary; remove double free of cache_dir * addressed codacy findings * removed debugging code * use the private refcount on dictionaries * make dictionary item desctructors work on dictionary destruction; strictier control on dictionary API; proper cleanup sequence on rrddim; * more dictionary statistics * global statistics about dictionary operations, memory, items, callbacks * dictionary support for views - missing the public API * removed warning about unused parameter * chart and context name for cloud * chart and context name for cloud, again 
* dictionary statistics fixed; first implementation of dictionary views - not currently used * only the master can globally delete an item * context needs netdata prefix * fix context and chart it of spins * fix for host variables when health is not enabled * run garbage collector on item insert too * Fix info message; remove extra "using" * update dict unittest for new placement of garbage collector * we need RRDHOST->rrdvars for maintaining custom host variables * Health initialization needs the host->host_uuid * split STRING to its own files; no code changes other than that * initialize health unconditionally * unit tests do not pollute the global scope with their variables * Skip initialization when creating archived hosts on startup. When a child connects it will initialize properly Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Co-authored-by: Vladimir Kobal <vlad@prokk.net>
676 lines
25 KiB
C
676 lines
25 KiB
C
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include "plugin_proc.h"
|
|
|
|
// Module name passed to rrdset_create_localhost() for every chart created by this collector.
#define PLUGIN_PROC_MODULE_MDSTAT_NAME "/proc/mdstat"
|
|
|
|
struct raid {
|
|
int redundant;
|
|
char *name;
|
|
uint32_t hash;
|
|
char *level;
|
|
|
|
RRDDIM *rd_health;
|
|
unsigned long long failed_disks;
|
|
|
|
RRDSET *st_disks;
|
|
RRDDIM *rd_down;
|
|
RRDDIM *rd_inuse;
|
|
unsigned long long total_disks;
|
|
unsigned long long inuse_disks;
|
|
|
|
RRDSET *st_operation;
|
|
RRDDIM *rd_check;
|
|
RRDDIM *rd_resync;
|
|
RRDDIM *rd_recovery;
|
|
RRDDIM *rd_reshape;
|
|
unsigned long long check;
|
|
unsigned long long resync;
|
|
unsigned long long recovery;
|
|
unsigned long long reshape;
|
|
|
|
RRDSET *st_finish;
|
|
RRDDIM *rd_finish_in;
|
|
unsigned long long finish_in;
|
|
|
|
RRDSET *st_speed;
|
|
RRDDIM *rd_speed;
|
|
unsigned long long speed;
|
|
|
|
char *mismatch_cnt_filename;
|
|
RRDSET *st_mismatch_cnt;
|
|
RRDDIM *rd_mismatch_cnt;
|
|
unsigned long long mismatch_cnt;
|
|
|
|
RRDSET *st_nonredundant;
|
|
RRDDIM *rd_nonredundant;
|
|
};
|
|
|
|
/*
 * Minimal snapshot of an array as seen on a previous iteration.
 * do_proc_mdstat() compares these snapshots against the current arrays
 * to detect arrays that disappeared or changed redundancy.
 */
struct old_raid {
    int redundant;  /* copy of raid->redundant at snapshot time */
    char *name;     /* heap-allocated copy of the array name */
    uint32_t hash;  /* copy of raid->hash, compared before the name itself */
    int found;      /* 1 when a matching current array exists, 0 otherwise */
};
|
|
|
|
/*
 * Overwrite every occurrence of character c in s with a NUL byte.
 *
 * Despite the name, the scan does not stop at the first match: the whole
 * original string is walked, so the string is effectively cut at the first
 * occurrence of c while later occurrences are zeroed as well.
 *
 * Returns a pointer to the position of the original terminator of s, which
 * lets callers derive the original length of the string.
 */
static inline char *remove_trailing_chars(char *s, char c)
{
    char *cursor;

    for (cursor = s; *cursor != '\0'; cursor++) {
        if (*cursor == c)
            *cursor = '\0';
    }

    return cursor;
}
|
|
|
|
static inline void make_chart_obsolete(char *name, const char *id_modifier)
|
|
{
|
|
char id[50 + 1];
|
|
RRDSET *st = NULL;
|
|
|
|
if (likely(name && id_modifier)) {
|
|
snprintfz(id, 50, "mdstat.%s_%s", name, id_modifier);
|
|
st = rrdset_find_active_byname_localhost(id);
|
|
if (likely(st))
|
|
rrdset_is_obsolete(st);
|
|
}
|
|
}
|
|
|
|
/*
 * Attach the automatic labels of an md array to one of its charts:
 *   - "device"     is set to the array name
 *   - "raid_level" is set to the array level
 */
static void add_labels_to_mdstat(struct raid *raid, RRDSET *st)
{
    rrdlabels_add(st->rrdlabels, "device", raid->name, RRDLABEL_SRC_AUTO);

    rrdlabels_add(st->rrdlabels, "raid_level", raid->level, RRDLABEL_SRC_AUTO);
}
|
|
|
|
/*
 * Collect md (software RAID) metrics from /proc/mdstat and update the mdstat charts.
 *
 * On the first call the module configuration is read from the
 * "plugin:proc:/proc/mdstat" section. On every call the function:
 *   1. reads the mdstat file and counts the active arrays,
 *   2. (re)allocates the per-array state when the number of arrays changed,
 *   3. parses the disk status and the running operation of every array,
 *   4. optionally reads the mismatch_cnt file of every redundant array,
 *   5. marks the charts of arrays that disappeared as obsolete,
 *   6. creates/updates the charts.
 *
 * All state is kept in function-level statics.
 *
 * Parameters:
 *   update_every - passed to rrdset_create_localhost() for every chart created
 *   dt           - unused
 *
 * Returns 1 when the mdstat file cannot be opened or has fewer than 2 lines,
 * 0 in every other case (including read failures and "no arrays found", so
 * that the next call retries).
 */
int do_proc_mdstat(int update_every, usec_t dt)
{
    (void)dt;
    static procfile *ff = NULL;
    static int do_health = -1, do_nonredundant = -1, do_disks = -1, do_operations = -1, do_mismatch = -1,
               do_mismatch_config = -1;
    static int make_charts_obsolete = -1;
    static char *mdstat_filename = NULL, *mismatch_cnt_filename = NULL;
    static struct raid *raids = NULL;
    static size_t raids_allocated = 0;
    size_t raids_num = 0, raid_idx = 0, redundant_num = 0;
    static struct old_raid *old_raids = NULL;
    static size_t old_raids_allocated = 0;
    size_t old_raid_idx = 0;

    // read the configuration once; do_health doubles as the "initialized" flag
    if (unlikely(do_health == -1)) {
        do_health =
            config_get_boolean("plugin:proc:/proc/mdstat", "faulty devices", CONFIG_BOOLEAN_YES);
        do_nonredundant =
            config_get_boolean("plugin:proc:/proc/mdstat", "nonredundant arrays availability", CONFIG_BOOLEAN_YES);
        do_mismatch_config =
            config_get_boolean_ondemand("plugin:proc:/proc/mdstat", "mismatch count", CONFIG_BOOLEAN_AUTO);
        do_disks =
            config_get_boolean("plugin:proc:/proc/mdstat", "disk stats", CONFIG_BOOLEAN_YES);
        do_operations =
            config_get_boolean("plugin:proc:/proc/mdstat", "operation status", CONFIG_BOOLEAN_YES);

        make_charts_obsolete =
            config_get_boolean("plugin:proc:/proc/mdstat", "make charts obsolete", CONFIG_BOOLEAN_YES);

        char filename[FILENAME_MAX + 1];

        snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/mdstat");
        mdstat_filename = config_get("plugin:proc:/proc/mdstat", "filename to monitor", filename);

        // the default keeps a literal "%s" placeholder, later replaced by the array name
        snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/md/mismatch_cnt");
        mismatch_cnt_filename = config_get("plugin:proc:/proc/mdstat", "mismatch_cnt filename to monitor", filename);
    }

    if (unlikely(!ff)) {
        ff = procfile_open(mdstat_filename, " \t:", PROCFILE_FLAG_DEFAULT);
        if (unlikely(!ff))
            return 1;
    }

    ff = procfile_readall(ff);
    if (unlikely(!ff))
        return 0; // we return 0, so that we will retry opening it next time

    size_t lines = procfile_lines(ff);
    size_t words = 0;

    if (unlikely(lines < 2)) {
        error("Cannot read /proc/mdstat. Expected 2 or more lines, read %zu.", lines);
        return 1;
    }

    // find how many raids are there
    size_t l;
    raids_num = 0;
    for (l = 1; l < lines - 2; l++) {
        if (unlikely(procfile_lineword(ff, l, 1)[0] == 'a')) // check if the raid is active
            raids_num++;
    }

    if (unlikely(!raids_num && !old_raids_allocated))
        return 0; // we return 0, so that we will retry searching for raids next time

    // allocate the memory we need;
    // when the number of raids changes, all the per-raid state is dropped and rebuilt from scratch
    if (unlikely(raids_num != raids_allocated)) {
        for (raid_idx = 0; raid_idx < raids_allocated; raid_idx++) {
            struct raid *raid = &raids[raid_idx];
            freez(raid->name);
            freez(raid->level);
            freez(raid->mismatch_cnt_filename);
        }
        if (raids_num) {
            raids = (struct raid *)reallocz(raids, raids_num * sizeof(struct raid));
            memset(raids, 0, raids_num * sizeof(struct raid));
        } else {
            freez(raids);
            raids = NULL;
        }
        raids_allocated = raids_num;
    }

    // loop through all lines except the first and the last ones
    for (l = 1, raid_idx = 0; l < (lines - 2) && raid_idx < raids_num; l++) {
        struct raid *raid = &raids[raid_idx];
        raid->redundant = 0;

        words = procfile_linewords(ff, l);

        if (unlikely(words < 3))
            continue;

        if (unlikely(procfile_lineword(ff, l, 1)[0] != 'a'))
            continue;

        if (unlikely(!raid->name)) {
            raid->name = strdupz(procfile_lineword(ff, l, 0));
            raid->hash = simple_hash(raid->name);
            raid->level = strdupz(procfile_lineword(ff, l, 2));
        } else if (unlikely(strcmp(raid->name, procfile_lineword(ff, l, 0)))) {
            // a different raid now occupies this slot: reset the slot completely
            freez(raid->name);
            freez(raid->mismatch_cnt_filename);
            freez(raid->level);
            memset(raid, 0, sizeof(struct raid));
            raid->name = strdupz(procfile_lineword(ff, l, 0));
            raid->hash = simple_hash(raid->name);
            raid->level = strdupz(procfile_lineword(ff, l, 2));
        }

        if (unlikely(!raid->name || !raid->name[0]))
            continue;

        raid_idx++;

        // check if raid has disk status
        l++;
        words = procfile_linewords(ff, l);
        if (words < 2 || procfile_lineword(ff, l, words - 1)[0] != '[')
            continue;

        // split inuse and total number of disks
        if (likely(do_health || do_disks)) {
            char *s = NULL, *str_total = NULL, *str_inuse = NULL;

            // the word before the last one is expected to look like "[total/inuse]"
            s = procfile_lineword(ff, l, words - 2);
            if (unlikely(s[0] != '[')) {
                error("Cannot read /proc/mdstat raid health status. Unexpected format: missing opening bracket.");
                continue;
            }
            str_total = ++s;
            while (*s) {
                if (unlikely(*s == '/')) {
                    *s = '\0';
                    str_inuse = s + 1;
                } else if (unlikely(*s == ']')) {
                    *s = '\0';
                    break;
                }
                s++;
            }
            if (unlikely(str_total[0] == '\0' || !str_inuse || str_inuse[0] == '\0')) {
                error("Cannot read /proc/mdstat raid health status. Unexpected format.");
                continue;
            }

            raid->inuse_disks = str2ull(str_inuse);
            raid->total_disks = str2ull(str_total);
            raid->failed_disks = raid->total_disks - raid->inuse_disks;
        }

        raid->redundant = 1;
        redundant_num++;
        l++;

        // check if any operation is performed on the raid
        if (likely(do_operations)) {
            char *s = NULL;

            raid->check = 0;
            raid->resync = 0;
            raid->recovery = 0;
            raid->reshape = 0;
            raid->finish_in = 0;
            raid->speed = 0;

            words = procfile_linewords(ff, l);

            if (likely(words < 2))
                continue;

            if (unlikely(procfile_lineword(ff, l, 0)[0] != '['))
                continue;

            if (unlikely(words < 7)) {
                error("Cannot read /proc/mdstat line. Expected 7 params, read %zu.", words);
                continue;
            }

            char *word;
            word = procfile_lineword(ff, l, 3);
            remove_trailing_chars(word, '%');

            // stored multiplied by 100; the operation dimensions use a divisor of 100
            unsigned long long percentage = (unsigned long long)(str2ndd(word, NULL) * 100);

            // possible operations: check, resync, recovery, reshape
            // 4-th character is unique for each operation so it is checked;
            // a word shorter than 3 characters is never indexed past its terminator
            char *operation = procfile_lineword(ff, l, 1);
            char operation_key = (operation[0] && operation[1] && operation[2]) ? operation[3] : '\0';

            switch (operation_key) {
                case 'c': // check
                    raid->check = percentage;
                    break;
                case 'y': // resync
                    raid->resync = percentage;
                    break;
                case 'o': // recovery
                    raid->recovery = percentage;
                    break;
                case 'h': // reshape
                    raid->reshape = percentage;
                    break;
                default: // unknown operation: all operation counters stay at zero
                    break;
            }

            word = procfile_lineword(ff, l, 5);
            s = remove_trailing_chars(word, 'm'); // remove trailing "min"; s is the original end of the word

            // skip the leading "finish=" (7 characters), only when the word is longer than that prefix
            if (likely((size_t)(s - word) > 7))
                raid->finish_in = (unsigned long long)(str2ndd(word + 7, NULL) * 60);

            word = procfile_lineword(ff, l, 6);
            s = remove_trailing_chars(word, 'K'); // remove trailing "K/sec"; s is the original end of the word

            // skip the leading "speed=" (6 characters), only when the word is longer than that prefix
            if (likely((size_t)(s - word) > 6))
                raid->speed = str2ull(word + 6);
        }
    }

    // read mismatch_cnt files
    if (do_mismatch == -1) {
        if (do_mismatch_config == CONFIG_BOOLEAN_AUTO) {
            // in auto mode, the mismatch files are not read when there are more than 50 raids
            if (raids_num > 50)
                do_mismatch = CONFIG_BOOLEAN_NO;
            else
                do_mismatch = CONFIG_BOOLEAN_YES;
        } else
            do_mismatch = do_mismatch_config;
    }

    if (likely(do_mismatch)) {
        for (raid_idx = 0; raid_idx < raids_num; raid_idx++) {
            char filename[FILENAME_MAX + 1];
            struct raid *raid = &raids[raid_idx];

            if (likely(raid->redundant)) {
                if (unlikely(!raid->mismatch_cnt_filename)) {
                    // NOTE(review): the configured value is used as the format string;
                    // it is expected to contain a single "%s" for the raid name - confirm it is validated
                    snprintfz(filename, FILENAME_MAX, mismatch_cnt_filename, raid->name);
                    raid->mismatch_cnt_filename = strdupz(filename);
                }
                if (unlikely(read_single_number_file(raid->mismatch_cnt_filename, &raid->mismatch_cnt))) {
                    error("Cannot read file '%s'", raid->mismatch_cnt_filename);
                    // a single failure disables mismatch monitoring for all raids
                    do_mismatch = CONFIG_BOOLEAN_NO;
                    error("Monitoring for mismatch count has been disabled");
                    break;
                }
            }
        }
    }

    // check for disappeared raids
    for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
        struct old_raid *old_raid = &old_raids[old_raid_idx];
        int found = 0;

        for (raid_idx = 0; raid_idx < raids_num; raid_idx++) {
            struct raid *raid = &raids[raid_idx];

            if (unlikely(
                    raid->hash == old_raid->hash && !strcmp(raid->name, old_raid->name) &&
                    raid->redundant == old_raid->redundant))
                found = 1;
        }

        old_raid->found = found;
    }

    int raid_disappeared = 0;
    for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
        struct old_raid *old_raid = &old_raids[old_raid_idx];

        if (unlikely(!old_raid->found)) {
            if (likely(make_charts_obsolete)) {
                make_chart_obsolete(old_raid->name, "disks");
                make_chart_obsolete(old_raid->name, "mismatch");
                make_chart_obsolete(old_raid->name, "operation");
                make_chart_obsolete(old_raid->name, "finish");
                make_chart_obsolete(old_raid->name, "speed");
                make_chart_obsolete(old_raid->name, "availability");
            }
            raid_disappeared = 1;
        }
    }

    // rebuild the snapshot of the current raids, used on the next call to detect disappeared raids
    if (unlikely(raid_disappeared || old_raids_allocated != raids_num)) {
        for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
            freez(old_raids[old_raid_idx].name);
        }
        if (likely(raids_num)) {
            old_raids = reallocz(old_raids, sizeof(struct old_raid) * raids_num);
            memset(old_raids, 0, sizeof(struct old_raid) * raids_num);
        } else {
            freez(old_raids);
            old_raids = NULL;
        }
        old_raids_allocated = raids_num;
        for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
            struct old_raid *old_raid = &old_raids[old_raid_idx];
            struct raid *raid = &raids[old_raid_idx];

            old_raid->name = strdupz(raid->name);
            old_raid->hash = raid->hash;
            old_raid->redundant = raid->redundant;
        }
    }

    // --------------------------------------------------------------------
    // faulty devices: a single chart with one dimension per redundant raid

    if (likely(do_health && redundant_num)) {
        static RRDSET *st_mdstat_health = NULL;
        if (unlikely(!st_mdstat_health)) {
            st_mdstat_health = rrdset_create_localhost(
                "mdstat",
                "mdstat_health",
                NULL,
                "health",
                "md.health",
                "Faulty Devices In MD",
                "failed disks",
                PLUGIN_PROC_NAME,
                PLUGIN_PROC_MODULE_MDSTAT_NAME,
                NETDATA_CHART_PRIO_MDSTAT_HEALTH,
                update_every,
                RRDSET_TYPE_LINE);

            rrdset_isnot_obsolete(st_mdstat_health);
        }
        else
            rrdset_next(st_mdstat_health);

        // redundant_num is non-zero here, so there is at least one redundant raid to report
        for (raid_idx = 0; raid_idx < raids_num; raid_idx++) {
            struct raid *raid = &raids[raid_idx];

            if (likely(raid->redundant)) {
                if (unlikely(!raid->rd_health && !(raid->rd_health = rrddim_find_active(st_mdstat_health, raid->name))))
                    raid->rd_health = rrddim_add(st_mdstat_health, raid->name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(st_mdstat_health, raid->rd_health, raid->failed_disks);
            }
        }

        rrdset_done(st_mdstat_health);
    }

    // --------------------------------------------------------------------
    // per-raid charts

    for (raid_idx = 0; raid_idx < raids_num; raid_idx++) {
        struct raid *raid = &raids[raid_idx];
        char id[50 + 1];
        char family[50 + 1];

        if (likely(raid->redundant)) {
            if (likely(do_disks)) {
                snprintfz(id, 50, "%s_disks", raid->name);

                if (unlikely(!raid->st_disks && !(raid->st_disks = rrdset_find_active_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s (%s)", raid->name, raid->level);

                    raid->st_disks = rrdset_create_localhost(
                        "mdstat",
                        id,
                        NULL,
                        family,
                        "md.disks",
                        "Disks Stats",
                        "disks",
                        PLUGIN_PROC_NAME,
                        PLUGIN_PROC_MODULE_MDSTAT_NAME,
                        NETDATA_CHART_PRIO_MDSTAT_DISKS + raid_idx * 10,
                        update_every,
                        RRDSET_TYPE_STACKED);

                    rrdset_isnot_obsolete(raid->st_disks);

                    add_labels_to_mdstat(raid, raid->st_disks);
                }
                else
                    rrdset_next(raid->st_disks);

                if (unlikely(!raid->rd_inuse && !(raid->rd_inuse = rrddim_find_active(raid->st_disks, "inuse"))))
                    raid->rd_inuse = rrddim_add(raid->st_disks, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
                if (unlikely(!raid->rd_down && !(raid->rd_down = rrddim_find_active(raid->st_disks, "down"))))
                    raid->rd_down = rrddim_add(raid->st_disks, "down", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_disks, raid->rd_inuse, raid->inuse_disks);
                rrddim_set_by_pointer(raid->st_disks, raid->rd_down, raid->failed_disks);

                rrdset_done(raid->st_disks);
            }

            // --------------------------------------------------------------------

            if (likely(do_mismatch)) {
                snprintfz(id, 50, "%s_mismatch", raid->name);

                if (unlikely(!raid->st_mismatch_cnt && !(raid->st_mismatch_cnt = rrdset_find_active_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s (%s)", raid->name, raid->level);

                    raid->st_mismatch_cnt = rrdset_create_localhost(
                        "mdstat",
                        id,
                        NULL,
                        family,
                        "md.mismatch_cnt",
                        "Mismatch Count",
                        "unsynchronized blocks",
                        PLUGIN_PROC_NAME,
                        PLUGIN_PROC_MODULE_MDSTAT_NAME,
                        NETDATA_CHART_PRIO_MDSTAT_MISMATCH + raid_idx * 10,
                        update_every,
                        RRDSET_TYPE_LINE);

                    rrdset_isnot_obsolete(raid->st_mismatch_cnt);

                    add_labels_to_mdstat(raid, raid->st_mismatch_cnt);
                }
                else
                    rrdset_next(raid->st_mismatch_cnt);

                if (unlikely(!raid->rd_mismatch_cnt && !(raid->rd_mismatch_cnt = rrddim_find_active(raid->st_mismatch_cnt, "count"))))
                    raid->rd_mismatch_cnt = rrddim_add(raid->st_mismatch_cnt, "count", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_mismatch_cnt, raid->rd_mismatch_cnt, raid->mismatch_cnt);

                rrdset_done(raid->st_mismatch_cnt);
            }

            // --------------------------------------------------------------------

            if (likely(do_operations)) {
                snprintfz(id, 50, "%s_operation", raid->name);

                if (unlikely(!raid->st_operation && !(raid->st_operation = rrdset_find_active_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s (%s)", raid->name, raid->level);

                    raid->st_operation = rrdset_create_localhost(
                        "mdstat",
                        id,
                        NULL,
                        family,
                        "md.status",
                        "Current Status",
                        "percent",
                        PLUGIN_PROC_NAME,
                        PLUGIN_PROC_MODULE_MDSTAT_NAME,
                        NETDATA_CHART_PRIO_MDSTAT_OPERATION + raid_idx * 10,
                        update_every,
                        RRDSET_TYPE_LINE);

                    rrdset_isnot_obsolete(raid->st_operation);

                    add_labels_to_mdstat(raid, raid->st_operation);
                }
                else
                    rrdset_next(raid->st_operation);

                // divisor 100: the collected values are percentages multiplied by 100
                if(unlikely(!raid->rd_check && !(raid->rd_check = rrddim_find_active(raid->st_operation, "check"))))
                    raid->rd_check = rrddim_add(raid->st_operation, "check", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
                if(unlikely(!raid->rd_resync && !(raid->rd_resync = rrddim_find_active(raid->st_operation, "resync"))))
                    raid->rd_resync = rrddim_add(raid->st_operation, "resync", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
                if(unlikely(!raid->rd_recovery && !(raid->rd_recovery = rrddim_find_active(raid->st_operation, "recovery"))))
                    raid->rd_recovery = rrddim_add(raid->st_operation, "recovery", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
                if(unlikely(!raid->rd_reshape && !(raid->rd_reshape = rrddim_find_active(raid->st_operation, "reshape"))))
                    raid->rd_reshape = rrddim_add(raid->st_operation, "reshape", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_operation, raid->rd_check, raid->check);
                rrddim_set_by_pointer(raid->st_operation, raid->rd_resync, raid->resync);
                rrddim_set_by_pointer(raid->st_operation, raid->rd_recovery, raid->recovery);
                rrddim_set_by_pointer(raid->st_operation, raid->rd_reshape, raid->reshape);

                rrdset_done(raid->st_operation);

                // --------------------------------------------------------------------

                snprintfz(id, 50, "%s_finish", raid->name);

                if (unlikely(!raid->st_finish && !(raid->st_finish = rrdset_find_active_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s (%s)", raid->name, raid->level);

                    raid->st_finish = rrdset_create_localhost(
                        "mdstat",
                        id,
                        NULL,
                        family,
                        "md.expected_time_until_operation_finish",
                        "Approximate Time Until Finish",
                        "seconds",
                        PLUGIN_PROC_NAME,
                        PLUGIN_PROC_MODULE_MDSTAT_NAME,
                        NETDATA_CHART_PRIO_MDSTAT_FINISH + raid_idx * 10,
                        update_every,
                        RRDSET_TYPE_LINE);

                    rrdset_isnot_obsolete(raid->st_finish);

                    add_labels_to_mdstat(raid, raid->st_finish);
                }
                else
                    rrdset_next(raid->st_finish);

                if(unlikely(!raid->rd_finish_in && !(raid->rd_finish_in = rrddim_find_active(raid->st_finish, "finish_in"))))
                    raid->rd_finish_in = rrddim_add(raid->st_finish, "finish_in", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_finish, raid->rd_finish_in, raid->finish_in);

                rrdset_done(raid->st_finish);

                // --------------------------------------------------------------------

                snprintfz(id, 50, "%s_speed", raid->name);

                if (unlikely(!raid->st_speed && !(raid->st_speed = rrdset_find_active_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s (%s)", raid->name, raid->level);

                    raid->st_speed = rrdset_create_localhost(
                        "mdstat",
                        id,
                        NULL,
                        family,
                        "md.operation_speed",
                        "Operation Speed",
                        "KiB/s",
                        PLUGIN_PROC_NAME,
                        PLUGIN_PROC_MODULE_MDSTAT_NAME,
                        NETDATA_CHART_PRIO_MDSTAT_SPEED + raid_idx * 10,
                        update_every,
                        RRDSET_TYPE_LINE);

                    rrdset_isnot_obsolete(raid->st_speed);

                    add_labels_to_mdstat(raid, raid->st_speed);
                }
                else
                    rrdset_next(raid->st_speed);

                if (unlikely(!raid->rd_speed && !(raid->rd_speed = rrddim_find_active(raid->st_speed, "speed"))))
                    raid->rd_speed = rrddim_add(raid->st_speed, "speed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_speed, raid->rd_speed, raid->speed);

                rrdset_done(raid->st_speed);
            }
        } else {
            // --------------------------------------------------------------------
            // raids without a parsed disk status only get an availability chart

            if (likely(do_nonredundant)) {
                snprintfz(id, 50, "%s_availability", raid->name);

                if (unlikely(!raid->st_nonredundant && !(raid->st_nonredundant = rrdset_find_active_localhost(id)))) {
                    snprintfz(family, 50, "%s (%s)", raid->name, raid->level);

                    raid->st_nonredundant = rrdset_create_localhost(
                        "mdstat",
                        id,
                        NULL,
                        family,
                        "md.nonredundant",
                        "Nonredundant Array Availability",
                        "boolean",
                        PLUGIN_PROC_NAME,
                        PLUGIN_PROC_MODULE_MDSTAT_NAME,
                        NETDATA_CHART_PRIO_MDSTAT_NONREDUNDANT + raid_idx * 10,
                        update_every,
                        RRDSET_TYPE_LINE);

                    rrdset_isnot_obsolete(raid->st_nonredundant);

                    add_labels_to_mdstat(raid, raid->st_nonredundant);
                }
                else
                    rrdset_next(raid->st_nonredundant);

                if (unlikely(!raid->rd_nonredundant && !(raid->rd_nonredundant = rrddim_find_active(raid->st_nonredundant, "available"))))
                    raid->rd_nonredundant = rrddim_add(raid->st_nonredundant, "available", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                // a raid listed as active is always reported as available
                rrddim_set_by_pointer(raid->st_nonredundant, raid->rd_nonredundant, 1);

                rrdset_done(raid->st_nonredundant);
            }
        }
    }

    return 0;
}
|