// SPDX-License-Identifier: GPL-3.0-or-later

#include "plugin_proc.h"

#define PLUGIN_PROC_MODULE_MDSTAT_NAME "/proc/mdstat"

struct raid {
    int redundant;
    char *name;
    uint32_t hash;

    RRDDIM *rd_health;
    unsigned long long failed_disks;

    RRDSET *st_disks;
    RRDDIM *rd_down;
    RRDDIM *rd_inuse;
    unsigned long long total_disks;
    unsigned long long inuse_disks;

    RRDSET *st_operation;
    RRDDIM *rd_check;
    RRDDIM *rd_resync;
    RRDDIM *rd_recovery;
    RRDDIM *rd_reshape;
    unsigned long long check;
    unsigned long long resync;
    unsigned long long recovery;
    unsigned long long reshape;

    RRDSET *st_finish;
    RRDDIM *rd_finish_in;
    unsigned long long finish_in;

    RRDSET *st_speed;
    RRDDIM *rd_speed;
    unsigned long long speed;

    char *mismatch_cnt_filename;
    RRDSET *st_mismatch_cnt;
    RRDDIM *rd_mismatch_cnt;
    unsigned long long mismatch_cnt;

    RRDSET *st_nonredundant;
    RRDDIM *rd_nonredundant;
};

struct old_raid {
    int redundant;
    char *name;
    uint32_t hash;
    int found;
};

static inline char *remove_trailing_chars(char *s, char c) {
    while(*s) {
        if(unlikely(*s == c)) {
            *s = '\0';
        }
        s++;
    }
    return s;
}

static inline void make_chart_obsolete(char *name, const char *id_modifier) {
    char id[50 + 1];
    RRDSET *st = NULL;

    if(likely(name && id_modifier)) {
        snprintfz(id, 50, "mdstat.%s_%s", name, id_modifier);
        st = rrdset_find_byname_localhost(id);
        if(likely(st)) rrdset_is_obsolete(st);
    }
}

int do_proc_mdstat(int update_every, usec_t dt) {
    (void)dt;
    static procfile *ff = NULL;
    static int do_health = -1, do_nonredundant = -1, do_disks = -1, do_operations = -1, do_mismatch = -1, do_mismatch_config = -1;
    static int make_charts_obsolete = -1;
    static char *mdstat_filename = NULL, *mismatch_cnt_filename = NULL;
    static struct raid *raids = NULL;
    static size_t raids_allocated = 0;
    size_t raids_num = 0, raid_idx = 0, redundant_num = 0;
    static struct old_raid *old_raids = NULL;
    static size_t old_raids_allocated = 0;
    size_t old_raid_idx = 0;

    if(unlikely(do_health == -1)){
        do_health           = config_get_boolean("plugin:proc:/proc/mdstat", "faulty devices", CONFIG_BOOLEAN_YES);
        do_nonredundant     = config_get_boolean("plugin:proc:/proc/mdstat", "nonredundant arrays availability", CONFIG_BOOLEAN_YES);
        do_mismatch_config  = config_get_boolean_ondemand("plugin:proc:/proc/mdstat", "mismatch count", CONFIG_BOOLEAN_AUTO);
        do_disks            = config_get_boolean("plugin:proc:/proc/mdstat", "disk stats", CONFIG_BOOLEAN_YES);
        do_operations       = config_get_boolean("plugin:proc:/proc/mdstat", "operation status", CONFIG_BOOLEAN_YES);

        make_charts_obsolete = config_get_boolean("plugin:proc:/proc/mdstat", "make charts obsolete", CONFIG_BOOLEAN_YES);

        char filename[FILENAME_MAX + 1];

        snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/mdstat");
        mdstat_filename = config_get("plugin:proc:/proc/mdstat", "filename to monitor", filename);

        snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/md/mismatch_cnt");
        mismatch_cnt_filename = config_get("plugin:proc:/proc/mdstat", "mismatch_cnt filename to monitor", filename);
    }

    if(unlikely(!ff)) {
        ff = procfile_open(mdstat_filename, " \t:", PROCFILE_FLAG_DEFAULT);
        if(unlikely(!ff)) return 1;
    }

    ff = procfile_readall(ff);
    if(unlikely(!ff)) return 0; // we return 0, so that we will retry opening it next time

    size_t lines = procfile_lines(ff);
    size_t words = 0;

    if(unlikely(lines < 2)) {
        error("Cannot read /proc/mdstat. Expected 2 or more lines, read %zu.", lines);
        return 1;
    }

    // find how many raids are there
    size_t l;
    raids_num = 0;
    for(l = 1; l < lines - 2 ; l++) {
       if(unlikely(procfile_lineword(ff, l, 1)[0] == 'a')) // check if the raid is active
            raids_num++;
    }

    if(unlikely(!raids_num && !old_raids_allocated)) return 0; // we return 0, so that we will retry searching for raids next time

    // allocate the memory we need;
    if(unlikely(raids_num != raids_allocated)) {
        for(raid_idx = 0; raid_idx < raids_allocated; raid_idx++) {
            struct raid *raid = &raids[raid_idx];
            freez(raid->name);
            freez(raid->mismatch_cnt_filename);
        }
        if(raids_num) {
            raids = (struct raid *)reallocz(raids, raids_num * sizeof(struct raid));
            memset(raids, 0, raids_num * sizeof(struct raid));
        }
        else {
            freez(raids);
            raids = NULL;
        }
        raids_allocated = raids_num;
    }

    // loop through all lines except the first and the last ones
    for(l = 1, raid_idx = 0; l < (lines - 2) && raid_idx < raids_num; l++) {
        struct raid *raid = &raids[raid_idx];
        raid->redundant = 0;

        words = procfile_linewords(ff, l);
        if(unlikely(words < 2)) continue;

        if(unlikely(procfile_lineword(ff, l, 1)[0] != 'a')) continue;
        if(unlikely(!raid->name)) {
            raid->name = strdupz(procfile_lineword(ff, l, 0));
            raid->hash = simple_hash(raid->name);
        }
        else if(unlikely(strcmp(raid->name, procfile_lineword(ff, l, 0)))) {
            freez(raid->name);
            freez(raid->mismatch_cnt_filename);
            memset(raid, 0, sizeof(struct raid));
            raid->name = strdupz(procfile_lineword(ff, l, 0));
            raid->hash = simple_hash(raid->name);
        }
        if(unlikely(!raid->name || !raid->name[0])) continue;
        raid_idx++;

        // check if raid has disk status
        l++;
        words = procfile_linewords(ff, l);
        if(words < 2 || procfile_lineword(ff, l, words - 1)[0] != '[') continue;

        // split inuse and total number of disks
        if(likely(do_health || do_disks)) {
            char *s = NULL, *str_total = NULL, *str_inuse = NULL;

            s = procfile_lineword(ff, l, words - 2);
            if(unlikely(s[0] != '[')) {
                error("Cannot read /proc/mdstat raid health status. Unexpected format: missing opening bracket.");
                continue;
            }
            str_total = ++s;
            while(*s) {
                if(unlikely(*s == '/')) {
                    *s = '\0';
                    str_inuse = s + 1;
                }
                else if(unlikely(*s == ']')) {
                    *s = '\0';
                    break;
                }
                s++;
            }
            if(unlikely(str_total[0] == '\0' || str_inuse[0] == '\0')) {
                error("Cannot read /proc/mdstat raid health status. Unexpected format.");
                continue;
            }

            raid->inuse_disks = str2ull(str_inuse);
            raid->total_disks = str2ull(str_total);
            raid->failed_disks = raid->total_disks - raid->inuse_disks;
        }

        raid->redundant = 1;
        redundant_num++;
        l++;

        // check if any operation is performed on the raid
        if(likely(do_operations)) {
            char *s = NULL;

            raid->check = 0;
            raid->resync = 0;
            raid->recovery = 0;
            raid->reshape = 0;
            raid->finish_in = 0;
            raid->speed = 0;

            words = procfile_linewords(ff, l);
            if(likely(words < 2)) continue;
            if(unlikely(procfile_lineword(ff, l, 0)[0] != '[')) continue;
            if(unlikely(words < 7)) {
                error("Cannot read /proc/mdstat line. Expected 7 params, read %zu.", words);
                continue;
            }

            char *word;
            word = procfile_lineword(ff, l, 3);
            remove_trailing_chars(word, '%');

            unsigned long long percentage = (unsigned long long)(str2ld(word, NULL) * 100);
            // possible operations: check, resync, recovery, reshape
            // 4-th character is unique for each operation so it is checked
            switch(procfile_lineword(ff, l, 1)[3]) {
                case 'c': // check
                    raid->check = percentage;
                    break;
                case 'y': // resync
                    raid->resync = percentage;
                    break;
                case 'o': // recovery
                    raid->recovery = percentage;
                    break;
                case 'h': // reshape
                    raid->reshape = percentage;
                    break;
            }

            word = procfile_lineword(ff, l, 5);
            s = remove_trailing_chars(word, 'm'); // remove trailing "min"

            word += 7; // skip leading "finish="

            if(likely(s > word))
                raid->finish_in = (unsigned long long)(str2ld(word, NULL) * 60);

            word = procfile_lineword(ff, l, 6);
            s = remove_trailing_chars(word, 'K'); // remove trailing "K/sec"

            word += 6; // skip leading "speed="

            if(likely(s > word))
                raid->speed = str2ull(word);
        }
    }

    // read mismatch_cnt files
    if(do_mismatch == -1) {
        if(do_mismatch_config == CONFIG_BOOLEAN_AUTO) {
            if(raids_num > 50)
                do_mismatch = CONFIG_BOOLEAN_NO;
            else
                do_mismatch = CONFIG_BOOLEAN_YES;
        }
        else
            do_mismatch = do_mismatch_config;
    }

    if(likely(do_mismatch)) {
        for(raid_idx = 0; raid_idx < raids_num ; raid_idx++) {
            char filename[FILENAME_MAX + 1];
            struct raid *raid = &raids[raid_idx];

            if(likely(raid->redundant)) {
                if(unlikely(!raid->mismatch_cnt_filename)) {
                    snprintfz(filename, FILENAME_MAX, mismatch_cnt_filename, raid->name);
                    raid->mismatch_cnt_filename = strdupz(filename);
                }
                if(unlikely(read_single_number_file(raid->mismatch_cnt_filename, &raid->mismatch_cnt))) {
                    error("Cannot read file '%s'", raid->mismatch_cnt_filename);
                    do_mismatch = CONFIG_BOOLEAN_NO;
                    error("Monitoring for mismatch count has been disabled");
                    break;
                }
            }
        }
    }

    // check for disappeared raids
    for(old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
        struct old_raid *old_raid = &old_raids[old_raid_idx];
        int found = 0;

        for(raid_idx = 0; raid_idx < raids_num ; raid_idx++) {
            struct raid *raid = &raids[raid_idx];

            if(unlikely(raid->hash == old_raid->hash
                        && !strcmp(raid->name, old_raid->name)
                        && raid->redundant == old_raid->redundant)) found = 1;
        }

        old_raid->found = found;
    }

    int raid_disappeared = 0;
    for(old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
        struct old_raid *old_raid = &old_raids[old_raid_idx];

        if(unlikely(!old_raid->found)) {
            if(likely(make_charts_obsolete)) {
                make_chart_obsolete(old_raid->name, "disks");
                make_chart_obsolete(old_raid->name, "mismatch");
                make_chart_obsolete(old_raid->name, "operation");
                make_chart_obsolete(old_raid->name, "finish");
                make_chart_obsolete(old_raid->name, "speed");
                make_chart_obsolete(old_raid->name, "availability");
            }
            raid_disappeared = 1;
        }
    }

    // allocate memory for nonredundant arrays
    if(unlikely(raid_disappeared || old_raids_allocated != raids_num)) {
        for(old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
            freez(old_raids[old_raid_idx].name);
        }
        if(likely(raids_num)) {
            old_raids = reallocz(old_raids, sizeof(struct old_raid) * raids_num);
            memset(old_raids, 0, sizeof(struct old_raid) * raids_num);
        }
        else {
            freez(old_raids);
            old_raids = NULL;
        }
        old_raids_allocated = raids_num;
        for(old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
            struct old_raid *old_raid = &old_raids[old_raid_idx];
            struct raid *raid = &raids[old_raid_idx];

            old_raid->name = strdupz(raid->name);
            old_raid->hash = raid->hash;
            old_raid->redundant = raid->redundant;
        }
    }

    // --------------------------------------------------------------------

    if(likely(do_health && redundant_num)) {
        static RRDSET *st_mdstat_health = NULL;
        if(unlikely(!st_mdstat_health)) {
            st_mdstat_health = rrdset_create_localhost(
                    "mdstat"
                    , "mdstat_health"
                    , NULL
                    , "health"
                    , "md.health"
                    , "Faulty Devices In MD"
                    , "failed disks"
                    , PLUGIN_PROC_NAME
                    , PLUGIN_PROC_MODULE_MDSTAT_NAME
                    , NETDATA_CHART_PRIO_MDSTAT_HEALTH
                    , update_every
                    , RRDSET_TYPE_LINE
            );

            rrdset_isnot_obsolete(st_mdstat_health);
        }
        else
            rrdset_next(st_mdstat_health);

        if(!redundant_num) {
            if(likely(make_charts_obsolete)) make_chart_obsolete("mdstat", "health");
        }
        else {
            for(raid_idx = 0; raid_idx < raids_num; raid_idx++) {
                struct raid *raid = &raids[raid_idx];

                if(likely(raid->redundant)) {
                    if(unlikely(!raid->rd_health && !(raid->rd_health = rrddim_find(st_mdstat_health, raid->name))))
                        raid->rd_health = rrddim_add(st_mdstat_health, raid->name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                    rrddim_set_by_pointer(st_mdstat_health, raid->rd_health, raid->failed_disks);
                }
            }

            rrdset_done(st_mdstat_health);
        }
    }

    // --------------------------------------------------------------------

    for(raid_idx = 0; raid_idx < raids_num ; raid_idx++) {
        struct raid *raid = &raids[raid_idx];
        char id[50 + 1];
        char family[50 + 1];

        if(likely(raid->redundant)) {
            if(likely(do_disks)) {
                snprintfz(id, 50, "%s_disks", raid->name);

                if(unlikely(!raid->st_disks && !(raid->st_disks = rrdset_find_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s", raid->name);

                    raid->st_disks = rrdset_create_localhost(
                            "mdstat"
                            , id
                            , NULL
                            , family
                            , "md.disks"
                            , "Disks Stats"
                            , "disks"
                            , PLUGIN_PROC_NAME
                            , PLUGIN_PROC_MODULE_MDSTAT_NAME
                            , NETDATA_CHART_PRIO_MDSTAT_DISKS + raid_idx * 10
                            , update_every
                            , RRDSET_TYPE_STACKED
                    );

                    rrdset_isnot_obsolete(raid->st_disks);
                }
                else
                    rrdset_next(raid->st_disks);

                if(unlikely(!raid->rd_inuse && !(raid->rd_inuse = rrddim_find(raid->st_disks, "inuse"))))
                    raid->rd_inuse = rrddim_add(raid->st_disks, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
                if(unlikely(!raid->rd_down && !(raid->rd_down = rrddim_find(raid->st_disks, "down"))))
                    raid->rd_down = rrddim_add(raid->st_disks, "down", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_disks, raid->rd_inuse, raid->inuse_disks);
                rrddim_set_by_pointer(raid->st_disks, raid->rd_down, raid->failed_disks);

                rrdset_done(raid->st_disks);
            }

            // --------------------------------------------------------------------

            if(likely(do_mismatch)) {
                snprintfz(id, 50, "%s_mismatch", raid->name);

                if(unlikely(!raid->st_mismatch_cnt && !(raid->st_mismatch_cnt = rrdset_find_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s", raid->name);

                    raid->st_mismatch_cnt = rrdset_create_localhost(
                            "mdstat"
                            , id
                            , NULL
                            , family
                            , "md.mismatch_cnt"
                            , "Mismatch Count"
                            , "unsynchronized blocks"
                            , PLUGIN_PROC_NAME
                            , PLUGIN_PROC_MODULE_MDSTAT_NAME
                            , NETDATA_CHART_PRIO_MDSTAT_MISMATCH + raid_idx * 10
                            , update_every
                            , RRDSET_TYPE_LINE
                    );

                    rrdset_isnot_obsolete(raid->st_mismatch_cnt);
                }
                else
                    rrdset_next(raid->st_mismatch_cnt);

                if(unlikely(!raid->rd_mismatch_cnt && !(raid->rd_mismatch_cnt = rrddim_find(raid->st_mismatch_cnt, "count"))))
                    raid->rd_mismatch_cnt = rrddim_add(raid->st_mismatch_cnt, "count", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_mismatch_cnt, raid->rd_mismatch_cnt, raid->mismatch_cnt);

                rrdset_done(raid->st_mismatch_cnt);
            }

            // --------------------------------------------------------------------

            if(likely(do_operations)) {
                snprintfz(id, 50, "%s_operation", raid->name);

                if(unlikely(!raid->st_operation && !(raid->st_operation = rrdset_find_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s", raid->name);

                    raid->st_operation = rrdset_create_localhost(
                            "mdstat"
                            , id
                            , NULL
                            , family
                            , "md.status"
                            , "Current Status"
                            , "percent"
                            , PLUGIN_PROC_NAME
                            , PLUGIN_PROC_MODULE_MDSTAT_NAME
                            , NETDATA_CHART_PRIO_MDSTAT_OPERATION + raid_idx * 10
                            , update_every
                            , RRDSET_TYPE_LINE
                    );

                    rrdset_isnot_obsolete(raid->st_operation);
                }
                else
                    rrdset_next(raid->st_operation);

                if(unlikely(!raid->rd_check && !(raid->rd_check = rrddim_find(raid->st_operation, "check"))))
                    raid->rd_check = rrddim_add(raid->st_operation, "check", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
                if(unlikely(!raid->rd_resync && !(raid->rd_resync = rrddim_find(raid->st_operation, "resync"))))
                    raid->rd_resync = rrddim_add(raid->st_operation, "resync", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
                if(unlikely(!raid->rd_recovery && !(raid->rd_recovery = rrddim_find(raid->st_operation, "recovery"))))
                    raid->rd_recovery = rrddim_add(raid->st_operation, "recovery", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
                if(unlikely(!raid->rd_reshape && !(raid->rd_reshape = rrddim_find(raid->st_operation, "reshape"))))
                    raid->rd_reshape = rrddim_add(raid->st_operation, "reshape", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_operation, raid->rd_check, raid->check);
                rrddim_set_by_pointer(raid->st_operation, raid->rd_resync, raid->resync);
                rrddim_set_by_pointer(raid->st_operation, raid->rd_recovery, raid->recovery);
                rrddim_set_by_pointer(raid->st_operation, raid->rd_reshape, raid->reshape);

                rrdset_done(raid->st_operation);

                // --------------------------------------------------------------------

                snprintfz(id, 50, "%s_finish", raid->name);

                if(unlikely(!raid->st_finish && !(raid->st_finish = rrdset_find_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s", raid->name);

                    raid->st_finish = rrdset_create_localhost(
                            "mdstat"
                            , id
                            , NULL
                            , family
                            , "md.rate"
                            , "Approximate Time Unit Finish"
                            , "seconds"
                            , PLUGIN_PROC_NAME
                            , PLUGIN_PROC_MODULE_MDSTAT_NAME
                            , NETDATA_CHART_PRIO_MDSTAT_FINISH + raid_idx * 10
                            , update_every
                            , RRDSET_TYPE_LINE
                    );

                    rrdset_isnot_obsolete(raid->st_finish);
                }
                else
                    rrdset_next(raid->st_finish);

                if(unlikely(!raid->rd_finish_in && !(raid->rd_finish_in = rrddim_find(raid->st_finish, "finish_in"))))
                    raid->rd_finish_in = rrddim_add(raid->st_finish, "finish_in", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_finish, raid->rd_finish_in, raid->finish_in);

                rrdset_done(raid->st_finish);

                // --------------------------------------------------------------------

                snprintfz(id, 50, "%s_speed", raid->name);

                if(unlikely(!raid->st_speed && !(raid->st_speed = rrdset_find_byname_localhost(id)))) {
                    snprintfz(family, 50, "%s", raid->name);

                    raid->st_speed = rrdset_create_localhost(
                            "mdstat"
                            , id
                            , NULL
				    , family
                            , "md.rate"
                            , "Operation Speed"
                            , "KiB/s"
                            , PLUGIN_PROC_NAME
                            , PLUGIN_PROC_MODULE_MDSTAT_NAME
                            , NETDATA_CHART_PRIO_MDSTAT_SPEED + raid_idx * 10
                            , update_every
                            , RRDSET_TYPE_LINE
                    );

                    rrdset_isnot_obsolete(raid->st_speed);
                }
                else
                    rrdset_next(raid->st_speed);

                if(unlikely(!raid->rd_speed && !(raid->rd_speed = rrddim_find(raid->st_speed, "speed"))))
                    raid->rd_speed = rrddim_add(raid->st_speed, "speed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_speed, raid->rd_speed, raid->speed);

                rrdset_done(raid->st_speed);
            }
        }
        else {

            // --------------------------------------------------------------------

            if(likely(do_nonredundant)) {
                snprintfz(id, 50, "%s_availability", raid->name);

                if(unlikely(!raid->st_nonredundant && !(raid->st_nonredundant = rrdset_find_localhost(id)))) {
                    snprintfz(family, 50, "%s", raid->name);

                    raid->st_nonredundant = rrdset_create_localhost(
                            "mdstat"
                            , id
                            , NULL
                            , family
                            , "md.nonredundant"
                            , "Nonredundant Array Availability"
                            , "boolean"
                            , PLUGIN_PROC_NAME
                            , PLUGIN_PROC_MODULE_MDSTAT_NAME
                            , NETDATA_CHART_PRIO_MDSTAT_NONREDUNDANT + raid_idx * 10
                            , update_every
                            , RRDSET_TYPE_LINE
                    );

                    rrdset_isnot_obsolete(raid->st_nonredundant);
                }
                else
                    rrdset_next(raid->st_nonredundant);

                if(unlikely(!raid->rd_nonredundant && !(raid->rd_nonredundant = rrddim_find(raid->st_nonredundant, "available"))))
                    raid->rd_nonredundant = rrddim_add(raid->st_nonredundant, "available", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);

                rrddim_set_by_pointer(raid->st_nonredundant, raid->rd_nonredundant, 1);

                rrdset_done(raid->st_nonredundant);
            }
        }
    }

    return 0;
}