0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-24 21:24:12 +00:00
netdata_netdata/ml/Config.cc
Andrew Maguire a9f41c9a8b
set default for minimum num samples to train to 900 ()
This will enable first set of initial models to be trained quicker and makes sense now that ml is enabled by default.
2022-06-20 13:50:46 +01:00

138 lines
5.2 KiB
C++

// SPDX-License-Identifier: GPL-3.0-or-later
#include "Config.h"
#include "ml-private.h"
using namespace ml;
/*
 * Global configuration instance to be shared between training and
 * prediction threads.
 *
 * Its fields are filled in by Config::readMLConfig().
 */
Config ml::Cfg;
/*
 * Restrict Value to the closed interval [Min, Max].
 *
 * The upper bound is applied first and the lower bound second, so when the
 * bounds are inverted (Min > Max) the result is Min.
 */
template <typename T>
static T clamp(const T& Value, const T& Min, const T& Max) {
    // Apply the upper bound.
    const T& Capped = (Max < Value) ? Max : Value;

    // Apply the lower bound.
    return (Min < Capped) ? Capped : Min;
}
/*
 * Initialize global configuration variable.
 *
 * Reads every option of the ML config section through the config_get_*()
 * helpers, clamps each numeric value to its supported range, falls back to
 * the defaults when a min/max pair is inconsistent, and finally stores the
 * results in the global `Cfg` instance.
 */
void Config::readMLConfig(void) {
    const char *ConfigSectionML = CONFIG_SECTION_ML;

    // Defaults that double as the fallback values used when a configured
    // min/max pair turns out to be inconsistent. Keeping them in one place
    // prevents the fallback from drifting away from the documented default.
    const unsigned DefaultMaxTrainSamples = 4 * 3600;
    const unsigned DefaultMinTrainSamples = 1 * 900;
    const double DefaultADMinWindowSize = 30.0;
    const double DefaultADMaxWindowSize = 600.0;

    bool EnableAnomalyDetection = config_get_boolean(ConfigSectionML, "enabled", true);

    /*
     * Read values
     */

    unsigned MaxTrainSamples = config_get_number(ConfigSectionML, "maximum num samples to train", DefaultMaxTrainSamples);
    unsigned MinTrainSamples = config_get_number(ConfigSectionML, "minimum num samples to train", DefaultMinTrainSamples);
    unsigned TrainEvery = config_get_number(ConfigSectionML, "train every", 1 * 3600);

    unsigned DBEngineAnomalyRateEvery = config_get_number(ConfigSectionML, "dbengine anomaly rate every", 30);

    unsigned DiffN = config_get_number(ConfigSectionML, "num samples to diff", 1);
    unsigned SmoothN = config_get_number(ConfigSectionML, "num samples to smooth", 3);
    unsigned LagN = config_get_number(ConfigSectionML, "num samples to lag", 5);

    // LagN is clamped right away because the default sampling ratio below is
    // derived from it: an unclamped value of 0 would divide by zero.
    LagN = clamp(LagN, 1u, 5u);

    double RandomSamplingRatio = config_get_float(ConfigSectionML, "random sampling ratio", 1.0 / LagN);
    unsigned MaxKMeansIters = config_get_number(ConfigSectionML, "maximum number of k-means iterations", 1000);

    double DimensionAnomalyScoreThreshold = config_get_float(ConfigSectionML, "dimension anomaly score threshold", 0.99);
    double HostAnomalyRateThreshold = config_get_float(ConfigSectionML, "host anomaly rate threshold", 0.01);

    double ADMinWindowSize = config_get_float(ConfigSectionML, "minimum window size", DefaultADMinWindowSize);
    double ADMaxWindowSize = config_get_float(ConfigSectionML, "maximum window size", DefaultADMaxWindowSize);
    double ADIdleWindowSize = config_get_float(ConfigSectionML, "idle window size", 30);
    double ADWindowRateThreshold = config_get_float(ConfigSectionML, "window minimum anomaly rate", 0.25);
    double ADDimensionRateThreshold = config_get_float(ConfigSectionML, "anomaly event min dimension rate threshold", 0.05);

    std::stringstream SS;
    SS << netdata_configured_cache_dir << "/anomaly-detection.db";
    Cfg.AnomalyDBPath = SS.str();

    /*
     * Clamp (LagN has already been clamped above)
     */

    MaxTrainSamples = clamp(MaxTrainSamples, 1 * 3600u, 24 * 3600u);
    MinTrainSamples = clamp(MinTrainSamples, 1 * 900u, 6 * 3600u);
    TrainEvery = clamp(TrainEvery, 1 * 3600u, 6 * 3600u);

    DBEngineAnomalyRateEvery = clamp(DBEngineAnomalyRateEvery, 1 * 30u, 15 * 60u);

    DiffN = clamp(DiffN, 0u, 1u);
    SmoothN = clamp(SmoothN, 0u, 5u);

    RandomSamplingRatio = clamp(RandomSamplingRatio, 0.2, 1.0);
    MaxKMeansIters = clamp(MaxKMeansIters, 500u, 1000u);

    DimensionAnomalyScoreThreshold = clamp(DimensionAnomalyScoreThreshold, 0.01, 5.00);
    HostAnomalyRateThreshold = clamp(HostAnomalyRateThreshold, 0.01, 1.0);

    ADMinWindowSize = clamp(ADMinWindowSize, 30.0, 300.0);
    ADMaxWindowSize = clamp(ADMaxWindowSize, 60.0, 900.0);
    ADIdleWindowSize = clamp(ADIdleWindowSize, 30.0, 900.0);
    ADWindowRateThreshold = clamp(ADWindowRateThreshold, 0.01, 0.99);
    ADDimensionRateThreshold = clamp(ADDimensionRateThreshold, 0.01, 0.99);

    /*
     * Validate: a minimum must stay strictly below its maximum, otherwise
     * both values of the pair are reset to their defaults.
     */

    if (MinTrainSamples >= MaxTrainSamples) {
        error("invalid min/max train samples found (%u >= %u)", MinTrainSamples, MaxTrainSamples);

        MinTrainSamples = DefaultMinTrainSamples;
        MaxTrainSamples = DefaultMaxTrainSamples;
    }

    if (ADMinWindowSize >= ADMaxWindowSize) {
        error("invalid min/max anomaly window size found (%lf >= %lf)", ADMinWindowSize, ADMaxWindowSize);

        ADMinWindowSize = DefaultADMinWindowSize;
        ADMaxWindowSize = DefaultADMaxWindowSize;
    }

    /*
     * Assign to config instance
     */

    Cfg.EnableAnomalyDetection = EnableAnomalyDetection;

    Cfg.MaxTrainSamples = MaxTrainSamples;
    Cfg.MinTrainSamples = MinTrainSamples;
    Cfg.TrainEvery = TrainEvery;

    Cfg.DBEngineAnomalyRateEvery = DBEngineAnomalyRateEvery;

    Cfg.DiffN = DiffN;
    Cfg.SmoothN = SmoothN;
    Cfg.LagN = LagN;

    Cfg.RandomSamplingRatio = RandomSamplingRatio;
    Cfg.MaxKMeansIters = MaxKMeansIters;

    Cfg.DimensionAnomalyScoreThreshold = DimensionAnomalyScoreThreshold;
    Cfg.HostAnomalyRateThreshold = HostAnomalyRateThreshold;

    Cfg.ADMinWindowSize = ADMinWindowSize;
    Cfg.ADMaxWindowSize = ADMaxWindowSize;
    Cfg.ADIdleWindowSize = ADIdleWindowSize;
    Cfg.ADWindowRateThreshold = ADWindowRateThreshold;
    Cfg.ADDimensionRateThreshold = ADDimensionRateThreshold;

    Cfg.HostsToSkip = config_get(ConfigSectionML, "hosts to skip from training", "!*");
    Cfg.SP_HostsToSkip = simple_pattern_create(Cfg.HostsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT);

    // Always exclude anomaly_detection charts from training.
    Cfg.ChartsToSkip = "anomaly_detection.* ";
    Cfg.ChartsToSkip += config_get(ConfigSectionML, "charts to skip from training", "netdata.*");
    // Build the pattern from the string that was just written to Cfg, not
    // from this->ChartsToSkip, which only matches when this == &Cfg.
    Cfg.SP_ChartsToSkip = simple_pattern_create(Cfg.ChartsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT);

    Cfg.StreamADCharts = config_get_boolean(ConfigSectionML, "stream anomaly detection charts", true);
}