mirror of
https://github.com/netdata/netdata.git
synced 2025-04-26 05:47:20 +00:00

* rrdfamily * rrddim * rrdset plugin and module names * rrdset units * rrdset type * rrdset family * rrdset title * rrdset title more * rrdset context * rrdcalctemplate context and removal of context hash from rrdset * strings statistics * rrdset name * rearranged members of rrdset * eliminate rrdset name hash; rrdcalc chart converted to STRING * rrdset id, eliminated rrdset hash * rrdcalc, alarm_entry, alert_config and some of rrdcalctemplate * rrdcalctemplate * rrdvar * eval_variable * rrddimvar and rrdsetvar * rrdhost hostname, os and tags * fix master commits * added thread cache; implemented string_dup without locks * faster thread cache * rrdset and rrddim now use dictionaries for indexing * rrdhost now uses dictionary * rrdfamily now uses DICTIONARY * rrdvar using dictionary instead of AVL * allocate the right size to rrdvar flag members * rrdhost remaining char * members to STRING * * better error handling on indexing * strings now use a read/write lock to allow parallel searches to the index * removed AVL support from dictionaries; implemented STRING with native Judy calls * string releases should be negative * only 31 bits are allowed for enum flags * proper locking on strings * string threading unittest and fixes * fix lgtm finding * fixed naming * stream chart/dimension definitions at the beginning of a streaming session * thread stack variable is undefined on thread cancel * rrdcontext garbage collect per host on startup * worker control in garbage collection * relaxed deletion of rrdmetrics * type checking on dictfe * netdata chart to monitor rrdcontext triggers * Group chart label updates * rrdcontext better handling of collected rrdsets * rrdpush incremental transmition of definitions should use as much buffer as possible * require 1MB per chart * empty the sender buffer before enabling metrics streaming * fill up to 50% of buffer * reset signaling metrics sending * use the shared variable for status * use separate host flag for enabling streaming 
of metrics * make sure the flag is clear * add logging for streaming * add logging for streaming on buffer overflow * circular_buffer proper sizing * removed obsolete logs * do not execute worker jobs if not necessary * better messages about compression disabling * proper use of flags and updating rrdset last access time every time the obsoletion flag is flipped * monitor stream sender used buffer ratio * Update exporting unit tests * no need to compare label value with strcmp * streaming send workers now monitor bandwidth * workers now use strings * streaming receiver monitors incoming bandwidth * parser shift of worker ids * minor fixes * Group chart label updates * Populate context with dimensions that have data * Fix chart id * better shift of parser worker ids * fix for streaming compression * properly count received bytes * ensure LZ4 compression ring buffer does not wrap prematurely * do not stream empty charts; do not process empty instances in rrdcontext * need_to_send_chart_definition() does not need an rrdset lock any more * rrdcontext objects are collected, after data have been written to the db * better logging of RRDCONTEXT transitions * always set all variables needed by the worker utilization charts * implemented double linked list for most objects; eliminated alarm indexes from rrdhost; and many more fixes * lockless strings design - string_dup() and string_freez() are totally lockless when they dont need to touch Judy - only Judy is protected with a read/write lock * STRING code re-organization for clarity * thread_cache improvements; double numbers precision on worker threads * STRING_ENTRY now shadown STRING, so no duplicate definition is required; string_length() renamed to string_strlen() to follow the paradigm of all other functions, STRING internal statistics are now only compiled with NETDATA_INTERNAL_CHECKS * rrdhost index by hostname now cleans up; aclk queries of archieved hosts do not index hosts * Add index to speed up database context 
searches * Removed last_updated optimization (was also buggy after latest merge with master) Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Co-authored-by: Vladimir Kobal <vlad@prokk.net>
238 lines
5.6 KiB
C++
238 lines
5.6 KiB
C++
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include "Config.h"
|
|
#include "Dimension.h"
|
|
#include "Host.h"
|
|
|
|
#include <random>
|
|
|
|
using namespace ml;
|
|
|
|
// Capability probe for the ML subsystem. This implementation always
// answers true.
bool ml_capable() {
    return true;
}
|
|
|
|
// Decides whether anomaly detection applies to host RH.
//
// Returns true only when Cfg.EnableAnomalyDetection is set and the host's
// hostname does not match the Cfg.SP_HostsToSkip pattern. The pattern is
// not consulted at all when anomaly detection is disabled.
bool ml_enabled(RRDHOST *RH) {
    return Cfg.EnableAnomalyDetection &&
           !simple_pattern_matches(Cfg.SP_HostsToSkip, rrdhost_hostname(RH));
}
|
|
|
|
/*
|
|
* Assumptions:
|
|
* 1) hosts outlive their sets, and sets outlive their dimensions,
|
|
* 2) dimensions always have a set that has a host.
|
|
*/
|
|
|
|
// Initializes the ML subsystem.
//
// Reads the ML configuration into Cfg. When anomaly detection is enabled,
// appends Cfg.MaxTrainSamples values to Cfg.RandomNums, drawn from a
// std::mt19937 engine seeded through std::random_device.
void ml_init(void) {
    // Read config values
    Cfg.readMLConfig();

    if (Cfg.EnableAnomalyDetection) {
        // Random numbers let us efficiently sample the features we need
        // for KMeans clustering.
        std::random_device Seeder;
        std::mt19937 Engine(Seeder());

        const size_t NumSamples = Cfg.MaxTrainSamples;
        Cfg.RandomNums.reserve(NumSamples);

        for (size_t Remaining = NumSamples; Remaining != 0; --Remaining)
            Cfg.RandomNums.push_back(Engine());
    }
}
|
|
|
|
// Creates the ML state for host RH.
//
// Does nothing when ml_enabled(RH) is false. Otherwise allocates a Host,
// stores it in RH->ml_host and starts its anomaly detection threads.
void ml_new_host(RRDHOST *RH) {
    if (ml_enabled(RH)) {
        Host *NewHost = new Host(RH);
        RH->ml_host = static_cast<ml_host_t>(NewHost);

        NewHost->startAnomalyDetectionThreads();
    }
}
|
|
|
|
// Tears down the ML state attached to host RH, if there is any.
//
// The anomaly detection threads are stopped before the Host object is
// deleted; RH->ml_host is reset to nullptr afterwards.
void ml_delete_host(RRDHOST *RH) {
    if (Host *Existing = static_cast<Host *>(RH->ml_host)) {
        Existing->stopAnomalyDetectionThreads();

        delete Existing;
        RH->ml_host = nullptr;
    }
}
|
|
|
|
// Creates the ML state for dimension RD and registers it with its host.
//
// Nothing is created when any of the following holds (checked in order):
//   - the dimension's host has no ML object,
//   - RD->update_every differs from the host's updateEvery(),
//   - the chart name matches the Cfg.SP_ChartsToSkip pattern.
void ml_new_dimension(RRDDIM *RD) {
    RRDSET *Set = RD->rrdset;
    Host *Owner = static_cast<Host *>(Set->rrdhost->ml_host);

    if (!Owner ||
        static_cast<unsigned>(RD->update_every) != Owner->updateEvery() ||
        simple_pattern_matches(Cfg.SP_ChartsToSkip, rrdset_name(Set)))
        return;

    Dimension *NewDim = new Dimension(RD);
    RD->ml_dimension = static_cast<ml_dimension_t>(NewDim);
    Owner->addDimension(NewDim);
}
|
|
|
|
// Releases the ML state attached to dimension RD, if there is any.
//
// When the dimension's host has an ML object, the Dimension is handed to
// Host::removeDimension(); otherwise it is deleted here directly. In both
// cases RD->ml_dimension is reset to nullptr.
// NOTE(review): removeDimension() is presumably responsible for freeing the
// Dimension it receives - confirm against Host's implementation.
void ml_delete_dimension(RRDDIM *RD) {
    Dimension *Dim = static_cast<Dimension *>(RD->ml_dimension);

    if (Dim) {
        if (Host *Owner = static_cast<Host *>(RD->rrdset->rrdhost->ml_host))
            Owner->removeDimension(Dim);
        else
            delete Dim;

        RD->ml_dimension = nullptr;
    }
}
|
|
|
|
// Serializes the ML configuration of host RH as JSON text.
//
// When RH is null or has no ML object, the document is {"enabled": false}.
// Returns a strdup()-allocated string (indent 2, tab indent character);
// releasing it is left to the caller.
char *ml_get_host_info(RRDHOST *RH) {
    nlohmann::json ConfigJson;

    Host *H = RH ? static_cast<Host *>(RH->ml_host) : nullptr;
    if (H)
        H->getConfigAsJson(ConfigJson);
    else
        ConfigJson["enabled"] = false;

    const auto Text = ConfigJson.dump(2, '\t');
    return strdup(Text.c_str());
}
|
|
|
|
// Serializes the detection (runtime) information of host RH as JSON text.
//
// Returns nullptr when RH is null or has no ML object. Otherwise returns a
// strdup()-allocated string (indent 1, tab indent character); releasing it
// is left to the caller.
char *ml_get_host_runtime_info(RRDHOST *RH) {
    if (!RH || !RH->ml_host)
        return nullptr;

    nlohmann::json DetectionJson;
    static_cast<Host *>(RH->ml_host)->getDetectionInfoAsJson(DetectionJson);

    const auto Text = DetectionJson.dump(1, '\t');
    return strdup(Text.c_str());
}
|
|
|
|
// Feeds a new sample into the ML state of dimension RD and returns the
// second member of the pair produced by Dimension::predict().
//
// Returns false without recording anything when RD has no ML state.
bool ml_is_anomalous(RRDDIM *RD, double Value, bool Exists) {
    if (Dimension *Dim = static_cast<Dimension *>(RD->ml_dimension)) {
        Dim->addValue(Value, Exists);
        return Dim->predict().second;
    }

    return false;
}
|
|
|
|
// Returns, as JSON text, the (time_t, time_t) ranges that
// Host::getAnomaliesInRange() reports for the given detector name/version
// and the After/Before arguments.
//
// Logs an error and returns nullptr when RH is null, has no ML object, or
// the lookup reports failure. On success the result is a strdup()-allocated
// string (indent 4); releasing it is left to the caller.
char *ml_get_anomaly_events(RRDHOST *RH, const char *AnomalyDetectorName,
                            int AnomalyDetectorVersion, time_t After, time_t Before) {
    Host *H = RH ? static_cast<Host *>(RH->ml_host) : nullptr;
    if (!H) {
        error("No host");
        return nullptr;
    }

    std::vector<std::pair<time_t, time_t>> Ranges;
    if (!H->getAnomaliesInRange(Ranges, AnomalyDetectorName,
                                AnomalyDetectorVersion,
                                H->getUUID(),
                                After, Before)) {
        error("DB result is empty");
        return nullptr;
    }

    nlohmann::json Encoded = Ranges;
    const auto Text = Encoded.dump(4);
    return strdup(Text.c_str());
}
|
|
|
|
// Returns, as JSON text, the document that Host::getAnomalyInfo() fills in
// for the given detector name/version and the After/Before arguments.
//
// Logs an error and returns nullptr when RH is null, has no ML object, or
// the lookup reports failure. On success the result is a strdup()-allocated
// string (indent 4, tab indent character); releasing it is left to the
// caller.
char *ml_get_anomaly_event_info(RRDHOST *RH, const char *AnomalyDetectorName,
                                int AnomalyDetectorVersion, time_t After, time_t Before) {
    Host *H = RH ? static_cast<Host *>(RH->ml_host) : nullptr;
    if (!H) {
        error("No host");
        return nullptr;
    }

    nlohmann::json Info;
    if (!H->getAnomalyInfo(Info, AnomalyDetectorName,
                           AnomalyDetectorVersion,
                           H->getUUID(),
                           After, Before)) {
        error("DB result is empty");
        return nullptr;
    }

    const auto Text = Info.dump(4, '\t');
    return strdup(Text.c_str());
}
|
|
|
|
// Keeps only the MaxAnomalyRates highest-valued dimensions of a single-row
// result visible; every other dimension gets RRDR_DIMENSION_HIDDEN set.
//
// R->v is read as one value per dimension and R->od as one flag word per
// dimension, both indexed 0 .. R->d - 1. The result is left untouched
// unless R has exactly one row and 1 <= MaxAnomalyRates < R->d.
void ml_process_rrdr(RRDR *R, int MaxAnomalyRates) {
    if (R->rows != 1 || MaxAnomalyRates < 1 || MaxAnomalyRates >= R->d)
        return;

    NETDATA_DOUBLE *Values = R->v;
    RRDR_DIMENSION_FLAGS *Flags = R->od;

    // Pair each value with its original position so the position survives
    // the sort.
    std::vector<std::pair<NETDATA_DOUBLE, int>> Ranked;
    Ranked.reserve(R->d);

    for (int Pos = 0; Pos != R->d; Pos++)
        Ranked.emplace_back(Values[Pos], Pos);

    // Sorting through reverse iterators leaves the vector in descending
    // order when walked front to back.
    std::sort(Ranked.rbegin(), Ranked.rend());

    // Everything past the first MaxAnomalyRates entries gets hidden.
    for (auto It = Ranked.begin() + MaxAnomalyRates; It != Ranked.end(); ++It) {
        auto &DimFlag = Flags[It->second];

        const int Updated = static_cast<int>(DimFlag) | RRDR_DIMENSION_HIDDEN;
        DimFlag = static_cast<rrdr_dimension_flag>(Updated);
    }
}
|
|
|
|
// Forwards a new name to the ML state of dimension RD through
// Dimension::setAnomalyRateRDName(). Does nothing when RD has no ML state.
// RS is accepted but not used.
void ml_dimension_update_name(RRDSET *RS, RRDDIM *RD, const char *Name) {
    (void) RS;

    if (Dimension *Dim = static_cast<Dimension *>(RD->ml_dimension))
        Dim->setAnomalyRateRDName(Name);
}
|
|
|
|
bool ml_streaming_enabled() {
|
|
return Cfg.StreamADCharts;
|
|
}
|
|
|
|
#if defined(ENABLE_ML_TESTS)
|
|
|
|
#include "gtest/gtest.h"
|
|
|
|
int test_ml(int argc, char *argv[]) {
|
|
(void) argc;
|
|
(void) argv;
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|
|
|
|
#endif // ENABLE_ML_TESTS
|
|
|
|
#include "ml-private.h"
|