0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-26 05:47:20 +00:00
netdata_netdata/ml/ml.cc
Costa Tsaousis 5e1b95cf92
Deduplicate all netdata strings ()
* rrdfamily

* rrddim

* rrdset plugin and module names

* rrdset units

* rrdset type

* rrdset family

* rrdset title

* rrdset title more

* rrdset context

* rrdcalctemplate context and removal of context hash from rrdset

* strings statistics

* rrdset name

* rearranged members of rrdset

* eliminate rrdset name hash; rrdcalc chart converted to STRING

* rrdset id, eliminated rrdset hash

* rrdcalc, alarm_entry, alert_config and some of rrdcalctemplate

* rrdcalctemplate

* rrdvar

* eval_variable

* rrddimvar and rrdsetvar

* rrdhost hostname, os and tags

* fix master commits

* added thread cache; implemented string_dup without locks

* faster thread cache

* rrdset and rrddim now use dictionaries for indexing

* rrdhost now uses dictionary

* rrdfamily now uses DICTIONARY

* rrdvar using dictionary instead of AVL

* allocate the right size to rrdvar flag members

* rrdhost remaining char * members to STRING *

* better error handling on indexing

* strings now use a read/write lock to allow parallel searches to the index

* removed AVL support from dictionaries; implemented STRING with native Judy calls

* string releases should be negative

* only 31 bits are allowed for enum flags

* proper locking on strings

* string threading unittest and fixes

* fix lgtm finding

* fixed naming

* stream chart/dimension definitions at the beginning of a streaming session

* thread stack variable is undefined on thread cancel

* rrdcontext garbage collect per host on startup

* worker control in garbage collection

* relaxed deletion of rrdmetrics

* type checking on dictfe

* netdata chart to monitor rrdcontext triggers

* Group chart label updates

* rrdcontext better handling of collected rrdsets

* rrdpush incremental transmission of definitions should use as much buffer as possible

* require 1MB per chart

* empty the sender buffer before enabling metrics streaming

* fill up to 50% of buffer

* reset signaling metrics sending

* use the shared variable for status

* use separate host flag for enabling streaming of metrics

* make sure the flag is clear

* add logging for streaming

* add logging for streaming on buffer overflow

* circular_buffer proper sizing

* removed obsolete logs

* do not execute worker jobs if not necessary

* better messages about compression disabling

* proper use of flags and updating rrdset last access time every time the obsoletion flag is flipped

* monitor stream sender used buffer ratio

* Update exporting unit tests

* no need to compare label value with strcmp

* streaming send workers now monitor bandwidth

* workers now use strings

* streaming receiver monitors incoming bandwidth

* parser shift of worker ids

* minor fixes

* Group chart label updates

* Populate context with dimensions that have data

* Fix chart id

* better shift of parser worker ids

* fix for streaming compression

* properly count received bytes

* ensure LZ4 compression ring buffer does not wrap prematurely

* do not stream empty charts; do not process empty instances in rrdcontext

* need_to_send_chart_definition() does not need an rrdset lock any more

* rrdcontext objects are collected, after data have been written to the db

* better logging of RRDCONTEXT transitions

* always set all variables needed by the worker utilization charts

* implemented double linked list for most objects; eliminated alarm indexes from rrdhost; and many more fixes

* lockless strings design - string_dup() and string_freez() are totally lockless when they dont need to touch Judy - only Judy is protected with a read/write lock

* STRING code re-organization for clarity

* thread_cache improvements; double numbers precision on worker threads

* STRING_ENTRY now shadows STRING, so no duplicate definition is required; string_length() renamed to string_strlen() to follow the paradigm of all other functions; STRING internal statistics are now only compiled with NETDATA_INTERNAL_CHECKS

* rrdhost index by hostname now cleans up; aclk queries of archived hosts do not index hosts

* Add index to speed up database context searches

* Removed last_updated optimization (was also buggy after latest merge with master)

Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Co-authored-by: Vladimir Kobal <vlad@prokk.net>
2022-09-05 19:31:06 +03:00

238 lines
5.6 KiB
C++

// SPDX-License-Identifier: GPL-3.0-or-later
#include "Config.h"
#include "Dimension.h"
#include "Host.h"
#include <random>
using namespace ml;
// Report whether ML support is available in this translation unit.
// This implementation unconditionally answers true.
bool ml_capable()
{
    return true;
}
// Decide whether anomaly detection should run for the given host.
//
// Returns false when Cfg.EnableAnomalyDetection is off, or when the host's
// hostname matches the Cfg.SP_HostsToSkip pattern; returns true otherwise.
bool ml_enabled(RRDHOST *RH) {
    // Short-circuit evaluation: the hostname is only matched against the
    // skip pattern when anomaly detection is enabled at all.
    return Cfg.EnableAnomalyDetection &&
           !simple_pattern_matches(Cfg.SP_HostsToSkip, rrdhost_hostname(RH));
}
/*
* Assumptions:
* 1) hosts outlive their sets, and sets outlive their dimensions,
* 2) dimensions always have a set that has a host.
*/
// Initialise the ML subsystem: load the configuration and, when anomaly
// detection is enabled, fill Cfg.RandomNums with Cfg.MaxTrainSamples
// pseudo-random values.
void ml_init(void) {
    // The configuration has to be read before any of its values are consulted.
    Cfg.readMLConfig();

    if (Cfg.EnableAnomalyDetection) {
        // These numbers are generated once, up front, so that the features
        // needed for KMeans clustering can be sampled efficiently later.
        std::random_device SeedSource;
        std::mt19937 Engine(SeedSource());

        Cfg.RandomNums.reserve(Cfg.MaxTrainSamples);

        size_t Generated = 0;
        while (Generated != Cfg.MaxTrainSamples) {
            Cfg.RandomNums.push_back(Engine());
            ++Generated;
        }
    }
}
// Create the ML state for a host, attach it to RH->ml_host and start its
// anomaly detection threads. Does nothing when ml_enabled() is false for
// the host.
void ml_new_host(RRDHOST *RH) {
    if (ml_enabled(RH)) {
        Host *NewHost = new Host(RH);

        // Publish the object on the host before the threads are started.
        RH->ml_host = static_cast<ml_host_t>(NewHost);
        NewHost->startAnomalyDetectionThreads();
    }
}
// Tear down the ML state attached to a host, if there is any: stop its
// anomaly detection threads, destroy the Host object and clear RH->ml_host.
void ml_delete_host(RRDHOST *RH) {
    if (Host *Attached = static_cast<Host *>(RH->ml_host)) {
        // Threads are stopped first, then the object they use is destroyed.
        Attached->stopAnomalyDetectionThreads();
        delete Attached;
        RH->ml_host = nullptr;
    }
}
// Create the ML state for a dimension and register it with its host.
//
// Nothing is created when:
//   - the dimension's host has no ML state,
//   - the dimension's update_every differs from the host's updateEvery(), or
//   - the chart name matches the Cfg.SP_ChartsToSkip pattern.
void ml_new_dimension(RRDDIM *RD) {
    RRDSET *Set = RD->rrdset;
    Host *OwnerHost = static_cast<Host *>(Set->rrdhost->ml_host);

    // The checks run in this order and stop at the first one that applies.
    if (!OwnerHost ||
        static_cast<unsigned>(RD->update_every) != OwnerHost->updateEvery() ||
        simple_pattern_matches(Cfg.SP_ChartsToSkip, rrdset_name(Set)))
        return;

    Dimension *NewDim = new Dimension(RD);
    RD->ml_dimension = static_cast<ml_dimension_t>(NewDim);
    OwnerHost->addDimension(NewDim);
}
// Detach the ML state from a dimension and clear RD->ml_dimension.
//
// When the dimension's host has ML state, the Dimension object is handed to
// Host::removeDimension(); otherwise it is deleted here.
// NOTE(review): presumably removeDimension() also frees the object - confirm.
void ml_delete_dimension(RRDDIM *RD) {
    Dimension *Dim = static_cast<Dimension *>(RD->ml_dimension);
    if (Dim == nullptr)
        return;

    if (Host *OwnerHost = static_cast<Host *>(RD->rrdset->rrdhost->ml_host))
        OwnerHost->removeDimension(Dim);
    else
        delete Dim;

    RD->ml_dimension = nullptr;
}
// Serialise the ML configuration of a host as JSON text.
//
// When RH is null or has no ML state, the JSON contains only
// {"enabled": false}. The result is a strdup()-allocated C string.
char *ml_get_host_info(RRDHOST *RH) {
    nlohmann::json Info;

    Host *H = RH ? static_cast<Host *>(RH->ml_host) : nullptr;
    if (H)
        H->getConfigAsJson(Info);
    else
        Info["enabled"] = false;

    // Indent width 2, using a tab as the indent character.
    const auto Text = Info.dump(2, '\t');
    return strdup(Text.c_str());
}
// Serialise the detection info of a host as JSON text.
//
// Returns nullptr when RH is null or has no ML state; otherwise returns a
// strdup()-allocated C string.
char *ml_get_host_runtime_info(RRDHOST *RH) {
    if (!RH || !RH->ml_host)
        return nullptr;

    nlohmann::json Info;
    static_cast<Host *>(RH->ml_host)->getDetectionInfoAsJson(Info);

    // Indent width 1, using a tab as the indent character.
    const auto Text = Info.dump(1, '\t');
    return strdup(Text.c_str());
}
// Feed a collected value to the dimension's ML state and return the second
// element of the pair produced by Dimension::predict().
//
// Returns false, without recording anything, when the dimension has no ML
// state attached.
bool ml_is_anomalous(RRDDIM *RD, double Value, bool Exists) {
    if (Dimension *Dim = static_cast<Dimension *>(RD->ml_dimension)) {
        // The value is recorded before the prediction is requested.
        Dim->addValue(Value, Exists);
        return Dim->predict().second;
    }

    return false;
}
// Fetch the anomaly time ranges recorded for a host between After and Before
// and return them as JSON text.
//
// Returns nullptr (after logging an error) when RH is null, has no ML state,
// or Host::getAnomaliesInRange() reports failure. Otherwise returns a
// strdup()-allocated C string.
char *ml_get_anomaly_events(RRDHOST *RH, const char *AnomalyDetectorName,
                            int AnomalyDetectorVersion, time_t After, time_t Before) {
    Host *H = RH ? static_cast<Host *>(RH->ml_host) : nullptr;
    if (H == nullptr) {
        error("No host");
        return nullptr;
    }

    std::vector<std::pair<time_t, time_t>> Ranges;
    if (!H->getAnomaliesInRange(Ranges, AnomalyDetectorName, AnomalyDetectorVersion,
                                H->getUUID(), After, Before)) {
        error("DB result is empty");
        return nullptr;
    }

    // The vector of (time_t, time_t) pairs is converted to JSON directly.
    nlohmann::json Json = Ranges;
    const auto Text = Json.dump(4);
    return strdup(Text.c_str());
}
// Fetch the anomaly info recorded for a host between After and Before and
// return it as JSON text.
//
// Returns nullptr (after logging an error) when RH is null, has no ML state,
// or Host::getAnomalyInfo() reports failure. Otherwise returns a
// strdup()-allocated C string.
char *ml_get_anomaly_event_info(RRDHOST *RH, const char *AnomalyDetectorName,
                                int AnomalyDetectorVersion, time_t After, time_t Before) {
    Host *H = RH ? static_cast<Host *>(RH->ml_host) : nullptr;
    if (H == nullptr) {
        error("No host");
        return nullptr;
    }

    nlohmann::json Json;
    if (!H->getAnomalyInfo(Json, AnomalyDetectorName, AnomalyDetectorVersion,
                           H->getUUID(), After, Before)) {
        error("DB result is empty");
        return nullptr;
    }

    // Indent width 4, using a tab as the indent character.
    const auto Text = Json.dump(4, '\t');
    return strdup(Text.c_str());
}
// Keep only the MaxAnomalyRates dimensions with the largest values in R->v
// visible; every other dimension gets RRDR_DIMENSION_HIDDEN set in R->od.
//
// The result is left untouched unless it has exactly one row and
// 1 <= MaxAnomalyRates < R->d.
// NOTE(review): the values in R->v are presumably per-dimension anomaly
// rates - confirm against the caller.
void ml_process_rrdr(RRDR *R, int MaxAnomalyRates) {
    if (R->rows != 1 || MaxAnomalyRates < 1 || MaxAnomalyRates >= R->d)
        return;

    NETDATA_DOUBLE *Values = R->v;
    RRDR_DIMENSION_FLAGS *Flags = R->od;

    // Pair each value with its original position so the flags array can be
    // addressed after sorting.
    std::vector<std::pair<NETDATA_DOUBLE, int>> Ranked;
    Ranked.reserve(R->d);
    for (int Dim = 0; Dim != R->d; ++Dim)
        Ranked.emplace_back(Values[Dim], Dim);

    // Sorting the reversed range ascending leaves the vector in descending
    // order: the largest values come first.
    std::sort(Ranked.rbegin(), Ranked.rend());

    // Everything past the first MaxAnomalyRates entries gets hidden.
    for (auto It = Ranked.begin() + MaxAnomalyRates; It != Ranked.end(); ++It) {
        RRDR_DIMENSION_FLAGS &Slot = Flags[It->second];
        const int Updated = static_cast<int>(Slot) | RRDR_DIMENSION_HIDDEN;
        Slot = static_cast<rrdr_dimension_flag>(Updated);
    }
}
// Forward a new name to the dimension's ML state through
// Dimension::setAnomalyRateRDName(). Does nothing when the dimension has no
// ML state attached. RS is not used.
void ml_dimension_update_name(RRDSET *RS, RRDDIM *RD, const char *Name) {
    (void) RS;

    if (Dimension *Dim = static_cast<Dimension *>(RD->ml_dimension))
        Dim->setAnomalyRateRDName(Name);
}
// Report the value of the Cfg.StreamADCharts configuration setting.
bool ml_streaming_enabled()
{
    const bool StreamCharts = Cfg.StreamADCharts;
    return StreamCharts;
}
#if defined(ENABLE_ML_TESTS)
#include "gtest/gtest.h"
// Entry point for the ML unit tests: initialises GoogleTest with the given
// command line and returns the result of RUN_ALL_TESTS().
int test_ml(int argc, char *argv[]) {
    // InitGoogleTest takes argc by pointer, together with argv.
    ::testing::InitGoogleTest(&argc, argv);

    const int Status = RUN_ALL_TESTS();
    return Status;
}
#endif // ENABLE_ML_TESTS
#include "ml-private.h"