mirror of
https://github.com/netdata/netdata.git
synced 2025-04-07 06:45:39 +00:00

* ML uses synchronous queries
* do not call malloc_trim() to free memory, since it locks everything
* Reschedule dimensions for training from worker threads.
* when we collect or read from the database, it is SAMPLES. When we generate points for a chart is POINTS
* keep the receiver send buffer 10x the default
* support autoscaling stream circular buffers
* nd_poll() prefers sending data vs receiving data - in an attempt to dequeue as soon as possible
* fix last commit
* allow removing receiver and senders inline, if the stream thread is not working on them
* fix logs
* Revert "nd_poll() prefers sending data vs receiving data - in an attempt to dequeue as soon as possible"
This reverts commit 51539a97da
.
* do not access receiver or sender after it has been removed
* open cache hot2clean
* open cache hot2clean does not need flushing
* use aral for extent pages up to 65k
* track aral malloc and mmap allocations separately; add 8192 as a possible value to PGD
* do not evict too frequently if not needed
* fix aral metrics
* fix aral metrics again
* accurate accounting of memory for dictionaries, strings, labels and MRG
* log during shutdown the progress of dbengine flushing
* move metasync shutdown after dbengine
* max iterations per I/O events
* max iterations per I/O events - break the loop
* max iterations per I/O events - break the loop - again
* disable inline evictions for all caches
* when writing to sockets, send everything that can be sent
* cleanup code to trigger evictions
* fix calculation of eviction size
* fix calculation of eviction size once more
* fix calculation of eviction size once more - again
* ml and replication stop while backfilling is running
* process opcodes while draining the sockets; log with limit when asking to disconnect a node
* fix log
* ml stops when replication queries are running
* report pgd_padding to pulse
* aral precise memory accounting
* removed all alignas() and fix the 2 issues that resulted in unaligned memory accesses (one in mqtt and another in streaming)
* remove the bigger sizes from PGD, but keep multiples of gorilla buffers
* exclude judy from sanitizers
* use 16 bytes alignment on 32 bit machines
* internal check about memory alignment
* experiment: do not allow more children to connect while there is backfilling or replication queries running
* when the node is initializing, retry in 30 seconds
* connector cleanup and isolation of control logic about enabling/disabling various parts
* stop also health queries while backfilling is running
* tuning
* drain the input
* improve interactivity when suspending
* more interactive stream_control
* debug logs to find the connection issue
* abstracted everything about stream control
* Add ml_host_{start,stop} again.
* Do not create/update anomaly-detection charts when ML is not running for a host.
* rrdhost flag RECEIVER_DISCONNECTED has been reversed to COLLECTOR_ONLINE and has been used for localhost and virtual hosts too, to have a single point of truth about the availability of collected data or not
* ml_host_start() and ml_host_stop() are used by streaming receivers; ml_host_start() is used for localhost and virtual hosts
* fixed typo
* allow up to 3 backfills at a time
* add throttling based on user queries
* restore cache line paddings
* unify streaming logs to make it easier to grep logs
* tuning of stream_control
* more logs unification
* use mallocz_release_as_much_memory_to_the_system() under extreme conditions
* do not rely on the response code of evict_pages()
* log the gap of the database every time a node is connected
* updated ram requirements
---------
Co-authored-by: vkalintiris <vasilis@netdata.cloud>
78 lines
2 KiB
C++
78 lines
2 KiB
C++
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include "ml_enums.h"
|
|
|
|
const char *
|
|
ml_machine_learning_status_to_string(enum ml_machine_learning_status mls)
|
|
{
|
|
switch (mls) {
|
|
case MACHINE_LEARNING_STATUS_ENABLED:
|
|
return "enabled";
|
|
case MACHINE_LEARNING_STATUS_DISABLED_DUE_TO_EXCLUDED_CHART:
|
|
return "disabled-sp";
|
|
default:
|
|
return "unknown";
|
|
}
|
|
}
|
|
|
|
const char *
|
|
ml_metric_type_to_string(enum ml_metric_type mt)
|
|
{
|
|
switch (mt) {
|
|
case METRIC_TYPE_CONSTANT:
|
|
return "constant";
|
|
case METRIC_TYPE_VARIABLE:
|
|
return "variable";
|
|
default:
|
|
return "unknown";
|
|
}
|
|
}
|
|
|
|
const char *
|
|
ml_training_status_to_string(enum ml_training_status ts)
|
|
{
|
|
switch (ts) {
|
|
case TRAINING_STATUS_TRAINED:
|
|
return "trained";
|
|
case TRAINING_STATUS_UNTRAINED:
|
|
return "untrained";
|
|
case TRAINING_STATUS_SILENCED:
|
|
return "silenced";
|
|
default:
|
|
return "unknown";
|
|
}
|
|
}
|
|
|
|
const char *
|
|
ml_worker_result_to_string(enum ml_worker_result tr)
|
|
{
|
|
switch (tr) {
|
|
case ML_WORKER_RESULT_OK:
|
|
return "ok";
|
|
case ML_WORKER_RESULT_INVALID_QUERY_TIME_RANGE:
|
|
return "invalid-query";
|
|
case ML_WORKER_RESULT_NOT_ENOUGH_COLLECTED_VALUES:
|
|
return "missing-values";
|
|
case ML_WORKER_RESULT_NULL_ACQUIRED_DIMENSION:
|
|
return "null-acquired-dim";
|
|
case ML_WORKER_RESULT_CHART_UNDER_REPLICATION:
|
|
return "chart-under-replication";
|
|
default:
|
|
return "unknown";
|
|
}
|
|
}
|
|
|
|
const char *
|
|
ml_queue_item_type_to_string(enum ml_queue_item_type qit)
|
|
{
|
|
switch (qit) {
|
|
case ML_QUEUE_ITEM_TYPE_CREATE_NEW_MODEL:
|
|
return "create-new-model";
|
|
case ML_QUEUE_ITEM_TYPE_ADD_EXISTING_MODEL:
|
|
return "add-existing-model";
|
|
case ML_QUEUE_ITEM_STOP_REQUEST:
|
|
return "stop-request";
|
|
default:
|
|
return "unknown";
|
|
}
|
|
}
|