mirror of
https://github.com/netdata/netdata.git
synced 2025-04-06 22:38:55 +00:00

* ML uses synchronous queries
* do not call malloc_trim() to free memory, since it locks everything
* Reschedule dimensions for training from worker threads.
* when we collect or read from the database, it is SAMPLES; when we generate points for a chart, it is POINTS
* keep the receiver send buffer 10x the default
* support autoscaling stream circular buffers
* nd_poll() prefers sending data vs receiving data - in an attempt to dequeue as soon as possible
* fix last commit
* allow removing receiver and senders inline, if the stream thread is not working on them
* fix logs
* Revert "nd_poll() prefers sending data vs receiving data - in an attempt to dequeue as soon as possible"
This reverts commit 51539a97da.
* do not access receiver or sender after it has been removed
* open cache hot2clean
* open cache hot2clean does not need flushing
* use aral for extent pages up to 65k
* track aral malloc and mmap allocations separately; add 8192 as a possible value to PGD
* do not evict too frequently if not needed
* fix aral metrics
* fix aral metrics again
* accurate accounting of memory for dictionaries, strings, labels and MRG
* log during shutdown the progress of dbengine flushing
* move metasync shutdown after dbengine
* max iterations per I/O events
* max iterations per I/O events - break the loop
* max iterations per I/O events - break the loop - again
* disable inline evictions for all caches
* when writing to sockets, send everything that can be sent
* cleanup code to trigger evictions
* fix calculation of eviction size
* fix calculation of eviction size once more
* fix calculation of eviction size once more - again
* ml and replication stop while backfilling is running
* process opcodes while draining the sockets; log with limit when asking to disconnect a node
* fix log
* ml stops when replication queries are running
* report pgd_padding to pulse
* aral precise memory accounting
* removed all alignas() and fix the 2 issues that resulted in unaligned memory accesses (one in mqtt and another in streaming)
* remove the bigger sizes from PGD, but keep multiples of gorilla buffers
* exclude judy from sanitizers
* use 16 bytes alignment on 32 bit machines
* internal check about memory alignment
* experiment: do not allow more children to connect while there is backfilling or replication queries running
* when the node is initializing, retry in 30 seconds
* connector cleanup and isolation of control logic about enabling/disabling various parts
* stop also health queries while backfilling is running
* tuning
* drain the input
* improve interactivity when suspending
* more interactive stream_control
* debug logs to find the connection issue
* abstracted everything about stream control
* Add ml_host_{start,stop} again.
* Do not create/update anomaly-detection charts when ML is not running for a host.
* rrdhost flag RECEIVER_DISCONNECTED has been reversed to COLLECTOR_ONLINE and has been used for localhost and virtual hosts too, to have a single point of truth about the availability of collected data or not
* ml_host_start() and ml_host_stop() are used by streaming receivers; ml_host_start() is used for localhost and virtual hosts
* fixed typo
* allow up to 3 backfills at a time
* add throttling based on user queries
* restore cache line paddings
* unify streaming logs to make it easier to grep logs
* tuning of stream_control
* more logs unification
* use mallocz_release_as_much_memory_to_the_system() under extreme conditions
* do not rely on the response code of evict_pages()
* log the gap of the database every time a node is connected
* updated ram requirements
---------
Co-authored-by: vkalintiris <vasilis@netdata.cloud>
172 lines
6.6 KiB
C
172 lines
6.6 KiB
C
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#define PULSE_INTERNALS 1
|
|
#include "pulse-aral.h"
|
|
|
|
struct aral_info {
|
|
const char *name;
|
|
RRDSET *st_memory;
|
|
RRDDIM *rd_malloc_used, *rd_malloc_free, *rd_mmap_used, *rd_mmap_free, *rd_structures, *rd_padding;
|
|
|
|
RRDSET *st_utilization;
|
|
RRDDIM *rd_utilization;
|
|
};
|
|
|
|
// Instantiates a JudyL wrapper whose values are `struct aral_info *`.
// NOTE(review): the macro is defined outside this file; presumably it is what
// provides the ARAL_STATS_JudyLSet type and the ARAL_STATS_GET/SET/DEL/FIRST/NEXT
// helpers used throughout this file - confirm against its definition.
DEFINE_JUDYL_TYPED(ARAL_STATS, struct aral_info *);
|
|
|
|
static struct {
|
|
SPINLOCK spinlock;
|
|
ARAL_STATS_JudyLSet idx;
|
|
} globals = { 0 };
|
|
|
|
static void pulse_aral_register_statistics(struct aral_statistics *stats, const char *name) {
|
|
if(!name || !stats)
|
|
return;
|
|
|
|
spinlock_lock(&globals.spinlock);
|
|
struct aral_info *ai = ARAL_STATS_GET(&globals.idx, (Word_t)stats);
|
|
if(!ai) {
|
|
ai = callocz(1, sizeof(struct aral_info));
|
|
ai->name = strdupz(name);
|
|
ARAL_STATS_SET(&globals.idx, (Word_t)stats, ai);
|
|
}
|
|
spinlock_unlock(&globals.spinlock);
|
|
}
|
|
|
|
// Registers the statistics of the given ARAL for charting.
//
// ar:   the allocator to track; NULL is ignored.
// name: label to chart it under; when NULL, aral_name(ar) is used instead.
void pulse_aral_register(ARAL *ar, const char *name) {
    if(!ar)
        return;

    const char *label = name ? name : aral_name(ar);

    // The registry is keyed by the statistics object, not by the ARAL itself.
    pulse_aral_register_statistics(aral_get_statistics(ar), label);
}
|
|
|
|
// Removes the statistics of the given ARAL from the registry and frees the
// bookkeeping entry (including its copied name).
//
// A NULL `ar`, or an ARAL whose statistics were never registered, is a no-op.
// Only the entry itself is released here; the chart and dimension pointers it
// holds are not touched by this function.
void pulse_aral_unregister(ARAL *ar) {
    if(!ar)
        return;

    const Word_t key = (Word_t)aral_get_statistics(ar);

    spinlock_lock(&globals.spinlock);

    struct aral_info *entry = ARAL_STATS_GET(&globals.idx, key);
    if(entry) {
        // Drop it from the index first, then release the memory.
        ARAL_STATS_DEL(&globals.idx, key);
        freez((void *)entry->name);
        freez(entry);
    }

    spinlock_unlock(&globals.spinlock);
}
|
|
|
|
// Registers the statistics returned by aral_by_size_statistics() under the
// fixed label "by-size".
void pulse_aral_init(void) {
    struct aral_statistics *by_size = aral_by_size_statistics();
    pulse_aral_register_statistics(by_size, "by-size");
}
|
|
|
|
void pulse_aral_do(bool extended) {
|
|
if(!extended) return;
|
|
|
|
spinlock_lock(&globals.spinlock);
|
|
Word_t s = 0;
|
|
for(struct aral_info *ai = ARAL_STATS_FIRST(&globals.idx, &s);
|
|
ai;
|
|
ai = ARAL_STATS_NEXT(&globals.idx, &s)) {
|
|
struct aral_statistics *stats = (void *)(uintptr_t)s;
|
|
if (!stats)
|
|
continue;
|
|
|
|
size_t malloc_allocated_bytes = __atomic_load_n(&stats->malloc.allocated_bytes, __ATOMIC_RELAXED);
|
|
size_t malloc_used_bytes = __atomic_load_n(&stats->malloc.used_bytes, __ATOMIC_RELAXED);
|
|
if(malloc_used_bytes > malloc_allocated_bytes)
|
|
malloc_allocated_bytes = malloc_used_bytes;
|
|
size_t malloc_free_bytes = malloc_allocated_bytes - malloc_used_bytes;
|
|
|
|
size_t mmap_allocated_bytes = __atomic_load_n(&stats->mmap.allocated_bytes, __ATOMIC_RELAXED);
|
|
size_t mmap_used_bytes = __atomic_load_n(&stats->mmap.used_bytes, __ATOMIC_RELAXED);
|
|
if(mmap_used_bytes > mmap_allocated_bytes)
|
|
mmap_allocated_bytes = mmap_used_bytes;
|
|
size_t mmap_free_bytes = mmap_allocated_bytes - mmap_used_bytes;
|
|
|
|
size_t structures_bytes = __atomic_load_n(&stats->structures.allocated_bytes, __ATOMIC_RELAXED);
|
|
|
|
size_t padding_bytes = __atomic_load_n(&stats->malloc.padding_bytes, __ATOMIC_RELAXED) +
|
|
__atomic_load_n(&stats->mmap.padding_bytes, __ATOMIC_RELAXED);
|
|
|
|
NETDATA_DOUBLE utilization;
|
|
if((malloc_used_bytes + mmap_used_bytes != 0) && (malloc_allocated_bytes + mmap_allocated_bytes != 0))
|
|
utilization = 100.0 * (NETDATA_DOUBLE)(malloc_used_bytes + mmap_used_bytes) / (NETDATA_DOUBLE)(malloc_allocated_bytes + mmap_allocated_bytes);
|
|
else
|
|
utilization = 100.0;
|
|
|
|
{
|
|
if (unlikely(!ai->st_memory)) {
|
|
char id[256];
|
|
|
|
snprintfz(id, sizeof(id), "aral_%s_memory", ai->name);
|
|
netdata_fix_chart_id(id);
|
|
|
|
ai->st_memory = rrdset_create_localhost(
|
|
"netdata",
|
|
id,
|
|
NULL,
|
|
"ARAL",
|
|
"netdata.aral_memory",
|
|
"Array Allocator Memory Utilization",
|
|
"bytes",
|
|
"netdata",
|
|
"pulse",
|
|
910000,
|
|
localhost->rrd_update_every,
|
|
RRDSET_TYPE_STACKED);
|
|
|
|
rrdlabels_add(ai->st_memory->rrdlabels, "ARAL", ai->name, RRDLABEL_SRC_AUTO);
|
|
|
|
ai->rd_malloc_free = rrddim_add(ai->st_memory, "malloc free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
|
ai->rd_mmap_free = rrddim_add(ai->st_memory, "mmap free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
|
ai->rd_malloc_used = rrddim_add(ai->st_memory, "malloc used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
|
ai->rd_mmap_used = rrddim_add(ai->st_memory, "mmap used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
|
ai->rd_structures = rrddim_add(ai->st_memory, "structures", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
|
ai->rd_padding = rrddim_add(ai->st_memory, "padding", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
|
|
}
|
|
|
|
rrddim_set_by_pointer(ai->st_memory, ai->rd_malloc_used, (collected_number)malloc_used_bytes);
|
|
rrddim_set_by_pointer(ai->st_memory, ai->rd_malloc_free, (collected_number)malloc_free_bytes);
|
|
rrddim_set_by_pointer(ai->st_memory, ai->rd_mmap_used, (collected_number)mmap_used_bytes);
|
|
rrddim_set_by_pointer(ai->st_memory, ai->rd_mmap_free, (collected_number)mmap_free_bytes);
|
|
rrddim_set_by_pointer(ai->st_memory, ai->rd_structures, (collected_number)structures_bytes);
|
|
rrddim_set_by_pointer(ai->st_memory, ai->rd_padding, (collected_number)padding_bytes);
|
|
rrdset_done(ai->st_memory);
|
|
}
|
|
|
|
{
|
|
if (unlikely(!ai->st_utilization)) {
|
|
char id[256];
|
|
|
|
snprintfz(id, sizeof(id), "aral_%s_utilization", ai->name);
|
|
netdata_fix_chart_id(id);
|
|
|
|
ai->st_utilization = rrdset_create_localhost(
|
|
"netdata",
|
|
id,
|
|
NULL,
|
|
"ARAL",
|
|
"netdata.aral_utilization",
|
|
"Array Allocator Memory Utilization",
|
|
"%",
|
|
"netdata",
|
|
"pulse",
|
|
910001,
|
|
localhost->rrd_update_every,
|
|
RRDSET_TYPE_LINE);
|
|
|
|
rrdlabels_add(ai->st_utilization->rrdlabels, "ARAL", ai->name, RRDLABEL_SRC_AUTO);
|
|
|
|
ai->rd_utilization = rrddim_add(ai->st_utilization, "utilization", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE);
|
|
}
|
|
|
|
rrddim_set_by_pointer(ai->st_utilization, ai->rd_utilization, (collected_number)(utilization * 10000.0));
|
|
rrdset_done(ai->st_utilization);
|
|
}
|
|
}
|
|
|
|
spinlock_unlock(&globals.spinlock);
|
|
}
|