limit the glibc unused memory (#19380)
* limit the glibc unused memory
* call malloc_trim() more frequently
* use mallinfo2 to provide glibc fragmentation statistics
This commit is contained in:
parent 0232abdfe9
commit 4c2efd4028

7 changed files with 93 additions and 25 deletions
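The new chart's four-way split of the glibc heap comes straight from mallinfo2() (glibc 2.33+). Below is a standalone sketch of the same arithmetic used by the pulse code in this diff; the churn loop and the printing are illustrative only, not netdata code.

// standalone sketch, assumes glibc >= 2.33 for mallinfo2()
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    // create some allocation churn so the counters are non-trivial
    void *p[1000];
    for (int i = 0; i < 1000; i++) p[i] = malloc((size_t)(1024 + i));
    for (int i = 0; i < 1000; i += 2) free(p[i]);

    struct mallinfo2 mi = mallinfo2();
    size_t used_mmap  = mi.hblkhd;                       // mmapped allocations
    size_t used_arena = (mi.uordblks > mi.hblkhd) ? mi.uordblks - mi.hblkhd : 0;
    size_t releasable = mi.keepcost;                     // trimmable via malloc_trim()
    size_t fragments  = (mi.fordblks > mi.keepcost) ? mi.fordblks - mi.keepcost : 0;

    printf("used arena: %zu\nused mmap: %zu\n", used_arena, used_mmap);
    printf("unused releasable: %zu\nunused fragments: %zu\n", releasable, fragments);

    for (int i = 1; i < 1000; i += 2) free(p[i]);
    return 0;
}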
@@ -492,6 +492,14 @@ int main() {
 }
 " HAVE_C_MALLOC_INFO)
 
+check_c_source_compiles("
+#include <malloc.h>
+int main() {
+    struct mallinfo2 m = mallinfo2();
+    return 0;
+}
+" HAVE_C_MALLINFO2)
+
 check_c_source_compiles("
 #define _GNU_SOURCE
 #include <stdio.h>
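The probe defines HAVE_C_MALLINFO2 only when the test program compiles, i.e. when the C library actually provides mallinfo2(). A minimal sketch of how such a guard can degrade gracefully on other libcs; heap_releasable_bytes() is a hypothetical helper, not part of this commit:

#include <stddef.h>
#include <stdio.h>
#ifdef HAVE_C_MALLINFO2
#include <malloc.h>
#endif

// reports how much free heap glibc could hand back, or 0 when unknown
static size_t heap_releasable_bytes(void) {
#ifdef HAVE_C_MALLINFO2
    return mallinfo2().keepcost; // trimmable space at the top of the heap
#else
    return 0; // no portable way to ask the allocator
#endif
}

int main(void) {
    printf("releasable: %zu bytes\n", heap_releasable_bytes());
    return 0;
}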
@@ -99,6 +99,7 @@
 #cmakedefine HAVE_C_MALLOPT
 #cmakedefine HAVE_C_MALLOC_TRIM
 #cmakedefine HAVE_C_MALLOC_INFO
+#cmakedefine HAVE_C_MALLINFO2
 #cmakedefine HAVE_SETNS
 #cmakedefine HAVE_STRNDUP
 #cmakedefine SSL_HAS_PENDING
@@ -118,8 +118,8 @@ void nd_profile_setup(void) {
     else if(netdata_conf_is_parent()) {
         nd_profile.storage_tiers = 3;
         nd_profile.update_every = 1;
-        nd_profile.malloc_arenas = os_get_system_cpus_cached(true);
-        nd_profile.malloc_trim = 256 * 1024;
+        nd_profile.malloc_arenas = 1;
+        nd_profile.malloc_trim = 128 * 1024;
         nd_profile.stream_sender_compression = ND_COMPRESSION_FASTEST;
         // web server threads = dynamic
         // aclk query threads = dynamic
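This hunk only changes the profile defaults; the place where netdata feeds them to the allocator is outside the diff. For reference, glibc exposes matching knobs via mallopt(); the sketch below shows that hookup under that assumption (apply_malloc_profile() is hypothetical, not netdata code):

#include <malloc.h>
#include <stdio.h>

static void apply_malloc_profile(int arenas, int trim_threshold_bytes) {
    // cap the number of malloc arenas: fewer arenas retain less memory,
    // at the cost of more allocator lock contention between threads
    if (mallopt(M_ARENA_MAX, arenas) != 1)
        fprintf(stderr, "M_ARENA_MAX not applied\n");

    // free space at the top of the heap beyond this size is returned to the kernel
    if (mallopt(M_TRIM_THRESHOLD, trim_threshold_bytes) != 1)
        fprintf(stderr, "M_TRIM_THRESHOLD not applied\n");
}

int main(void) {
    apply_malloc_profile(1, 128 * 1024); // the new parent defaults from this hunk
    return 0;
}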
@@ -9,9 +9,11 @@
 
 struct netdata_buffers_statistics netdata_buffers_statistics = { 0 };
 
-#ifdef HAVE_C_MALLOC_INFO
+#if defined(HAVE_C_MALLOC_INFO) || defined(HAVE_C_MALLINFO2)
 #include <malloc.h>
 #endif
 
+#ifdef HAVE_C_MALLOC_INFO
 // Helper function to find the last occurrence of a substring in a string
 static char *find_last(const char *haystack, const char *needle, size_t *found) {
     *found = 0;
@@ -410,7 +412,7 @@ void pulse_daemon_memory_do(bool extended) {
             "Memory Usage",
             NULL,
             "Glibc Memory Arenas",
-            "bytes",
+            "arenas",
             "netdata",
             "pulse",
             130104,
@@ -435,11 +437,11 @@ void pulse_daemon_memory_do(bool extended) {
     if (unlikely(!st_malloc)) {
         st_malloc = rrdset_create_localhost(
             "netdata",
-            "glibc_memory",
+            "glibc_malloc_info",
             NULL,
             "Memory Usage",
             NULL,
-            "Glibc Memory Usage",
+            "Glibc Malloc Info",
             "bytes",
             "netdata",
             "pulse",
@@ -463,4 +465,58 @@ void pulse_daemon_memory_do(bool extended) {
     }
 #endif
+
+#ifdef HAVE_C_MALLINFO2
+    {
+        static RRDSET *st_mallinfo = NULL;
+        static RRDDIM *rd_used_mmap = NULL;
+        static RRDDIM *rd_used_arena = NULL;
+        static RRDDIM *rd_unused_fragments = NULL;
+        static RRDDIM *rd_unused_releasable = NULL;
+
+        if (unlikely(!st_mallinfo)) {
+            st_mallinfo = rrdset_create_localhost(
+                "netdata",
+                "glibc_mallinfo2",
+                NULL,
+                "Memory Usage",
+                NULL,
+                "Glibc Mallinfo2 Memory Distribution",
+                "bytes",
+                "netdata",
+                "pulse",
+                130106,
+                localhost->rrd_update_every,
+                RRDSET_TYPE_STACKED);
+
+            rd_unused_releasable = rrddim_add(st_mallinfo, "unused releasable", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+            rd_unused_fragments = rrddim_add(st_mallinfo, "unused fragments", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+            rd_used_arena = rrddim_add(st_mallinfo, "used arena", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+            rd_used_mmap = rrddim_add(st_mallinfo, "used mmap", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+        }
+
+        struct mallinfo2 mi = mallinfo2();
+
+        // Memory used in mmapped regions
+        size_t used_mmap = mi.hblkhd;
+
+        // Memory used in the arena (non-mmapped allocations)
+        // uordblks includes both arena and mmap allocations, so we subtract mmap
+        size_t used_arena = (mi.uordblks > mi.hblkhd) ? mi.uordblks - mi.hblkhd : 0;
+
+        // Releasable memory (can be released via malloc_trim())
+        size_t unused_releasable = mi.keepcost;
+
+        // Fragmentation (remaining free space that's not easily releasable)
+        // This includes free chunks (fordblks) minus the releasable space
+        size_t unused_fragments = (mi.fordblks > mi.keepcost) ? mi.fordblks - mi.keepcost : 0;
+
+        rrddim_set_by_pointer(st_mallinfo, rd_unused_releasable, (collected_number)unused_releasable);
+        rrddim_set_by_pointer(st_mallinfo, rd_unused_fragments, (collected_number)unused_fragments);
+        rrddim_set_by_pointer(st_mallinfo, rd_used_arena, (collected_number)used_arena);
+        rrddim_set_by_pointer(st_mallinfo, rd_used_mmap, (collected_number)used_mmap);
+
+        rrdset_done(st_mallinfo);
+    }
+#endif // HAVE_C_MALLINFO2
+
 }
@@ -1982,14 +1982,16 @@ void free_all_unreferenced_clean_pages(PGC *cache) {
 }
 
 static void *pgc_evict_thread(void *ptr) {
+    static usec_t last_malloc_release_ut = 0;
+
     PGC *cache = ptr;
 
     worker_register("PGCEVICT");
     worker_register_job_name(0, "signaled");
     worker_register_job_name(1, "scheduled");
+    worker_register_job_name(2, "cleanup");
 
     unsigned job_id = 0;
-    usec_t last_malloc_release_ut = 0;
 
     while (true) {
         worker_is_idle();
@@ -2002,18 +2004,19 @@ static void *pgc_evict_thread(void *ptr) {
         if (nd_thread_signaled_to_cancel())
             break;
 
+        size_t size_to_evict = 0;
+        bool system_cleanup = false;
+        if(cache_usage_per1000(cache, &size_to_evict) > cache->config.aggressive_evict_per1000)
+            system_cleanup = true;
+
         evict_pages(cache, 0, 0, true, false);
 
-        size_t size_to_evict = 0;
-        if(cache_usage_per1000(cache, &size_to_evict) > cache->config.severe_pressure_per1000) {
+        if(system_cleanup) {
             usec_t now_ut = now_monotonic_usec();
 
-            if(last_malloc_release_ut + USEC_PER_SEC < now_ut) {
-                last_malloc_release_ut = now_ut;
-
-                // so, we tried 100 times to reduce memory, and a second has passed,
-                // but it is still severe!
-
+            if(__atomic_load_n(&last_malloc_release_ut, __ATOMIC_RELAXED) + USEC_PER_SEC <= now_ut) {
+                __atomic_store_n(&last_malloc_release_ut, now_ut, __ATOMIC_RELAXED);
                 worker_is_busy(2);
                 mallocz_release_as_much_memory_to_the_system();
             }
         }
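The rewritten gate promotes last_malloc_release_ut to a function-static accessed with relaxed atomics, so the expensive release runs at most about once per second no matter how often the pressure condition fires. A standalone sketch of the same pattern; monotonic_usec() stands in for netdata's now_monotonic_usec():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define USEC_PER_SEC 1000000ULL

// stand-in for netdata's now_monotonic_usec()
static uint64_t monotonic_usec(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * USEC_PER_SEC + (uint64_t)ts.tv_nsec / 1000;
}

// returns true at most about once per second, however often it is called
static bool once_per_second(void) {
    static uint64_t last_ut = 0; // shared state, hence the atomics
    uint64_t now_ut = monotonic_usec();
    if (__atomic_load_n(&last_ut, __ATOMIC_RELAXED) + USEC_PER_SEC <= now_ut) {
        __atomic_store_n(&last_ut, now_ut, __ATOMIC_RELAXED);
        return true;
    }
    return false;
}

int main(void) {
    int fired = 0;
    for (int i = 0; i < 1000000; i++)
        if (once_per_second()) fired++;
    printf("fired %d time(s)\n", fired); // expected: 1 within a tight loop
    return 0;
}

Note that the load-then-store pair is not a compare-and-swap, so two racing threads could both pass the check; that is tolerable here because an extra trim is merely redundant, and the trylock added to mallocz_release_as_much_memory_to_the_system() below serializes the actual work.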
@@ -101,11 +101,12 @@ static size_t aral_sizes[] = {
     sizeof(PGD),
 
     512, 1024, 1536, 2048, 5 * 512, 6 * 512, 7 * 512,
-    1 * 4096, 2 * 4096, 3 * 4096, 4 * 4096, 5 * 4096,
-    6 * 4096, 7 * 4096, 8 * 4096, 9 * 4096, 10 * 4096,
-    11 * 4096, 12 * 4096, 13 * 4096, 14 * 4096, 15 * 4096,
-    16 * 4096, 17 * 4096, 18 * 4096, 19 * 4096, 20 * 4096,
-    21 * 4096, 22 * 4096, 23 * 4096, 24 * 4096, 25 * 4096,
+    4 * 1024, 8 * 1024, 12 * 1024, 16 * 1024, 20 * 1024,
+    24 * 1024, 28 * 1024, 32 * 1024, 36 * 1024, 40 * 1024,
+    44 * 1024, 48 * 1024, 52 * 1024, 56 * 1024, 60 * 1024,
+    64 * 1024, 68 * 1024, 72 * 1024, 76 * 1024, 80 * 1024,
+    84 * 1024, 88 * 1024, 92 * 1024, 96 * 1024, 100 * 1024,
+    104 * 1024, 108 * 1024, 112 * 1024, 116 * 1024, 120 * 1024,
 };
 static ARAL **arals = NULL;
 
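Despite the visual rewrite, both spellings of the table step by 4 KiB; what actually changes is the top size class, from 25 * 4096 = 100 KiB to 120 * 1024 = 120 KiB, presumably so more medium-sized buffers stay in arals instead of falling through to raw glibc malloc. The arithmetic, spelled out as a pure illustration:

#include <stdio.h>

int main(void) {
    printf("old top class: 25 * 4096  = %6d bytes (%3d KiB)\n", 25 * 4096, 25 * 4096 / 1024);
    printf("new top class: 120 * 1024 = %6d bytes (%3d KiB)\n", 120 * 1024, 120 * 1024 / 1024);
    printf("step size, old: %d bytes, new: %d bytes\n", 1 * 4096, 4 * 1024);
    return 0;
}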
@@ -466,11 +466,10 @@ void posix_memfree(void *ptr) {
 void mallocz_release_as_much_memory_to_the_system(void) {
 #if defined(HAVE_C_MALLOC_TRIM)
     static SPINLOCK spinlock = SPINLOCK_INITIALIZER;
-    spinlock_lock(&spinlock);
-
-    malloc_trim(0);
-
-    spinlock_unlock(&spinlock);
+    if(spinlock_trylock(&spinlock)) {
+        malloc_trim(0);
+        spinlock_unlock(&spinlock);
+    }
 #endif
 }
 
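Switching from lock/unlock to trylock changes behavior under contention: a caller that finds another thread already inside malloc_trim() now returns immediately instead of queueing up to re-trim a heap that was just trimmed. The same idea with a plain pthread mutex (netdata's SPINLOCK is internal API, so this sketch substitutes pthreads):

#include <malloc.h>
#include <pthread.h>

static pthread_mutex_t trim_lock = PTHREAD_MUTEX_INITIALIZER;

void release_memory_to_system(void) {
#if defined(__GLIBC__)
    // if someone else is already trimming, a second concurrent trim adds
    // nothing: return immediately instead of blocking behind them
    if (pthread_mutex_trylock(&trim_lock) == 0) {
        malloc_trim(0); // return all trimmable free heap to the kernel
        pthread_mutex_unlock(&trim_lock);
    }
#endif
}

int main(void) {
    release_memory_to_system();
    return 0;
}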