diff --git a/src/daemon/config/README.md b/src/daemon/config/README.md index 384778baf9..f6b615afb5 100644 --- a/src/daemon/config/README.md +++ b/src/daemon/config/README.md @@ -35,18 +35,18 @@ After `netdata.conf` has been modified, Netdata needs to be [restarted](/docs/ne ### `global` section options -| setting | default | info | -|:----------------------------------:|:--------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| process scheduling policy | `keep` | See [Netdata process scheduling policy](/src/daemon/README.md#process-scheduling-policy-unix-only) | -| OOM score | `0` | | -| glibc malloc arena max for plugins | `1` | | -| glibc malloc arena max for Netdata | `1` | | -| hostname | auto-detected | The hostname of the computer running Netdata. | -| host access prefix | empty | This is used in Docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). | -| timezone | auto-detected | The timezone retrieved from the environment variable | -| run as user | `netdata` | The user Netdata will run as. | -| pthread stack size | auto-detected | | -| crash reports | `all` or `off` | It is `off` when anonymous telemetry is disabled, otherwise `all`. When it is `all` Netdata reports agent restarts and crashes. It can also be `crashes` to report only crashes. Each kind of event is deduplicated and reported at most once per day. | +| setting | default | info | +|:----------------------------------:|:--------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| process scheduling policy | `keep` | See [Netdata process scheduling policy](/src/daemon/README.md#process-scheduling-policy-unix-only) | +| OOM score | `0` | | +| glibc malloc arena max for plugins | `1` | | +| glibc malloc arena max for Netdata | `1` | | +| hostname | auto-detected | The hostname of the computer running Netdata. | +| host access prefix | empty | This is used in Docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). | +| timezone | auto-detected | The timezone retrieved from the environment variable | +| run as user | `netdata` | The user Netdata will run as. | +| pthread stack size | auto-detected | | +| crash reports | `all` or `off` | `all` when anonymous telemetry is enabled, or the agent is claimed or connected to Netdata Cloud (directly or via a Netdata Parent). When it is `all` Netdata reports restarts and crashes. It can also be `crashes` to report only crashes. When it is `off` nothing is reported. Each kind of event is deduplicated and reported at most once per day. [Read more at this blog post](https://www.netdata.cloud/blog/2025-03-06-monitoring-netdata-restarts/). | ### `db` section options diff --git a/src/daemon/daemon-status-file.c b/src/daemon/daemon-status-file.c index 4f2e2e6643..30a2615c4e 100644 --- a/src/daemon/daemon-status-file.c +++ b/src/daemon/daemon-status-file.c @@ -866,13 +866,15 @@ enum crash_report_t { }; static enum crash_report_t check_crash_reports_config(void) { - bool analytics = analytics_check_enabled(); + bool default_enabled = analytics_check_enabled() || + !UUIDiszero(session_status.node_id) || !UUIDiszero(last_session_status.node_id) || + !UUIDiszero(session_status.claim_id) || !UUIDiszero(last_session_status.claim_id); - const char *t = inicfg_get(&netdata_config, CONFIG_SECTION_GLOBAL, "crash reports", analytics ? "all" : "off"); + const char *t = inicfg_get(&netdata_config, CONFIG_SECTION_GLOBAL, "crash reports", default_enabled ? "all" : "off"); enum crash_report_t rc; if(!t || !*t) - rc = analytics ? DSF_REPORT_ALL : DSF_REPORT_DISABLED; + rc = default_enabled ? DSF_REPORT_ALL : DSF_REPORT_DISABLED; else if(strcmp(t, "all") == 0) rc = DSF_REPORT_ALL; else if(strcmp(t, "crashes") == 0) diff --git a/src/daemon/pulse/pulse.h b/src/daemon/pulse/pulse.h index a789d3b2c8..ea12281f00 100644 --- a/src/daemon/pulse/pulse.h +++ b/src/daemon/pulse/pulse.h @@ -42,11 +42,6 @@ void *pulse_thread_memory_extended_main(void *ptr); #define p1_store(variable, value) __atomic_store_n(variable, value, __ATOMIC_RELAXED) #define p1_load(variable) __atomic_load_n(variable, value, __ATOMIC_RELAXED) -#if !defined(PULSE_EXTENDED_STATISTICS) && (defined(NETDATA_INTERNAL_CHECKS) || defined(NETDATA_GOD_MODE)) -#define PULSE_EXTENDED_STATISTICS -#endif - -#if defined(PULSE_EXTENDED_STATISTICS) #define p2_add_fetch(variable, value) __atomic_add_fetch(variable, value, __ATOMIC_RELAXED) #define p2_sub_fetch(variable, value) __atomic_sub_fetch(variable, value, __ATOMIC_RELAXED) @@ -55,15 +50,5 @@ void *pulse_thread_memory_extended_main(void *ptr); #define p2_store(variable, value) __atomic_store_n(variable, value, __ATOMIC_RELAXED) #define p2_load(variable) __atomic_load_n(variable, value, __ATOMIC_RELAXED) -#else -#define p2_add_fetch(variable, value) debug_dummy() -#define p2_sub_fetch(variable, value) debug_dummy() - -#define p2_fetch_add(variable, value) debug_dummy() -#define p2_fetch_sub(variable, value) debug_dummy() - -#define p2_store(variable, value) debug_dummy() -#define p2_load(variable) debug_dummy() -#endif #endif /* NETDATA_PULSE_H */ diff --git a/src/database/engine/cache.c b/src/database/engine/cache.c index 8b934a686f..1556714a3c 100644 --- a/src/database/engine/cache.c +++ b/src/database/engine/cache.c @@ -334,7 +334,8 @@ static inline void pgc_size_histogram_del(PGC *cache, struct pgc_size_histogram // ---------------------------------------------------------------------------- // evictions control -static ALWAYS_INLINE int64_t pgc_threshold(ssize_t threshold, int64_t wanted, int64_t current, int64_t clean) { +ALWAYS_INLINE +static int64_t pgc_threshold(ssize_t threshold, int64_t wanted, int64_t current, int64_t clean) { if(current < clean) current = clean; @@ -348,6 +349,18 @@ static ALWAYS_INLINE int64_t pgc_threshold(ssize_t threshold, int64_t wanted, in return ret; } +ALWAYS_INLINE +static int64_t pgc_wanted_size(const int64_t hot, const int64_t hot_max, const int64_t dirty_max, const int64_t index) { + // our promise to users + const int64_t max_size1 = MAX(hot_max, hot) * 2; + + // protection against slow flushing + const int64_t max_size2 = hot_max + MAX(dirty_max * 2, hot_max * 2 / 3) + index; + + // the final wanted cache size + return MIN(max_size1, max_size2); +} + static ssize_t cache_usage_per1000(PGC *cache, int64_t *size_to_evict) { if(size_to_evict) @@ -372,20 +385,15 @@ static ssize_t cache_usage_per1000(PGC *cache, int64_t *size_to_evict) { const int64_t dirty_max = __atomic_load_n(&cache->dirty.stats->max_size, __ATOMIC_RELAXED); const int64_t hot_max = __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED); - // our promise to users - const int64_t max_size1 = MAX(hot_max, hot) * 2; - - // protection against slow flushing - const int64_t max_size2 = hot_max + ((dirty_max * 2 < hot_max * 2 / 3) ? hot_max * 2 / 3 : dirty_max * 2) + index; - - // the final wanted cache size - wanted_cache_size = MIN(max_size1, max_size2); - if(cache->config.dynamic_target_size_cb) { + wanted_cache_size = pgc_wanted_size(hot, hot, dirty, index); + const int64_t wanted_cache_size_cb = cache->config.dynamic_target_size_cb(); if(wanted_cache_size_cb > wanted_cache_size) wanted_cache_size = wanted_cache_size_cb; } + else + wanted_cache_size = pgc_wanted_size(hot, hot_max, dirty_max, index); if (wanted_cache_size < hot + dirty + index + cache->config.clean_size) wanted_cache_size = hot + dirty + index + cache->config.clean_size; diff --git a/src/database/engine/journalfile.c b/src/database/engine/journalfile.c index cff6bbe8ad..601c181e34 100644 --- a/src/database/engine/journalfile.c +++ b/src/database/engine/journalfile.c @@ -1329,7 +1329,7 @@ void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno int fd_v2; uint8_t *data_start = nd_mmap_advanced(path, total_file_size, MAP_SHARED, 0, false, true, &fd_v2); if(!data_start) - fatal("DBENGINE: failed to memory map file '%s' of size %zu.", path, total_file_size); + out_of_memory(__FUNCTION__, total_file_size, path); memset(data_start, 0, extent_offset); diff --git a/src/libnetdata/aral/aral.c b/src/libnetdata/aral/aral.c index d6523d080d..10b10179c4 100644 --- a/src/libnetdata/aral/aral.c +++ b/src/libnetdata/aral/aral.c @@ -544,8 +544,7 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ page->data = nd_mmap_advanced(page->filename, size, MAP_SHARED, 0, false, ar->config.options & ARAL_DONT_DUMP, NULL); if (unlikely(!page->data)) - fatal("ARAL: '%s' cannot allocate aral buffer of size %zu on filename '%s'", - ar->config.name, size, page->filename); + out_of_memory(__FUNCTION__, size, page->filename); total_size = size + sizeof(ARAL_PAGE); stats = &ar->stats->mmap; diff --git a/src/libnetdata/memory/nd-mallocz.c b/src/libnetdata/memory/nd-mallocz.c index 976e57e1a6..bd0f73e741 100644 --- a/src/libnetdata/memory/nd-mallocz.c +++ b/src/libnetdata/memory/nd-mallocz.c @@ -8,7 +8,7 @@ void mallocz_register_out_of_memory_cb(out_of_memory_cb cb) { } ALWAYS_INLINE NORETURN -static void out_of_memory(const char *call, size_t size) { +void out_of_memory(const char *call, size_t size, const char *details) { exit_initiated_add(EXIT_REASON_OUT_OF_MEMORY); if(out_of_memory_callback) @@ -33,10 +33,12 @@ static void out_of_memory(const char *call, size_t size) { fatal("Out of memory on %s(%zu bytes)!\n" "System memory available: %s, while our max RSS usage is: %s\n" - "O/S mmap limit: %llu, while our mmap count is: %zu", + "O/S mmap limit: %llu, while our mmap count is: %zu\n" + "Additional details: %s", call, size, mem_available, rss_used, - os_mmap_limit(), __atomic_load_n(&nd_mmap_count, __ATOMIC_RELAXED)); + os_mmap_limit(), __atomic_load_n(&nd_mmap_count, __ATOMIC_RELAXED), + details ? details : "none"); } // ---------------------------------------------------------------------------- @@ -429,7 +431,7 @@ char *strdupz(const char *s) { char *t = strdup(s); if (unlikely(!t)) - out_of_memory(__FUNCTION__ , strlen(s) + 1); + out_of_memory(__FUNCTION__ , strlen(s) + 1, NULL); return t; } @@ -440,7 +442,7 @@ char *strndupz(const char *s, size_t len) { char *t = strndup(s, len); if (unlikely(!t)) - out_of_memory(__FUNCTION__ , len + 1); + out_of_memory(__FUNCTION__ , len + 1, NULL); return t; } @@ -459,7 +461,7 @@ void *mallocz(size_t size) { workers_memory_call(WORKERS_MEMORY_CALL_LIBC_MALLOC); void *p = malloc(size); if (unlikely(!p)) - out_of_memory(__FUNCTION__, size); + out_of_memory(__FUNCTION__, size, NULL); return p; } @@ -469,7 +471,7 @@ void *callocz(size_t nmemb, size_t size) { workers_memory_call(WORKERS_MEMORY_CALL_LIBC_CALLOC); void *p = calloc(nmemb, size); if (unlikely(!p)) - out_of_memory(__FUNCTION__, nmemb * size); + out_of_memory(__FUNCTION__, nmemb * size, NULL); return p; } @@ -479,7 +481,7 @@ void *reallocz(void *ptr, size_t size) { workers_memory_call(WORKERS_MEMORY_CALL_LIBC_REALLOC); void *p = realloc(ptr, size); if (unlikely(!p)) - out_of_memory(__FUNCTION__, size); + out_of_memory(__FUNCTION__, size, NULL); return p; } @@ -489,7 +491,7 @@ int posix_memalignz(void **memptr, size_t alignment, size_t size) { workers_memory_call(WORKERS_MEMORY_CALL_LIBC_POSIX_MEMALIGN); int rc = posix_memalign(memptr, alignment, size); if(unlikely(rc)) - out_of_memory(__FUNCTION__, size); + out_of_memory(__FUNCTION__, size, NULL); return rc; } diff --git a/src/libnetdata/memory/nd-mallocz.h b/src/libnetdata/memory/nd-mallocz.h index d5aa6ba116..285763a838 100644 --- a/src/libnetdata/memory/nd-mallocz.h +++ b/src/libnetdata/memory/nd-mallocz.h @@ -67,4 +67,7 @@ void posix_memalign_freez(void *ptr); typedef void (*out_of_memory_cb)(void); void mallocz_register_out_of_memory_cb(out_of_memory_cb cb); +NORETURN +void out_of_memory(const char *call, size_t size, const char *details); + #endif //NETDATA_ND_MALLOCZ_H diff --git a/src/libnetdata/uuid/uuidmap.c b/src/libnetdata/uuid/uuidmap.c index 33e8bd867a..b45d0b949f 100644 --- a/src/libnetdata/uuid/uuidmap.c +++ b/src/libnetdata/uuid/uuidmap.c @@ -61,7 +61,7 @@ static void uuidmap_init_aral(void) { static UUIDMAP_ID get_next_id_unsafe(struct uuidmap_partition *partition) { // Check if we've reached the maximum ID value - if (partition->next_id >= 0x1FFFFFFF) + if (unlikely(partition->next_id >= 0x1FFFFFFF)) fatal("UUIDMAP: Maximum ID limit reached for partition %u. UUIDs exhausted.", (unsigned int)(partition - uuid_map.p));