
minor fixes ()

* extended dbengine stats should be enabled

* crash reports are enabled when the agent is claimed (directly or indirectly)

* make mmap() report out of memory

* for open cache, use the current hot size, not the max ever used
Costa Tsaousis 2025-03-13 14:25:55 +00:00 committed by GitHub
parent 35214fd7ac
commit b51fdecd43
9 changed files with 52 additions and 53 deletions

View file

@@ -35,18 +35,18 @@ After `netdata.conf` has been modified, Netdata needs to be [restarted](/docs/ne
### `global` section options
| setting | default | info |
|:----------------------------------:|:--------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| process scheduling policy | `keep` | See [Netdata process scheduling policy](/src/daemon/README.md#process-scheduling-policy-unix-only) |
| OOM score | `0` | |
| glibc malloc arena max for plugins | `1` | |
| glibc malloc arena max for Netdata | `1` | |
| hostname | auto-detected | The hostname of the computer running Netdata. |
| host access prefix | empty | This is used in Docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
| timezone | auto-detected | The timezone retrieved from the environment variable |
| run as user | `netdata` | The user Netdata will run as. |
| pthread stack size | auto-detected | |
| crash reports | `all` or `off` | It is `off` when anonymous telemetry is disabled, otherwise `all`. When it is `all` Netdata reports agent restarts and crashes. It can also be `crashes` to report only crashes. Each kind of event is deduplicated and reported at most once per day. |
| setting | default | info |
|:----------------------------------:|:--------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| process scheduling policy | `keep` | See [Netdata process scheduling policy](/src/daemon/README.md#process-scheduling-policy-unix-only) |
| OOM score | `0` | |
| glibc malloc arena max for plugins | `1` | |
| glibc malloc arena max for Netdata | `1` | |
| hostname | auto-detected | The hostname of the computer running Netdata. |
| host access prefix | empty | This is used in Docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
| timezone | auto-detected | The timezone retrieved from the environment variable |
| run as user | `netdata` | The user Netdata will run as. |
| pthread stack size | auto-detected | |
| crash reports | `all` or `off` | `all` when anonymous telemetry is enabled, or the agent is claimed or connected to Netdata Cloud (directly or via a Netdata Parent). When it is `all` Netdata reports restarts and crashes. It can also be `crashes` to report only crashes. When it is `off` nothing is reported. Each kind of event is deduplicated and reported at most once per day. [Read more at this blog post](https://www.netdata.cloud/blog/2025-03-06-monitoring-netdata-restarts/). |
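
For example, to force crash-only reporting regardless of the telemetry and claiming defaults, the option can be set in the `[global]` section of `netdata.conf` (a minimal sketch; valid values are `all`, `crashes`, and `off` as described above):

```
[global]
    crash reports = crashes
```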
### `db` section options

View file

@@ -866,13 +866,15 @@ enum crash_report_t {
};
static enum crash_report_t check_crash_reports_config(void) {
bool analytics = analytics_check_enabled();
bool default_enabled = analytics_check_enabled() ||
!UUIDiszero(session_status.node_id) || !UUIDiszero(last_session_status.node_id) ||
!UUIDiszero(session_status.claim_id) || !UUIDiszero(last_session_status.claim_id);
const char *t = inicfg_get(&netdata_config, CONFIG_SECTION_GLOBAL, "crash reports", analytics ? "all" : "off");
const char *t = inicfg_get(&netdata_config, CONFIG_SECTION_GLOBAL, "crash reports", default_enabled ? "all" : "off");
enum crash_report_t rc;
if(!t || !*t)
rc = analytics ? DSF_REPORT_ALL : DSF_REPORT_DISABLED;
rc = default_enabled ? DSF_REPORT_ALL : DSF_REPORT_DISABLED;
else if(strcmp(t, "all") == 0)
rc = DSF_REPORT_ALL;
else if(strcmp(t, "crashes") == 0)

View file

@@ -42,11 +42,6 @@ void *pulse_thread_memory_extended_main(void *ptr);
#define p1_store(variable, value) __atomic_store_n(variable, value, __ATOMIC_RELAXED)
#define p1_load(variable) __atomic_load_n(variable, __ATOMIC_RELAXED)
#if !defined(PULSE_EXTENDED_STATISTICS) && (defined(NETDATA_INTERNAL_CHECKS) || defined(NETDATA_GOD_MODE))
#define PULSE_EXTENDED_STATISTICS
#endif
#if defined(PULSE_EXTENDED_STATISTICS)
#define p2_add_fetch(variable, value) __atomic_add_fetch(variable, value, __ATOMIC_RELAXED)
#define p2_sub_fetch(variable, value) __atomic_sub_fetch(variable, value, __ATOMIC_RELAXED)
@@ -55,15 +50,5 @@ void *pulse_thread_memory_extended_main(void *ptr);
#define p2_store(variable, value) __atomic_store_n(variable, value, __ATOMIC_RELAXED)
#define p2_load(variable) __atomic_load_n(variable, __ATOMIC_RELAXED)
#else
#define p2_add_fetch(variable, value) debug_dummy()
#define p2_sub_fetch(variable, value) debug_dummy()
#define p2_fetch_add(variable, value) debug_dummy()
#define p2_fetch_sub(variable, value) debug_dummy()
#define p2_store(variable, value) debug_dummy()
#define p2_load(variable) debug_dummy()
#endif
#endif /* NETDATA_PULSE_H */

View file

@@ -334,7 +334,8 @@ static inline void pgc_size_histogram_del(PGC *cache, struct pgc_size_histogram
// ----------------------------------------------------------------------------
// evictions control
static ALWAYS_INLINE int64_t pgc_threshold(ssize_t threshold, int64_t wanted, int64_t current, int64_t clean) {
ALWAYS_INLINE
static int64_t pgc_threshold(ssize_t threshold, int64_t wanted, int64_t current, int64_t clean) {
if(current < clean)
current = clean;
@@ -348,6 +349,18 @@ static ALWAYS_INLINE int64_t pgc_threshold(ssize_t threshold, int64_t wanted, in
return ret;
}
ALWAYS_INLINE
static int64_t pgc_wanted_size(const int64_t hot, const int64_t hot_max, const int64_t dirty_max, const int64_t index) {
// our promise to users
const int64_t max_size1 = MAX(hot_max, hot) * 2;
// protection against slow flushing
const int64_t max_size2 = hot_max + MAX(dirty_max * 2, hot_max * 2 / 3) + index;
// the final wanted cache size
return MIN(max_size1, max_size2);
}
static ssize_t cache_usage_per1000(PGC *cache, int64_t *size_to_evict) {
if(size_to_evict)
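
To make the arithmetic of the new `pgc_wanted_size()` helper concrete, here is a small standalone sketch; the sizes are hypothetical (in MiB), chosen only to walk through the formula:

```c
#include <stdio.h>
#include <stdint.h>

#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))

int main(void) {
    // hypothetical cache sizes in MiB; index_size corresponds to 'index' in the helper
    const int64_t hot = 100, hot_max = 120, dirty_max = 10, index_size = 5;

    const int64_t max_size1 = MAX(hot_max, hot) * 2;                                       // 240: our promise to users
    const int64_t max_size2 = hot_max + MAX(dirty_max * 2, hot_max * 2 / 3) + index_size;  // 120 + 80 + 5 = 205: slow-flushing protection
    const int64_t wanted = MIN(max_size1, max_size2);                                      // 205

    printf("wanted cache size = %lld MiB\n", (long long)wanted);
    return 0;
}
```

With these made-up figures the slow-flushing bound (205) is tighter than the doubled hot maximum (240). The refactor keeps the formula intact; it only lets the `dynamic_target_size_cb` path below feed it the current hot and dirty sizes instead of the historical maxima, per the "open cache" note in the commit message.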
@@ -372,20 +385,15 @@ static ssize_t cache_usage_per1000(PGC *cache, int64_t *size_to_evict) {
const int64_t dirty_max = __atomic_load_n(&cache->dirty.stats->max_size, __ATOMIC_RELAXED);
const int64_t hot_max = __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED);
// our promise to users
const int64_t max_size1 = MAX(hot_max, hot) * 2;
// protection against slow flushing
const int64_t max_size2 = hot_max + ((dirty_max * 2 < hot_max * 2 / 3) ? hot_max * 2 / 3 : dirty_max * 2) + index;
// the final wanted cache size
wanted_cache_size = MIN(max_size1, max_size2);
if(cache->config.dynamic_target_size_cb) {
wanted_cache_size = pgc_wanted_size(hot, hot, dirty, index);
const int64_t wanted_cache_size_cb = cache->config.dynamic_target_size_cb();
if(wanted_cache_size_cb > wanted_cache_size)
wanted_cache_size = wanted_cache_size_cb;
}
else
wanted_cache_size = pgc_wanted_size(hot, hot_max, dirty_max, index);
if (wanted_cache_size < hot + dirty + index + cache->config.clean_size)
wanted_cache_size = hot + dirty + index + cache->config.clean_size;

View file

@@ -1329,7 +1329,7 @@ void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno
int fd_v2;
uint8_t *data_start = nd_mmap_advanced(path, total_file_size, MAP_SHARED, 0, false, true, &fd_v2);
if(!data_start)
fatal("DBENGINE: failed to memory map file '%s' of size %zu.", path, total_file_size);
out_of_memory(__FUNCTION__, total_file_size, path);
memset(data_start, 0, extent_offset);

View file

@@ -544,8 +544,7 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_
page->data =
nd_mmap_advanced(page->filename, size, MAP_SHARED, 0, false, ar->config.options & ARAL_DONT_DUMP, NULL);
if (unlikely(!page->data))
fatal("ARAL: '%s' cannot allocate aral buffer of size %zu on filename '%s'",
ar->config.name, size, page->filename);
out_of_memory(__FUNCTION__, size, page->filename);
total_size = size + sizeof(ARAL_PAGE);
stats = &ar->stats->mmap;

View file

@@ -8,7 +8,7 @@ void mallocz_register_out_of_memory_cb(out_of_memory_cb cb) {
}
ALWAYS_INLINE NORETURN
static void out_of_memory(const char *call, size_t size) {
void out_of_memory(const char *call, size_t size, const char *details) {
exit_initiated_add(EXIT_REASON_OUT_OF_MEMORY);
if(out_of_memory_callback)
@@ -33,10 +33,12 @@ static void out_of_memory(const char *call, size_t size) {
fatal("Out of memory on %s(%zu bytes)!\n"
"System memory available: %s, while our max RSS usage is: %s\n"
"O/S mmap limit: %llu, while our mmap count is: %zu",
"O/S mmap limit: %llu, while our mmap count is: %zu\n"
"Additional details: %s",
call, size,
mem_available, rss_used,
os_mmap_limit(), __atomic_load_n(&nd_mmap_count, __ATOMIC_RELAXED));
os_mmap_limit(), __atomic_load_n(&nd_mmap_count, __ATOMIC_RELAXED),
details ? details : "none");
}
// ----------------------------------------------------------------------------
@@ -429,7 +431,7 @@ char *strdupz(const char *s) {
char *t = strdup(s);
if (unlikely(!t))
out_of_memory(__FUNCTION__ , strlen(s) + 1);
out_of_memory(__FUNCTION__ , strlen(s) + 1, NULL);
return t;
}
@@ -440,7 +442,7 @@ char *strndupz(const char *s, size_t len) {
char *t = strndup(s, len);
if (unlikely(!t))
out_of_memory(__FUNCTION__ , len + 1);
out_of_memory(__FUNCTION__ , len + 1, NULL);
return t;
}
@@ -459,7 +461,7 @@ void *mallocz(size_t size) {
workers_memory_call(WORKERS_MEMORY_CALL_LIBC_MALLOC);
void *p = malloc(size);
if (unlikely(!p))
out_of_memory(__FUNCTION__, size);
out_of_memory(__FUNCTION__, size, NULL);
return p;
}
@@ -469,7 +471,7 @@ void *callocz(size_t nmemb, size_t size) {
workers_memory_call(WORKERS_MEMORY_CALL_LIBC_CALLOC);
void *p = calloc(nmemb, size);
if (unlikely(!p))
out_of_memory(__FUNCTION__, nmemb * size);
out_of_memory(__FUNCTION__, nmemb * size, NULL);
return p;
}
@@ -479,7 +481,7 @@ void *reallocz(void *ptr, size_t size) {
workers_memory_call(WORKERS_MEMORY_CALL_LIBC_REALLOC);
void *p = realloc(ptr, size);
if (unlikely(!p))
out_of_memory(__FUNCTION__, size);
out_of_memory(__FUNCTION__, size, NULL);
return p;
}
@@ -489,7 +491,7 @@ int posix_memalignz(void **memptr, size_t alignment, size_t size) {
workers_memory_call(WORKERS_MEMORY_CALL_LIBC_POSIX_MEMALIGN);
int rc = posix_memalign(memptr, alignment, size);
if(unlikely(rc))
out_of_memory(__FUNCTION__, size);
out_of_memory(__FUNCTION__, size, NULL);
return rc;
}

View file

@@ -67,4 +67,7 @@ void posix_memalign_freez(void *ptr);
typedef void (*out_of_memory_cb)(void);
void mallocz_register_out_of_memory_cb(out_of_memory_cb cb);
NORETURN
void out_of_memory(const char *call, size_t size, const char *details);
#endif //NETDATA_ND_MALLOCZ_H
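
For reference, a hedged usage sketch of the newly exported declaration, mirroring the dbengine journalfile call site changed earlier in this commit; `nd_mmap_advanced()` and the surrounding variables are netdata internals, so this is not a standalone program:

```c
// sketch: an mmap failure now reports which file could not be mapped
uint8_t *data_start = nd_mmap_advanced(path, total_file_size, MAP_SHARED, 0, false, true, &fd_v2);
if (!data_start)
    out_of_memory(__FUNCTION__, total_file_size, path);   // 'path' is printed as "Additional details: ..."
// callers without extra context (e.g. the mallocz/strdupz wrappers) pass NULL, which is printed as "none"
```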

View file

@@ -61,7 +61,7 @@ static void uuidmap_init_aral(void) {
static UUIDMAP_ID get_next_id_unsafe(struct uuidmap_partition *partition) {
// Check if we've reached the maximum ID value
if (partition->next_id >= 0x1FFFFFFF)
if (unlikely(partition->next_id >= 0x1FFFFFFF))
fatal("UUIDMAP: Maximum ID limit reached for partition %u. UUIDs exhausted.",
(unsigned int)(partition - uuid_map.p));