mirror of https://github.com/netdata/netdata.git
synced 2025-04-21 04:10:38 +00:00
minor fixes (#19849)
* extended dbengine stats should be enabled
* crash reports are enabled when the agent is claimed (directly or indirectly)
* make mmap() report out of memory
* for the open cache, use the current hot size, not the max ever used
This commit is contained in:
parent
35214fd7ac
commit
b51fdecd43
9 changed files with 52 additions and 53 deletions
src/daemon
src/database/engine
src/libnetdata

@@ -35,18 +35,18 @@ After `netdata.conf` has been modified, Netdata needs to be [restarted](/docs/ne

### `global` section options

| setting | default | info |
|:---:|:---:|:---|
| process scheduling policy | `keep` | See [Netdata process scheduling policy](/src/daemon/README.md#process-scheduling-policy-unix-only) |
| OOM score | `0` | |
| glibc malloc arena max for plugins | `1` | |
| glibc malloc arena max for Netdata | `1` | |
| hostname | auto-detected | The hostname of the computer running Netdata. |
| host access prefix | empty | This is used in Docker environments where /proc, /sys, etc. have to be accessed via another path. You may also have to set the SYS_PTRACE capability on the Docker container for this to work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
| timezone | auto-detected | The timezone retrieved from the environment variable. |
| run as user | `netdata` | The user Netdata will run as. |
| pthread stack size | auto-detected | |
- | crash reports | `all` or `off` | It is `off` when anonymous telemetry is disabled, otherwise `all`. When it is `all` Netdata reports agent restarts and crashes. It can also be `crashes` to report only crashes. Each kind of event is deduplicated and reported at most once per day. |
+ | crash reports | `all` or `off` | `all` when anonymous telemetry is enabled, or the agent is claimed or connected to Netdata Cloud (directly or via a Netdata Parent). When it is `all` Netdata reports restarts and crashes. It can also be `crashes` to report only crashes. When it is `off` nothing is reported. Each kind of event is deduplicated and reported at most once per day. [Read more at this blog post](https://www.netdata.cloud/blog/2025-03-06-monitoring-netdata-restarts/). |

### `db` section options

@@ -866,13 +866,15 @@ enum crash_report_t {
 };
 
 static enum crash_report_t check_crash_reports_config(void) {
-    bool analytics = analytics_check_enabled();
+    bool default_enabled = analytics_check_enabled() ||
+        !UUIDiszero(session_status.node_id) || !UUIDiszero(last_session_status.node_id) ||
+        !UUIDiszero(session_status.claim_id) || !UUIDiszero(last_session_status.claim_id);
 
-    const char *t = inicfg_get(&netdata_config, CONFIG_SECTION_GLOBAL, "crash reports", analytics ? "all" : "off");
+    const char *t = inicfg_get(&netdata_config, CONFIG_SECTION_GLOBAL, "crash reports", default_enabled ? "all" : "off");
 
     enum crash_report_t rc;
     if(!t || !*t)
-        rc = analytics ? DSF_REPORT_ALL : DSF_REPORT_DISABLED;
+        rc = default_enabled ? DSF_REPORT_ALL : DSF_REPORT_DISABLED;
     else if(strcmp(t, "all") == 0)
         rc = DSF_REPORT_ALL;
     else if(strcmp(t, "crashes") == 0)
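
The net effect of this hunk: crash reporting defaults to `all` not only when anonymous telemetry is enabled, but also when the node has (or previously had) a node ID or claim ID, i.e. when it is claimed or connected to Netdata Cloud directly or through a Parent. A minimal standalone sketch of that default selection, using hypothetical helper names rather than netdata's internals:

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins, for illustration only:
 *   telemetry_enabled()    ~ analytics_check_enabled()
 *   claimed_or_connected() ~ any of the session/claim UUIDs being non-zero */
static bool telemetry_enabled(void)    { return false; }
static bool claimed_or_connected(void) { return true;  }

/* Mirrors the default chosen by check_crash_reports_config() above. */
static const char *crash_reports_default(void) {
    bool default_enabled = telemetry_enabled() || claimed_or_connected();
    return default_enabled ? "all" : "off";
}

int main(void) {
    /* A claimed agent with telemetry disabled still defaults to "all". */
    printf("crash reports = %s\n", crash_reports_default());
    return 0;
}
```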

@@ -42,11 +42,6 @@ void *pulse_thread_memory_extended_main(void *ptr);
 #define p1_store(variable, value) __atomic_store_n(variable, value, __ATOMIC_RELAXED)
 #define p1_load(variable) __atomic_load_n(variable, __ATOMIC_RELAXED)
 
-#if !defined(PULSE_EXTENDED_STATISTICS) && (defined(NETDATA_INTERNAL_CHECKS) || defined(NETDATA_GOD_MODE))
-#define PULSE_EXTENDED_STATISTICS
-#endif
-
-#if defined(PULSE_EXTENDED_STATISTICS)
 #define p2_add_fetch(variable, value) __atomic_add_fetch(variable, value, __ATOMIC_RELAXED)
 #define p2_sub_fetch(variable, value) __atomic_sub_fetch(variable, value, __ATOMIC_RELAXED)
 

@@ -55,15 +50,5 @@ void *pulse_thread_memory_extended_main(void *ptr);
 
 #define p2_store(variable, value) __atomic_store_n(variable, value, __ATOMIC_RELAXED)
 #define p2_load(variable) __atomic_load_n(variable, __ATOMIC_RELAXED)
-#else
-#define p2_add_fetch(variable, value) debug_dummy()
-#define p2_sub_fetch(variable, value) debug_dummy()
-
-#define p2_fetch_add(variable, value) debug_dummy()
-#define p2_fetch_sub(variable, value) debug_dummy()
-
-#define p2_store(variable, value) debug_dummy()
-#define p2_load(variable) debug_dummy()
-#endif
 
 #endif /* NETDATA_PULSE_H */
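
With the conditional compilation removed, the extended (p2_*) statistics macros always expand to real relaxed atomics instead of debug_dummy() no-ops, which is what "extended dbengine stats should be enabled" refers to. A generic, self-contained illustration of this relaxed-atomic counter pattern (not netdata code):

```c
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/* Same pattern as the p2_* macros: relaxed atomics for statistics counters. */
#define stat_add_fetch(var, value) __atomic_add_fetch(var, value, __ATOMIC_RELAXED)
#define stat_load(var)             __atomic_load_n(var, __ATOMIC_RELAXED)

static uint64_t pages_added = 0;   /* a hypothetical extended-statistics counter */

static void *worker(void *arg) {
    (void)arg;
    for (int i = 0; i < 100000; i++)
        stat_add_fetch(&pages_added, 1);   /* cheap: no ordering guarantees needed */
    return NULL;
}

int main(void) {
    pthread_t t[4];
    for (int i = 0; i < 4; i++) pthread_create(&t[i], NULL, worker, NULL);
    for (int i = 0; i < 4; i++) pthread_join(t[i], NULL);
    printf("pages added: %llu\n", (unsigned long long)stat_load(&pages_added));
    return 0;
}
```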

@@ -334,7 +334,8 @@ static inline void pgc_size_histogram_del(PGC *cache, struct pgc_size_histogram
 // ----------------------------------------------------------------------------
 // evictions control
 
-static ALWAYS_INLINE int64_t pgc_threshold(ssize_t threshold, int64_t wanted, int64_t current, int64_t clean) {
+ALWAYS_INLINE
+static int64_t pgc_threshold(ssize_t threshold, int64_t wanted, int64_t current, int64_t clean) {
     if(current < clean)
         current = clean;
 

@@ -348,6 +349,18 @@ static ALWAYS_INLINE int64_t pgc_threshold(ssize_t threshold, int64_t wanted, in
     return ret;
 }
 
+ALWAYS_INLINE
+static int64_t pgc_wanted_size(const int64_t hot, const int64_t hot_max, const int64_t dirty_max, const int64_t index) {
+    // our promise to users
+    const int64_t max_size1 = MAX(hot_max, hot) * 2;
+
+    // protection against slow flushing
+    const int64_t max_size2 = hot_max + MAX(dirty_max * 2, hot_max * 2 / 3) + index;
+
+    // the final wanted cache size
+    return MIN(max_size1, max_size2);
+}
+
 static ssize_t cache_usage_per1000(PGC *cache, int64_t *size_to_evict) {
 
     if(size_to_evict)

@@ -372,20 +385,15 @@ static ssize_t cache_usage_per1000(PGC *cache, int64_t *size_to_evict) {
     const int64_t dirty_max = __atomic_load_n(&cache->dirty.stats->max_size, __ATOMIC_RELAXED);
     const int64_t hot_max = __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED);
 
-    // our promise to users
-    const int64_t max_size1 = MAX(hot_max, hot) * 2;
-
-    // protection against slow flushing
-    const int64_t max_size2 = hot_max + ((dirty_max * 2 < hot_max * 2 / 3) ? hot_max * 2 / 3 : dirty_max * 2) + index;
-
-    // the final wanted cache size
-    wanted_cache_size = MIN(max_size1, max_size2);
-
     if(cache->config.dynamic_target_size_cb) {
+        wanted_cache_size = pgc_wanted_size(hot, hot, dirty, index);
+
         const int64_t wanted_cache_size_cb = cache->config.dynamic_target_size_cb();
         if(wanted_cache_size_cb > wanted_cache_size)
             wanted_cache_size = wanted_cache_size_cb;
     }
+    else
+        wanted_cache_size = pgc_wanted_size(hot, hot_max, dirty_max, index);
 
     if (wanted_cache_size < hot + dirty + index + cache->config.clean_size)
         wanted_cache_size = hot + dirty + index + cache->config.clean_size;
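
A standalone numeric sketch of the formula above, with made-up sizes, showing why a cache with a dynamic_target_size_cb (per the commit message, the open cache) is now sized from its current hot and dirty sizes rather than the maximums it ever reached:

```c
#include <stdint.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Same formula as pgc_wanted_size() in the hunk above. */
static int64_t pgc_wanted_size(int64_t hot, int64_t hot_max, int64_t dirty_max, int64_t index) {
    const int64_t max_size1 = MAX(hot_max, hot) * 2;                                  // promise to users
    const int64_t max_size2 = hot_max + MAX(dirty_max * 2, hot_max * 2 / 3) + index;  // slow-flush protection
    return MIN(max_size1, max_size2);
}

int main(void) {
    const int64_t GiB = 1024LL * 1024 * 1024;
    // Made-up sizes: the cache once held 8 GiB hot, but currently holds only 2 GiB.
    int64_t hot = 2 * GiB, hot_max = 8 * GiB, dirty = 1 * GiB, dirty_max = 3 * GiB, index_size = 1 * GiB;

    // Non-callback path: sized from the historical maximums (prints 15).
    printf("from maximums: %lld GiB\n",
           (long long)(pgc_wanted_size(hot, hot_max, dirty_max, index_size) / GiB));

    // Callback path after this change: sized from the current hot/dirty (prints 4).
    printf("from current:  %lld GiB\n",
           (long long)(pgc_wanted_size(hot, hot, dirty, index_size) / GiB));
    return 0;
}
```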

@@ -1329,7 +1329,7 @@ void journalfile_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno
     int fd_v2;
     uint8_t *data_start = nd_mmap_advanced(path, total_file_size, MAP_SHARED, 0, false, true, &fd_v2);
     if(!data_start)
-        fatal("DBENGINE: failed to memory map file '%s' of size %zu.", path, total_file_size);
+        out_of_memory(__FUNCTION__, total_file_size, path);
 
     memset(data_start, 0, extent_offset);
 

@@ -544,8 +544,7 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_
     page->data =
         nd_mmap_advanced(page->filename, size, MAP_SHARED, 0, false, ar->config.options & ARAL_DONT_DUMP, NULL);
     if (unlikely(!page->data))
-        fatal("ARAL: '%s' cannot allocate aral buffer of size %zu on filename '%s'",
-              ar->config.name, size, page->filename);
+        out_of_memory(__FUNCTION__, size, page->filename);
 
     total_size = size + sizeof(ARAL_PAGE);
     stats = &ar->stats->mmap;
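
Both call sites in the last two hunks follow the same pattern: when a memory-mapped allocation fails, report it through the shared out-of-memory path and pass the file involved as the extra detail, instead of a bespoke fatal() message. A generic standalone sketch of that pattern using plain POSIX mmap(); the reporting helper here is a stand-in, not netdata's out_of_memory():

```c
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

/* Stand-in for out_of_memory(call, size, details): log with context and abort. */
static void report_out_of_memory(const char *call, size_t size, const char *details) {
    fprintf(stderr, "Out of memory on %s(%zu bytes), details: %s\n",
            call, size, details ? details : "none");
    exit(EXIT_FAILURE);
}

static void *map_file_or_die(const char *path, size_t size) {
    int fd = open(path, O_RDWR | O_CREAT, 0644);
    if (fd == -1 || ftruncate(fd, (off_t)size) == -1)
        report_out_of_memory(__func__, size, path);

    void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    close(fd);                       /* the mapping stays valid after close() */
    if (p == MAP_FAILED)             /* e.g. ENOMEM or the O/S mmap limit */
        report_out_of_memory(__func__, size, path);
    return p;
}

int main(void) {
    void *p = map_file_or_die("/tmp/example.map", 4096);
    munmap(p, 4096);
    return 0;
}
```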

@@ -8,7 +8,7 @@ void mallocz_register_out_of_memory_cb(out_of_memory_cb cb) {
 }
 
 ALWAYS_INLINE NORETURN
-static void out_of_memory(const char *call, size_t size) {
+void out_of_memory(const char *call, size_t size, const char *details) {
     exit_initiated_add(EXIT_REASON_OUT_OF_MEMORY);
 
     if(out_of_memory_callback)

@@ -33,10 +33,12 @@ static void out_of_memory(const char *call, size_t size) {
 
     fatal("Out of memory on %s(%zu bytes)!\n"
           "System memory available: %s, while our max RSS usage is: %s\n"
-          "O/S mmap limit: %llu, while our mmap count is: %zu",
+          "O/S mmap limit: %llu, while our mmap count is: %zu\n"
+          "Additional details: %s",
           call, size,
           mem_available, rss_used,
-          os_mmap_limit(), __atomic_load_n(&nd_mmap_count, __ATOMIC_RELAXED));
+          os_mmap_limit(), __atomic_load_n(&nd_mmap_count, __ATOMIC_RELAXED),
+          details ? details : "none");
 }
 
 // ----------------------------------------------------------------------------

@@ -429,7 +431,7 @@ char *strdupz(const char *s) {
 
     char *t = strdup(s);
     if (unlikely(!t))
-        out_of_memory(__FUNCTION__ , strlen(s) + 1);
+        out_of_memory(__FUNCTION__ , strlen(s) + 1, NULL);
 
     return t;
 }

@@ -440,7 +442,7 @@ char *strndupz(const char *s, size_t len) {
 
     char *t = strndup(s, len);
     if (unlikely(!t))
-        out_of_memory(__FUNCTION__ , len + 1);
+        out_of_memory(__FUNCTION__ , len + 1, NULL);
 
     return t;
 }

@@ -459,7 +461,7 @@ void *mallocz(size_t size) {
     workers_memory_call(WORKERS_MEMORY_CALL_LIBC_MALLOC);
     void *p = malloc(size);
     if (unlikely(!p))
-        out_of_memory(__FUNCTION__, size);
+        out_of_memory(__FUNCTION__, size, NULL);
 
     return p;
 }

@@ -469,7 +471,7 @@ void *callocz(size_t nmemb, size_t size) {
     workers_memory_call(WORKERS_MEMORY_CALL_LIBC_CALLOC);
     void *p = calloc(nmemb, size);
     if (unlikely(!p))
-        out_of_memory(__FUNCTION__, nmemb * size);
+        out_of_memory(__FUNCTION__, nmemb * size, NULL);
 
     return p;
 }

@@ -479,7 +481,7 @@ void *reallocz(void *ptr, size_t size) {
     workers_memory_call(WORKERS_MEMORY_CALL_LIBC_REALLOC);
     void *p = realloc(ptr, size);
     if (unlikely(!p))
-        out_of_memory(__FUNCTION__, size);
+        out_of_memory(__FUNCTION__, size, NULL);
 
     return p;
 }

@@ -489,7 +491,7 @@ int posix_memalignz(void **memptr, size_t alignment, size_t size) {
     workers_memory_call(WORKERS_MEMORY_CALL_LIBC_POSIX_MEMALIGN);
     int rc = posix_memalign(memptr, alignment, size);
     if(unlikely(rc))
-        out_of_memory(__FUNCTION__, size);
+        out_of_memory(__FUNCTION__, size, NULL);
 
     return rc;
 }

@@ -67,4 +67,7 @@ void posix_memalign_freez(void *ptr);
 typedef void (*out_of_memory_cb)(void);
 void mallocz_register_out_of_memory_cb(out_of_memory_cb cb);
 
+NORETURN
+void out_of_memory(const char *call, size_t size, const char *details);
+
 #endif //NETDATA_ND_MALLOCZ_H
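
The header now exports the reporter itself next to the callback hook that was already declared. A self-contained sketch of how the pieces are meant to fit together; the allocator wrapper, callback and reporter below are simplified stand-ins, not netdata's implementations:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef void (*out_of_memory_cb)(void);
static out_of_memory_cb out_of_memory_callback = NULL;

/* Callers register a hook that runs before the process dies, e.g. to flush state. */
static void register_out_of_memory_cb(out_of_memory_cb cb) { out_of_memory_callback = cb; }

/* Simplified reporter: same shape as out_of_memory(call, size, details). */
static void report_out_of_memory(const char *call, size_t size, const char *details) {
    if (out_of_memory_callback)
        out_of_memory_callback();
    fprintf(stderr, "Out of memory on %s(%zu bytes), details: %s\n",
            call, size, details ? details : "none");
    exit(EXIT_FAILURE);
}

/* Allocation wrappers never return NULL; failure is reported with context. */
static void *xmalloc(size_t size) {
    void *p = malloc(size);
    if (!p)
        report_out_of_memory(__func__, size, NULL);
    return p;
}

static void flush_on_oom(void) { fputs("flushing state before exit...\n", stderr); }

int main(void) {
    register_out_of_memory_cb(flush_on_oom);
    char *buf = xmalloc(64);
    strcpy(buf, "ok");
    puts(buf);
    free(buf);
    return 0;
}
```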

@@ -61,7 +61,7 @@ static void uuidmap_init_aral(void) {
 
 static UUIDMAP_ID get_next_id_unsafe(struct uuidmap_partition *partition) {
     // Check if we've reached the maximum ID value
-    if (partition->next_id >= 0x1FFFFFFF)
+    if (unlikely(partition->next_id >= 0x1FFFFFFF))
         fatal("UUIDMAP: Maximum ID limit reached for partition %u. UUIDs exhausted.",
               (unsigned int)(partition - uuid_map.p));
 
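
The only change here is wrapping the exhaustion check in unlikely(). As in most C projects, such a macro is typically a thin wrapper around the compiler's branch-prediction hint; a generic sketch of the idiom (the macro definitions below are an assumption, not copied from netdata's headers):

```c
#include <stdio.h>

/* Typical definitions of the hint macros. */
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

#define MAX_ID 0x1FFFFFFF

static unsigned next_id(unsigned *counter) {
    /* The exhaustion check almost never fires, so tell the compiler to
       lay out the increment as the fall-through fast path. */
    if (unlikely(*counter >= MAX_ID)) {
        fprintf(stderr, "ID space exhausted\n");
        return 0;
    }
    return ++(*counter);
}

int main(void) {
    unsigned counter = 0;
    for (int i = 0; i < 3; i++)
        printf("%u\n", next_id(&counter));
    return 0;
}
```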