mirror of
https://github.com/netdata/netdata.git
synced 2025-05-20 15:59:55 +00:00
proc.plugin: add pressure stall information (#7209)
* proc.plugin: add pressure stall information * dashboard_info: add "Pressure" section * proc.plugin: mention PSI collector in doc * dashboard_info: fix grammar in PSI section * proc_pressure: fix wrong line name for "full" metrics * proc_pressure: fix copypasta * proc_pressure: refactor to prepare for cgroup changes * cgroups.plugin: add pressure monitoring * add proc_pressure.h to targets * Makefile.am: fix indentation * cgroups.plugin: remove a useless comment * cgroups.plugin: fix pressure config name * proc.plugin: arrange pressure charts under corresponding sections * dashboard_info: rearrange pressure chart descriptions * dashboard_info: reword PSI descriptions
This commit is contained in:
parent
998470b66b
commit
8a70725c13
9 changed files with 512 additions and 0 deletions
|
@ -438,6 +438,8 @@ set(PROC_PLUGIN_FILES
|
||||||
collectors/proc.plugin/proc_sys_kernel_random_entropy_avail.c
|
collectors/proc.plugin/proc_sys_kernel_random_entropy_avail.c
|
||||||
collectors/proc.plugin/proc_vmstat.c
|
collectors/proc.plugin/proc_vmstat.c
|
||||||
collectors/proc.plugin/proc_uptime.c
|
collectors/proc.plugin/proc_uptime.c
|
||||||
|
collectors/proc.plugin/proc_pressure.c
|
||||||
|
collectors/proc.plugin/proc_pressure.h
|
||||||
collectors/proc.plugin/sys_kernel_mm_ksm.c
|
collectors/proc.plugin/sys_kernel_mm_ksm.c
|
||||||
collectors/proc.plugin/sys_block_zram.c
|
collectors/proc.plugin/sys_block_zram.c
|
||||||
collectors/proc.plugin/sys_devices_system_edac_mc.c
|
collectors/proc.plugin/sys_devices_system_edac_mc.c
|
||||||
|
|
|
@ -263,6 +263,8 @@ PROC_PLUGIN_FILES = \
|
||||||
collectors/proc.plugin/proc_loadavg.c \
|
collectors/proc.plugin/proc_loadavg.c \
|
||||||
collectors/proc.plugin/proc_meminfo.c \
|
collectors/proc.plugin/proc_meminfo.c \
|
||||||
collectors/proc.plugin/proc_pagetypeinfo.c \
|
collectors/proc.plugin/proc_pagetypeinfo.c \
|
||||||
|
collectors/proc.plugin/proc_pressure.c \
|
||||||
|
collectors/proc.plugin/proc_pressure.h \
|
||||||
collectors/proc.plugin/proc_net_dev.c \
|
collectors/proc.plugin/proc_net_dev.c \
|
||||||
collectors/proc.plugin/proc_net_ip_vs_stats.c \
|
collectors/proc.plugin/proc_net_ip_vs_stats.c \
|
||||||
collectors/proc.plugin/proc_net_netstat.c \
|
collectors/proc.plugin/proc_net_netstat.c \
|
||||||
|
|
|
@ -23,6 +23,11 @@ static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO;
|
||||||
static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO;
|
static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO;
|
||||||
static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO;
|
static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO;
|
||||||
static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO;
|
static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO;
|
||||||
|
static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO;
|
||||||
|
static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
|
||||||
|
static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
|
||||||
|
static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
|
||||||
|
static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
|
||||||
|
|
||||||
static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
|
static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
|
||||||
static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
|
static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
|
||||||
|
@ -105,6 +110,12 @@ void read_cgroup_plugin_configuration() {
|
||||||
cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops);
|
cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops);
|
||||||
cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops);
|
cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops);
|
||||||
|
|
||||||
|
cgroup_enable_pressure_cpu = config_get_boolean_ondemand("plugin:cgroups", "enable cpu pressure", cgroup_enable_pressure_cpu);
|
||||||
|
cgroup_enable_pressure_io_some = config_get_boolean_ondemand("plugin:cgroups", "enable io some pressure", cgroup_enable_pressure_io_some);
|
||||||
|
cgroup_enable_pressure_io_full = config_get_boolean_ondemand("plugin:cgroups", "enable io full pressure", cgroup_enable_pressure_io_full);
|
||||||
|
cgroup_enable_pressure_memory_some = config_get_boolean_ondemand("plugin:cgroups", "enable memory some pressure", cgroup_enable_pressure_memory_some);
|
||||||
|
cgroup_enable_pressure_memory_full = config_get_boolean_ondemand("plugin:cgroups", "enable memory full pressure", cgroup_enable_pressure_memory_full);
|
||||||
|
|
||||||
cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", cgroup_recheck_zero_blkio_every_iterations);
|
cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", cgroup_recheck_zero_blkio_every_iterations);
|
||||||
cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations);
|
cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations);
|
||||||
cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations);
|
cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations);
|
||||||
|
@ -116,6 +127,13 @@ void read_cgroup_plugin_configuration() {
|
||||||
char filename[FILENAME_MAX + 1], *s;
|
char filename[FILENAME_MAX + 1], *s;
|
||||||
struct mountinfo *mi, *root = mountinfo_read(0);
|
struct mountinfo *mi, *root = mountinfo_read(0);
|
||||||
if(!cgroup_use_unified_cgroups) {
|
if(!cgroup_use_unified_cgroups) {
|
||||||
|
// cgroup v1 does not have pressure metrics
|
||||||
|
cgroup_enable_pressure_cpu =
|
||||||
|
cgroup_enable_pressure_io_some =
|
||||||
|
cgroup_enable_pressure_io_full =
|
||||||
|
cgroup_enable_pressure_memory_some =
|
||||||
|
cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO;
|
||||||
|
|
||||||
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct");
|
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct");
|
||||||
if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct");
|
if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct");
|
||||||
if(!mi) {
|
if(!mi) {
|
||||||
|
@ -461,6 +479,10 @@ struct cgroup {
|
||||||
|
|
||||||
struct cgroup_network_interface *interfaces;
|
struct cgroup_network_interface *interfaces;
|
||||||
|
|
||||||
|
struct pressure cpu_pressure;
|
||||||
|
struct pressure io_pressure;
|
||||||
|
struct pressure memory_pressure;
|
||||||
|
|
||||||
// per cgroup charts
|
// per cgroup charts
|
||||||
RRDSET *st_cpu;
|
RRDSET *st_cpu;
|
||||||
RRDSET *st_cpu_limit;
|
RRDSET *st_cpu_limit;
|
||||||
|
@ -798,6 +820,54 @@ static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void cgroup2_read_pressure(struct pressure *res) {
|
||||||
|
static procfile *ff = NULL;
|
||||||
|
|
||||||
|
if (likely(res->filename)) {
|
||||||
|
ff = procfile_reopen(ff, res->filename, " =", PROCFILE_FLAG_DEFAULT);
|
||||||
|
if (unlikely(!ff)) {
|
||||||
|
res->updated = 0;
|
||||||
|
cgroups_check = 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ff = procfile_readall(ff);
|
||||||
|
if (unlikely(!ff)) {
|
||||||
|
res->updated = 0;
|
||||||
|
cgroups_check = 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t lines = procfile_lines(ff);
|
||||||
|
if (lines < 1) {
|
||||||
|
error("CGROUP: file '%s' should have 1+ lines.", res->filename);
|
||||||
|
res->updated = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
res->some.value10 = strtod(procfile_lineword(ff, 0, 2), NULL);
|
||||||
|
res->some.value60 = strtod(procfile_lineword(ff, 0, 4), NULL);
|
||||||
|
res->some.value300 = strtod(procfile_lineword(ff, 0, 6), NULL);
|
||||||
|
|
||||||
|
if (lines > 2) {
|
||||||
|
res->full.value10 = strtod(procfile_lineword(ff, 1, 2), NULL);
|
||||||
|
res->full.value60 = strtod(procfile_lineword(ff, 1, 4), NULL);
|
||||||
|
res->full.value300 = strtod(procfile_lineword(ff, 1, 6), NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
res->updated = 1;
|
||||||
|
|
||||||
|
if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) {
|
||||||
|
res->some.enabled = CONFIG_BOOLEAN_YES;
|
||||||
|
if (lines > 2) {
|
||||||
|
res->full.enabled = CONFIG_BOOLEAN_YES;
|
||||||
|
} else {
|
||||||
|
res->full.enabled = CONFIG_BOOLEAN_NO;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unified) {
|
static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unified) {
|
||||||
static procfile *ff = NULL;
|
static procfile *ff = NULL;
|
||||||
|
|
||||||
|
@ -946,6 +1016,9 @@ static inline void cgroup_read(struct cgroup *cg) {
|
||||||
cgroup2_read_blkio(&cg->io_service_bytes, 0);
|
cgroup2_read_blkio(&cg->io_service_bytes, 0);
|
||||||
cgroup2_read_blkio(&cg->io_serviced, 4);
|
cgroup2_read_blkio(&cg->io_serviced, 4);
|
||||||
cgroup2_read_cpuacct_stat(&cg->cpuacct_stat);
|
cgroup2_read_cpuacct_stat(&cg->cpuacct_stat);
|
||||||
|
cgroup2_read_pressure(&cg->cpu_pressure);
|
||||||
|
cgroup2_read_pressure(&cg->io_pressure);
|
||||||
|
cgroup2_read_pressure(&cg->memory_pressure);
|
||||||
cgroup_read_memory(&cg->memory, 1);
|
cgroup_read_memory(&cg->memory, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1236,6 +1309,12 @@ static inline struct cgroup *cgroup_add(const char *id) {
|
||||||
return cg;
|
return cg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void free_pressure(struct pressure *res) {
|
||||||
|
if (res->some.st) rrdset_is_obsolete(res->some.st);
|
||||||
|
if (res->full.st) rrdset_is_obsolete(res->full.st);
|
||||||
|
freez(res->filename);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void cgroup_free(struct cgroup *cg) {
|
static inline void cgroup_free(struct cgroup *cg) {
|
||||||
debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available");
|
debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available");
|
||||||
|
|
||||||
|
@ -1284,6 +1363,10 @@ static inline void cgroup_free(struct cgroup *cg) {
|
||||||
freez(cg->io_merged.filename);
|
freez(cg->io_merged.filename);
|
||||||
freez(cg->io_queued.filename);
|
freez(cg->io_queued.filename);
|
||||||
|
|
||||||
|
free_pressure(&cg->cpu_pressure);
|
||||||
|
free_pressure(&cg->io_pressure);
|
||||||
|
free_pressure(&cg->memory_pressure);
|
||||||
|
|
||||||
freez(cg->id);
|
freez(cg->id);
|
||||||
freez(cg->chart_id);
|
freez(cg->chart_id);
|
||||||
freez(cg->chart_title);
|
freez(cg->chart_title);
|
||||||
|
@ -1748,6 +1831,42 @@ static inline void find_all_cgroups() {
|
||||||
else
|
else
|
||||||
debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename);
|
debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) {
|
||||||
|
snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id);
|
||||||
|
if (likely(stat(filename, &buf) != -1)) {
|
||||||
|
cg->cpu_pressure.filename = strdupz(filename);
|
||||||
|
cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu;
|
||||||
|
cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO;
|
||||||
|
debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename);
|
||||||
|
} else {
|
||||||
|
debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) {
|
||||||
|
snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id);
|
||||||
|
if (likely(stat(filename, &buf) != -1)) {
|
||||||
|
cg->io_pressure.filename = strdupz(filename);
|
||||||
|
cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some;
|
||||||
|
cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full;
|
||||||
|
debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename);
|
||||||
|
} else {
|
||||||
|
debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely((cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && !cg->memory_pressure.filename)) {
|
||||||
|
snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id);
|
||||||
|
if (likely(stat(filename, &buf) != -1)) {
|
||||||
|
cg->memory_pressure.filename = strdupz(filename);
|
||||||
|
cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some;
|
||||||
|
cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full;
|
||||||
|
debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename);
|
||||||
|
} else {
|
||||||
|
debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3364,6 +3483,156 @@ void update_cgroup_charts(int update_every) {
|
||||||
rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write);
|
rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write);
|
||||||
rrdset_done(cg->st_merged_ops);
|
rrdset_done(cg->st_merged_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) {
|
||||||
|
struct pressure *res = &cg->cpu_pressure;
|
||||||
|
if (likely(res->updated && res->some.enabled)) {
|
||||||
|
if (unlikely(!res->some.st)) {
|
||||||
|
RRDSET *chart;
|
||||||
|
snprintfz(title, CHART_TITLE_MAX, "CPU pressure for cgroup %s", cg->chart_title);
|
||||||
|
|
||||||
|
chart = res->some.st = rrdset_create_localhost(
|
||||||
|
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
|
||||||
|
, "cpu_pressure"
|
||||||
|
, NULL
|
||||||
|
, "cpu"
|
||||||
|
, "cgroup.cpu_pressure"
|
||||||
|
, title
|
||||||
|
, "percentage"
|
||||||
|
, PLUGIN_CGROUPS_NAME
|
||||||
|
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
|
||||||
|
, cgroup_containers_chart_priority + 2200
|
||||||
|
, update_every,
|
||||||
|
RRDSET_TYPE_LINE);
|
||||||
|
|
||||||
|
res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
} else {
|
||||||
|
rrdset_next(res->some.st);
|
||||||
|
}
|
||||||
|
|
||||||
|
update_pressure_chart(&res->some);
|
||||||
|
}
|
||||||
|
|
||||||
|
res = &cg->memory_pressure;
|
||||||
|
if (likely(res->updated && res->some.enabled)) {
|
||||||
|
if (unlikely(!res->some.st)) {
|
||||||
|
RRDSET *chart;
|
||||||
|
snprintfz(title, CHART_TITLE_MAX, "Memory pressure for cgroup %s", cg->chart_title);
|
||||||
|
|
||||||
|
chart = res->some.st = rrdset_create_localhost(
|
||||||
|
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
|
||||||
|
, "mem_pressure"
|
||||||
|
, NULL
|
||||||
|
, "mem"
|
||||||
|
, "cgroup.memory_pressure"
|
||||||
|
, title
|
||||||
|
, "percentage"
|
||||||
|
, PLUGIN_CGROUPS_NAME
|
||||||
|
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
|
||||||
|
, cgroup_containers_chart_priority + 2300
|
||||||
|
, update_every,
|
||||||
|
RRDSET_TYPE_LINE);
|
||||||
|
|
||||||
|
res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
} else {
|
||||||
|
rrdset_next(res->some.st);
|
||||||
|
}
|
||||||
|
|
||||||
|
update_pressure_chart(&res->some);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (likely(res->updated && res->full.enabled)) {
|
||||||
|
if (unlikely(!res->full.st)) {
|
||||||
|
RRDSET *chart;
|
||||||
|
snprintfz(title, CHART_TITLE_MAX, "Memory full pressure for cgroup %s", cg->chart_title);
|
||||||
|
|
||||||
|
chart = res->full.st = rrdset_create_localhost(
|
||||||
|
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
|
||||||
|
, "mem_full_pressure"
|
||||||
|
, NULL
|
||||||
|
, "mem"
|
||||||
|
, "cgroup.memory_full_pressure"
|
||||||
|
, title
|
||||||
|
, "percentage"
|
||||||
|
, PLUGIN_CGROUPS_NAME
|
||||||
|
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
|
||||||
|
, cgroup_containers_chart_priority + 2350
|
||||||
|
, update_every,
|
||||||
|
RRDSET_TYPE_LINE);
|
||||||
|
|
||||||
|
res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
} else {
|
||||||
|
rrdset_next(res->full.st);
|
||||||
|
}
|
||||||
|
|
||||||
|
update_pressure_chart(&res->full);
|
||||||
|
}
|
||||||
|
|
||||||
|
res = &cg->io_pressure;
|
||||||
|
if (likely(res->updated && res->some.enabled)) {
|
||||||
|
if (unlikely(!res->some.st)) {
|
||||||
|
RRDSET *chart;
|
||||||
|
snprintfz(title, CHART_TITLE_MAX, "I/O pressure for cgroup %s", cg->chart_title);
|
||||||
|
|
||||||
|
chart = res->some.st = rrdset_create_localhost(
|
||||||
|
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
|
||||||
|
, "io_pressure"
|
||||||
|
, NULL
|
||||||
|
, "disk"
|
||||||
|
, "cgroup.io_pressure"
|
||||||
|
, title
|
||||||
|
, "percentage"
|
||||||
|
, PLUGIN_CGROUPS_NAME
|
||||||
|
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
|
||||||
|
, cgroup_containers_chart_priority + 2400
|
||||||
|
, update_every,
|
||||||
|
RRDSET_TYPE_LINE);
|
||||||
|
|
||||||
|
res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
} else {
|
||||||
|
rrdset_next(res->some.st);
|
||||||
|
}
|
||||||
|
|
||||||
|
update_pressure_chart(&res->some);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (likely(res->updated && res->full.enabled)) {
|
||||||
|
if (unlikely(!res->full.st)) {
|
||||||
|
RRDSET *chart;
|
||||||
|
snprintfz(title, CHART_TITLE_MAX, "I/O full pressure for cgroup %s", cg->chart_title);
|
||||||
|
|
||||||
|
chart = res->full.st = rrdset_create_localhost(
|
||||||
|
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
|
||||||
|
, "io_full_pressure"
|
||||||
|
, NULL
|
||||||
|
, "disk"
|
||||||
|
, "cgroup.io_full_pressure"
|
||||||
|
, title
|
||||||
|
, "percentage"
|
||||||
|
, PLUGIN_CGROUPS_NAME
|
||||||
|
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
|
||||||
|
, cgroup_containers_chart_priority + 2450
|
||||||
|
, update_every,
|
||||||
|
RRDSET_TYPE_LINE);
|
||||||
|
|
||||||
|
res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
} else {
|
||||||
|
rrdset_next(res->full.st);
|
||||||
|
}
|
||||||
|
|
||||||
|
update_pressure_chart(&res->full);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(likely(cgroup_enable_systemd_services))
|
if(likely(cgroup_enable_systemd_services))
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
- `/proc/interrupts` (total and per core hardware interrupts)
|
- `/proc/interrupts` (total and per core hardware interrupts)
|
||||||
- `/proc/softirqs` (total and per core software interrupts)
|
- `/proc/softirqs` (total and per core software interrupts)
|
||||||
- `/proc/loadavg` (system load and total processes running)
|
- `/proc/loadavg` (system load and total processes running)
|
||||||
|
- `/proc/pressure/{cpu,memory,io}` (pressure stall information)
|
||||||
- `/proc/sys/kernel/random/entropy_avail` (random numbers pool availability - used in cryptography)
|
- `/proc/sys/kernel/random/entropy_avail` (random numbers pool availability - used in cryptography)
|
||||||
- `/sys/class/power_supply` (power supply properties)
|
- `/sys/class/power_supply` (power supply properties)
|
||||||
- `ipc` (IPC semaphores and message queues)
|
- `ipc` (IPC semaphores and message queues)
|
||||||
|
|
|
@ -21,6 +21,9 @@ static struct proc_module {
|
||||||
{ .name = "/proc/loadavg", .dim = "loadavg", .func = do_proc_loadavg },
|
{ .name = "/proc/loadavg", .dim = "loadavg", .func = do_proc_loadavg },
|
||||||
{ .name = "/proc/sys/kernel/random/entropy_avail", .dim = "entropy", .func = do_proc_sys_kernel_random_entropy_avail },
|
{ .name = "/proc/sys/kernel/random/entropy_avail", .dim = "entropy", .func = do_proc_sys_kernel_random_entropy_avail },
|
||||||
|
|
||||||
|
// pressure metrics
|
||||||
|
{ .name = "/proc/pressure", .dim = "pressure", .func = do_proc_pressure },
|
||||||
|
|
||||||
// CPU metrics
|
// CPU metrics
|
||||||
{ .name = "/proc/interrupts", .dim = "interrupts", .func = do_proc_interrupts },
|
{ .name = "/proc/interrupts", .dim = "interrupts", .func = do_proc_interrupts },
|
||||||
{ .name = "/proc/softirqs", .dim = "softirqs", .func = do_proc_softirqs },
|
{ .name = "/proc/softirqs", .dim = "softirqs", .func = do_proc_softirqs },
|
||||||
|
|
|
@ -40,6 +40,7 @@ extern int do_proc_net_rpc_nfsd(int update_every, usec_t dt);
|
||||||
extern int do_proc_sys_kernel_random_entropy_avail(int update_every, usec_t dt);
|
extern int do_proc_sys_kernel_random_entropy_avail(int update_every, usec_t dt);
|
||||||
extern int do_proc_interrupts(int update_every, usec_t dt);
|
extern int do_proc_interrupts(int update_every, usec_t dt);
|
||||||
extern int do_proc_softirqs(int update_every, usec_t dt);
|
extern int do_proc_softirqs(int update_every, usec_t dt);
|
||||||
|
extern int do_proc_pressure(int update_every, usec_t dt);
|
||||||
extern int do_sys_kernel_mm_ksm(int update_every, usec_t dt);
|
extern int do_sys_kernel_mm_ksm(int update_every, usec_t dt);
|
||||||
extern int do_sys_block_zram(int update_every, usec_t dt);
|
extern int do_sys_block_zram(int update_every, usec_t dt);
|
||||||
extern int do_proc_loadavg(int update_every, usec_t dt);
|
extern int do_proc_loadavg(int update_every, usec_t dt);
|
||||||
|
@ -66,6 +67,7 @@ extern void netdev_rename_device_add(const char *host_device, const char *contai
|
||||||
extern void netdev_rename_device_del(const char *host_device);
|
extern void netdev_rename_device_del(const char *host_device);
|
||||||
|
|
||||||
#include "proc_self_mountinfo.h"
|
#include "proc_self_mountinfo.h"
|
||||||
|
#include "proc_pressure.h"
|
||||||
#include "zfs_common.h"
|
#include "zfs_common.h"
|
||||||
|
|
||||||
#else // (TARGET_OS == OS_LINUX)
|
#else // (TARGET_OS == OS_LINUX)
|
||||||
|
|
177
collectors/proc.plugin/proc_pressure.c
Normal file
177
collectors/proc.plugin/proc_pressure.c
Normal file
|
@ -0,0 +1,177 @@
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
|
#include "plugin_proc.h"
|
||||||
|
|
||||||
|
#define PLUGIN_PROC_MODULE_PRESSURE_NAME "/proc/pressure"
|
||||||
|
#define CONFIG_SECTION_PLUGIN_PROC_PRESSURE "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_PRESSURE_NAME
|
||||||
|
|
||||||
|
// linux calculates this every 2 seconds, see kernel/sched/psi.c PSI_FREQ
|
||||||
|
#define MIN_PRESSURE_UPDATE_EVERY 2
|
||||||
|
|
||||||
|
|
||||||
|
// Chart ids and titles of the system-wide pressure charts, one entry per
// resource. The order must match resource_info[] (cpu, memory, io).
// The cpu entry leaves .full zero-initialised: it has no "full" chart id.
static struct pressure resources[PRESSURE_NUM_RESOURCES] = {
    [0] = {
        .some = { .id = "cpu_pressure", .title = "CPU Pressure" },
    },
    [1] = {
        .some = { .id = "memory_some_pressure", .title = "Memory Pressure" },
        .full = { .id = "memory_full_pressure", .title = "Memory Full Pressure" },
    },
    [2] = {
        .some = { .id = "io_some_pressure", .title = "I/O Pressure" },
        .full = { .id = "io_full_pressure", .title = "I/O Full Pressure" },
    },
};
|
||||||
|
|
||||||
|
static struct {
|
||||||
|
procfile *pf;
|
||||||
|
const char *name; // metric file name
|
||||||
|
const char *family; // webui section name
|
||||||
|
int section_priority;
|
||||||
|
} resource_info[PRESSURE_NUM_RESOURCES] = {
|
||||||
|
{ .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU },
|
||||||
|
{ .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM },
|
||||||
|
{ .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO },
|
||||||
|
};
|
||||||
|
|
||||||
|
void update_pressure_chart(struct pressure_chart *chart) {
|
||||||
|
rrddim_set_by_pointer(chart->st, chart->rd10, (collected_number)(chart->value10 * 100));
|
||||||
|
rrddim_set_by_pointer(chart->st, chart->rd60, (collected_number) (chart->value60 * 100));
|
||||||
|
rrddim_set_by_pointer(chart->st, chart->rd300, (collected_number) (chart->value300 * 100));
|
||||||
|
|
||||||
|
rrdset_done(chart->st);
|
||||||
|
}
|
||||||
|
|
||||||
|
int do_proc_pressure(int update_every, usec_t dt) {
|
||||||
|
int fail_count = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
static usec_t next_pressure_dt = 0;
|
||||||
|
static char *base_path = NULL;
|
||||||
|
|
||||||
|
update_every = (update_every < MIN_PRESSURE_UPDATE_EVERY) ? MIN_PRESSURE_UPDATE_EVERY : update_every;
|
||||||
|
|
||||||
|
if (next_pressure_dt <= dt) {
|
||||||
|
next_pressure_dt = update_every * USEC_PER_SEC;
|
||||||
|
} else {
|
||||||
|
next_pressure_dt -= dt;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(!base_path)) {
|
||||||
|
base_path = config_get(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, "base path of pressure metrics", "/proc/pressure");
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < PRESSURE_NUM_RESOURCES; i++) {
|
||||||
|
procfile *ff = resource_info[i].pf;
|
||||||
|
int do_some = resources[i].some.enabled, do_full = resources[i].full.enabled;
|
||||||
|
|
||||||
|
if (unlikely(!ff)) {
|
||||||
|
char filename[FILENAME_MAX + 1];
|
||||||
|
char config_key[CONFIG_MAX_NAME + 1];
|
||||||
|
|
||||||
|
snprintfz(filename
|
||||||
|
, FILENAME_MAX
|
||||||
|
, "%s%s/%s"
|
||||||
|
, netdata_configured_host_prefix
|
||||||
|
, base_path
|
||||||
|
, resource_info[i].name);
|
||||||
|
|
||||||
|
snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name);
|
||||||
|
do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
|
||||||
|
resources[i].some.enabled = do_some;
|
||||||
|
if (resources[i].full.id) {
|
||||||
|
snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name);
|
||||||
|
do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
|
||||||
|
resources[i].full.enabled = do_full;
|
||||||
|
}
|
||||||
|
|
||||||
|
ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT);
|
||||||
|
if (unlikely(!ff)) {
|
||||||
|
error("Cannot read pressure information from %s.", filename);
|
||||||
|
fail_count++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ff = procfile_readall(ff);
|
||||||
|
resource_info[i].pf = ff;
|
||||||
|
if (unlikely(!ff)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t lines = procfile_lines(ff);
|
||||||
|
if (unlikely(lines < 1)) {
|
||||||
|
error("%s has no lines.", procfile_filename(ff));
|
||||||
|
fail_count++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pressure_chart *chart;
|
||||||
|
if (do_some) {
|
||||||
|
chart = &resources[i].some;
|
||||||
|
if (unlikely(!chart->st)) {
|
||||||
|
chart->st = rrdset_create_localhost(
|
||||||
|
"system"
|
||||||
|
, chart->id
|
||||||
|
, NULL
|
||||||
|
, resource_info[i].family
|
||||||
|
, NULL
|
||||||
|
, chart->title
|
||||||
|
, "percentage"
|
||||||
|
, PLUGIN_PROC_NAME
|
||||||
|
, PLUGIN_PROC_MODULE_PRESSURE_NAME
|
||||||
|
, resource_info[i].section_priority + 40
|
||||||
|
, update_every
|
||||||
|
, RRDSET_TYPE_LINE
|
||||||
|
);
|
||||||
|
chart->rd10 = rrddim_add(chart->st, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
chart->rd60 = rrddim_add(chart->st, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
chart->rd300 = rrddim_add(chart->st, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
} else {
|
||||||
|
rrdset_next(chart->st);
|
||||||
|
}
|
||||||
|
|
||||||
|
chart->value10 = strtod(procfile_lineword(ff, 0, 2), NULL);
|
||||||
|
chart->value60 = strtod(procfile_lineword(ff, 0, 4), NULL);
|
||||||
|
chart->value300 = strtod(procfile_lineword(ff, 0, 6), NULL);
|
||||||
|
update_pressure_chart(chart);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (do_full && lines > 2) {
|
||||||
|
chart = &resources[i].full;
|
||||||
|
if (unlikely(!chart->st)) {
|
||||||
|
chart->st = rrdset_create_localhost(
|
||||||
|
"system"
|
||||||
|
, chart->id
|
||||||
|
, NULL
|
||||||
|
, resource_info[i].family
|
||||||
|
, NULL
|
||||||
|
, chart->title
|
||||||
|
, "percentage"
|
||||||
|
, PLUGIN_PROC_NAME
|
||||||
|
, PLUGIN_PROC_MODULE_PRESSURE_NAME
|
||||||
|
, resource_info[i].section_priority + 45
|
||||||
|
, update_every
|
||||||
|
, RRDSET_TYPE_LINE
|
||||||
|
);
|
||||||
|
chart->rd10 = rrddim_add(chart->st, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
chart->rd60 = rrddim_add(chart->st, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
chart->rd300 = rrddim_add(chart->st, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||||
|
} else {
|
||||||
|
rrdset_next(chart->st);
|
||||||
|
}
|
||||||
|
|
||||||
|
chart->value10 = strtod(procfile_lineword(ff, 1, 2), NULL);
|
||||||
|
chart->value60 = strtod(procfile_lineword(ff, 1, 4), NULL);
|
||||||
|
chart->value300 = strtod(procfile_lineword(ff, 1, 6), NULL);
|
||||||
|
update_pressure_chart(chart);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PRESSURE_NUM_RESOURCES == fail_count) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
31
collectors/proc.plugin/proc_pressure.h
Normal file
31
collectors/proc.plugin/proc_pressure.h
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
|
#ifndef NETDATA_PROC_PRESSURE_H
|
||||||
|
#define NETDATA_PROC_PRESSURE_H
|
||||||
|
|
||||||
|
#define PRESSURE_NUM_RESOURCES 3
|
||||||
|
|
||||||
|
struct pressure {
|
||||||
|
int updated;
|
||||||
|
char *filename;
|
||||||
|
|
||||||
|
struct pressure_chart {
|
||||||
|
int enabled;
|
||||||
|
|
||||||
|
const char *id;
|
||||||
|
const char *title;
|
||||||
|
|
||||||
|
double value10;
|
||||||
|
double value60;
|
||||||
|
double value300;
|
||||||
|
|
||||||
|
RRDSET *st;
|
||||||
|
RRDDIM *rd10;
|
||||||
|
RRDDIM *rd60;
|
||||||
|
RRDDIM *rd300;
|
||||||
|
} some, full;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern void update_pressure_chart(struct pressure_chart *chart);
|
||||||
|
|
||||||
|
#endif //NETDATA_PROC_PRESSURE_H
|
|
@ -718,6 +718,31 @@ netdataDashboard.context = {
|
||||||
height: 0.7
|
height: 0.7
|
||||||
},
|
},
|
||||||
|
|
||||||
|
'system.cpu_pressure': {
|
||||||
|
info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html">Pressure Stall Information</a> ' +
|
||||||
|
'identifies and quantifies the disruptions caused by resource contentions. ' +
|
||||||
|
'The "some" line indicates the share of time in which at least <b>some</b> tasks are stalled on CPU. ' +
|
||||||
|
'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.'
|
||||||
|
},
|
||||||
|
|
||||||
|
'system.memory_some_pressure': {
|
||||||
|
info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html">Pressure Stall Information</a> ' +
|
||||||
|
'identifies and quantifies the disruptions caused by resource contentions. ' +
|
||||||
|
'The "some" line indicates the share of time in which at least <b>some</b> tasks are stalled on memory. ' +
|
||||||
|
'The "full" line indicates the share of time in which <b>all non-idle</b> tasks are stalled on memory simultaneously. ' +
|
||||||
|
'In this state actual CPU cycles are going to waste, and a workload that spends extended time in this state is considered to be thrashing. ' +
|
||||||
|
'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.'
|
||||||
|
},
|
||||||
|
|
||||||
|
'system.io_some_pressure': {
|
||||||
|
info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html">Pressure Stall Information</a> ' +
|
||||||
|
'identifies and quantifies the disruptions caused by resource contentions. ' +
|
||||||
|
'The "some" line indicates the share of time in which at least <b>some</b> tasks are stalled on I/O. ' +
|
||||||
|
'The "full" line indicates the share of time in which <b>all non-idle</b> tasks are stalled on I/O simultaneously. ' +
|
||||||
|
'In this state actual CPU cycles are going to waste, and a workload that spends extended time in this state is considered to be thrashing. ' +
|
||||||
|
'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.'
|
||||||
|
},
|
||||||
|
|
||||||
'system.io': {
|
'system.io': {
|
||||||
info: function (os) {
|
info: function (os) {
|
||||||
var s = 'Total Disk I/O, for all physical disks. You can get detailed information about each disk at the <a href="#menu_disk">Disks</a> section and per application Disk usage at the <a href="#menu_apps">Applications Monitoring</a> section.';
|
var s = 'Total Disk I/O, for all physical disks. You can get detailed information about each disk at the <a href="#menu_disk">Disks</a> section and per application Disk usage at the <a href="#menu_apps">Applications Monitoring</a> section.';
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue