0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-28 14:42:31 +00:00

proc integrations ()

Co-authored-by: ilyam8 <ilya@netdata.cloud>
This commit is contained in:
Costa Tsaousis 2023-07-26 01:06:57 +03:00 committed by GitHub
parent accc426c8a
commit 065091c3f5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 1089 additions and 253 deletions

View file

@ -23,18 +23,6 @@
#define NETDATA_CHART_PRIO_SYSTEM_IO 150
#define NETDATA_CHART_PRIO_SYSTEM_PGPGIO 151
#define NETDATA_CHART_PRIO_SYSTEM_RAM 200
#define NETDATA_CHART_PRIO_SYSTEM_SWAP 201
#define NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS 202
#define NETDATA_CHART_PRIO_SYSTEM_SWAPIO 250
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO 300
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO 301
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE 302
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE 303
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS 304
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT 305
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES 306
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE 307
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY 308
#define NETDATA_CHART_PRIO_SYSTEM_NET 500
#define NETDATA_CHART_PRIO_SYSTEM_IPV4 500 // freebsd only
#define NETDATA_CHART_PRIO_SYSTEM_IP 501
@ -87,17 +75,35 @@
#define NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE 1010
#define NETDATA_CHART_PRIO_MEM_SYSTEM_OOM_KILL 1020
#define NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED 1030
#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1040
#define NETDATA_CHART_PRIO_MEM_SWAP 1035
#define NETDATA_CHART_PRIO_MEM_SWAP_CALLS 1037
#define NETDATA_CHART_PRIO_MEM_SWAPIO 1038
#define NETDATA_CHART_PRIO_MEM_ZSWAP 1036
#define NETDATA_CHART_PRIO_MEM_ZSWAPIO 1037
#define NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO 1038
#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE 1039
#define NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE 1040
#define NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS 1041
#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT 1042
#define NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES 1043
#define NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE 1044
#define NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY 1045
#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1050
#define NETDATA_CHART_PRIO_MEM_KERNEL 1100
#define NETDATA_CHART_PRIO_MEM_SLAB 1200
#define NETDATA_CHART_PRIO_MEM_RECLAIMING 1210
#define NETDATA_CHART_PRIO_MEM_HIGH_LOW 1211
#define NETDATA_CHART_PRIO_MEM_CMA 1212
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES 1250
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS 1251
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE 1252
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO 1253
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED 1254
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS 1255
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT 1256
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT 1257
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_DETAILS 1251
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS 1252
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE 1253
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO 1254
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED 1255
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS 1256
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT 1257
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT 1258
#define NETDATA_CHART_PRIO_MEM_DIRECTMAP 1260
#define NETDATA_CHART_PRIO_MEM_KSM 1300
#define NETDATA_CHART_PRIO_MEM_KSM_SAVINGS 1301
#define NETDATA_CHART_PRIO_MEM_KSM_RATIOS 1302

View file

@ -62,6 +62,8 @@ static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO;
static int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
@ -828,6 +830,7 @@ struct cgroup {
struct pressure cpu_pressure;
struct pressure io_pressure;
struct pressure memory_pressure;
struct pressure irq_pressure;
// per cgroup charts
RRDSET *st_cpu;
@ -1451,28 +1454,33 @@ static inline void cgroup2_read_pressure(struct pressure *res) {
return;
}
res->some.share_time.value10 = strtod(procfile_lineword(ff, 0, 2), NULL);
res->some.share_time.value60 = strtod(procfile_lineword(ff, 0, 4), NULL);
res->some.share_time.value300 = strtod(procfile_lineword(ff, 0, 6), NULL);
res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8), NULL) / 1000; // us->ms
bool did_some = false, did_full = false;
if (lines > 2) {
res->full.share_time.value10 = strtod(procfile_lineword(ff, 1, 2), NULL);
res->full.share_time.value60 = strtod(procfile_lineword(ff, 1, 4), NULL);
res->full.share_time.value300 = strtod(procfile_lineword(ff, 1, 6), NULL);
res->full.total_time.value_total = str2ull(procfile_lineword(ff, 1, 8), NULL) / 1000; // us->ms
for(size_t l = 0; l < lines ;l++) {
const char *key = procfile_lineword(ff, l, 0);
if(strcmp(key, "some") == 0) {
res->some.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL);
res->some.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL);
res->some.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL);
res->some.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms
did_some = true;
}
res->updated = 1;
if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) {
res->some.enabled = CONFIG_BOOLEAN_YES;
if (lines > 2) {
res->full.enabled = CONFIG_BOOLEAN_YES;
} else {
res->full.enabled = CONFIG_BOOLEAN_NO;
else if(strcmp(key, "full") == 0) {
res->full.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL);
res->full.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL);
res->full.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL);
res->full.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms
did_full = true;
}
}
res->updated = (did_full || did_some) ? 1 : 0;
if(unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO))
res->some.enabled = (did_some) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
if(unlikely(res->full.enabled == CONFIG_BOOLEAN_AUTO))
res->full.enabled = (did_full) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
}
}
@ -1637,6 +1645,7 @@ static inline void read_cgroup(struct cgroup *cg) {
cgroup2_read_pressure(&cg->cpu_pressure);
cgroup2_read_pressure(&cg->io_pressure);
cgroup2_read_pressure(&cg->memory_pressure);
cgroup2_read_pressure(&cg->irq_pressure);
cgroup_read_memory(&cg->memory, 1);
}
}
@ -1851,6 +1860,7 @@ static inline void cgroup_free(struct cgroup *cg) {
free_pressure(&cg->cpu_pressure);
free_pressure(&cg->io_pressure);
free_pressure(&cg->memory_pressure);
free_pressure(&cg->irq_pressure);
freez(cg->id);
freez(cg->intermediate_id);
@ -2465,6 +2475,18 @@ static inline void discovery_update_filenames() {
netdata_log_debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
}
}
if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) {
snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id);
if (likely(stat(filename, &buf) != -1)) {
cg->irq_pressure.filename = strdupz(filename);
cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some;
cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full;
netdata_log_debug(D_CGROUP, "irq.pressure filename for cgroup '%s': '%s'", cg->id, cg->irq_pressure.filename);
} else {
netdata_log_debug(D_CGROUP, "irq.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
}
}
}
}
}
@ -4643,6 +4665,112 @@ void update_cgroup_charts(int update_every) {
update_pressure_charts(pcs);
}
res = &cg->irq_pressure;
if (likely(res->updated && res->some.enabled)) {
struct pressure_charts *pcs;
pcs = &res->some;
if (unlikely(!pcs->share_time.st)) {
RRDSET *chart;
snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure");
chart = pcs->share_time.st = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
, "irq_some_pressure"
, NULL
, "interrupts"
, k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure"
, title
, "percentage"
, PLUGIN_CGROUPS_NAME
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
, cgroup_containers_chart_priority + 2310
, update_every
, RRDSET_TYPE_LINE
);
rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
}
if (unlikely(!pcs->total_time.st)) {
RRDSET *chart;
snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure stall time");
chart = pcs->total_time.st = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
, "irq_some_pressure_stall_time"
, NULL
, "interrupts"
, k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" : "cgroup.irq_some_pressure_stall_time"
, title
, "ms"
, PLUGIN_CGROUPS_NAME
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
, cgroup_containers_chart_priority + 2330
, update_every
, RRDSET_TYPE_LINE
);
rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
update_pressure_charts(pcs);
}
if (likely(res->updated && res->full.enabled)) {
struct pressure_charts *pcs;
pcs = &res->full;
if (unlikely(!pcs->share_time.st)) {
RRDSET *chart;
snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure");
chart = pcs->share_time.st = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
, "irq_full_pressure"
, NULL
, "interrupts"
, k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure"
, title
, "percentage"
, PLUGIN_CGROUPS_NAME
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
, cgroup_containers_chart_priority + 2350
, update_every
, RRDSET_TYPE_LINE
);
rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
}
if (unlikely(!pcs->total_time.st)) {
RRDSET *chart;
snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure stall time");
chart = pcs->total_time.st = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
, "irq_full_pressure_stall_time"
, NULL
, "interrupts"
, k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" : "cgroup.irq_full_pressure_stall_time"
, title
, "ms"
, PLUGIN_CGROUPS_NAME
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
, cgroup_containers_chart_priority + 2370
, update_every
, RRDSET_TYPE_LINE
);
rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
update_pressure_charts(pcs);
}
res = &cg->io_pressure;
if (likely(res->updated && res->some.enabled)) {

View file

@ -38,7 +38,7 @@ static struct netdata_zswap_metric zswap_calculated_metrics[] = {
.charttype = RRDSET_TYPE_LINE,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO,
.divisor = 100,
.convertv = NULL,
.value = -1},
@ -71,7 +71,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_AREA,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE,
.divisor = 1,
.convertv = NULL,
.value = -1},
@ -84,7 +84,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_AREA,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE,
.divisor = 1,
.convertv = pages_to_bytes,
.value = -1},
@ -97,7 +97,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_LINE,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT,
.divisor = 1,
.convertv = NULL,
.value = -1},
@ -110,7 +110,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_AREA,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES,
.divisor = 1,
.convertv = pages_to_bytes,
.value = -1},
@ -123,7 +123,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_AREA,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE,
.divisor = 1,
.convertv = pages_to_bytes,
.value = -1},
@ -136,7 +136,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_LINE,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY,
.divisor = 1,
.convertv = NULL,
.value = -1},
@ -175,7 +175,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@ -188,7 +188,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@ -201,7 +201,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@ -214,7 +214,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@ -227,7 +227,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@ -266,7 +266,7 @@ zswap_send_chart(struct netdata_zswap_metric *metric, int update_every, const ch
{
fprintf(
stdout,
"CHART system.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n",
"CHART mem.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n",
metric->chart_id,
metric->title,
metric->units,
@ -291,7 +291,7 @@ static void zswap_send_dimension(struct netdata_zswap_metric *metric)
static void zswap_send_begin(struct netdata_zswap_metric *metric)
{
fprintf(stdout, "BEGIN system.zswap_%s\n", metric->chart_id);
fprintf(stdout, "BEGIN mem.zswap_%s\n", metric->chart_id);
}
static void zswap_send_set(struct netdata_zswap_metric *metric)

View file

@ -322,13 +322,13 @@ void ebpf_obsolete_swap_apps_charts(struct ebpf_module *em)
*/
static void ebpf_obsolete_swap_global(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP,
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_MEM_SWAP_CHART,
"Calls to access swap memory",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE,
NULL,
NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS,
NETDATA_CHART_PRIO_MEM_SWAP_CALLS,
em->update_every);
}
@ -914,12 +914,12 @@ static void ebpf_swap_allocate_global_vectors(int apps)
*/
static void ebpf_create_swap_charts(int update_every)
{
ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_MEM_SWAP_CHART,
ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_MEM_SWAP_CHART,
"Calls to access swap memory",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU,
NULL,
NETDATA_EBPF_CHART_TYPE_LINE,
NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS,
NETDATA_CHART_PRIO_MEM_SWAP_CALLS,
ebpf_create_global_dimension,
swap_publish_aggregated, NETDATA_SWAP_END,
update_every, NETDATA_EBPF_MODULE_NAME_SWAP);

View file

@ -1035,7 +1035,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) {
if (unlikely(!st)) {
st = rrdset_create_localhost(
"system",
"mem",
"swapio",
NULL,
"swap",
@ -1044,7 +1044,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) {
"KiB/s",
"freebsd.plugin",
"vm.stats.vm.v_swappgs",
NETDATA_CHART_PRIO_SYSTEM_SWAPIO,
NETDATA_CHART_PRIO_MEM_SWAPIO,
update_every,
RRDSET_TYPE_AREA
);

View file

@ -20,27 +20,50 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "This integration provides a collection of statistics about the system such as CPU utilization, process counts and more."
metrics_description: |
CPU utilization, states and frequencies and key Linux system performance metrics.
The `/proc/stat` file provides various types of system statistics:
- The overall system CPU usage statistics
- Per CPU core statistics
- The total context switching of the system
- The total number of processes running
- The total CPU interrupts
- The total CPU softirqs
The collector also reads:
- `/proc/schedstat` for statistics about the process scheduler in the Linux kernel.
- `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems.
- `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system.
- `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core.
- `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states.
- `/sys/devices/system/cpu/[X]/cpuidle/state[X]/name` to get the names of the idle states for each CPU core in a Linux system.
- `/sys/devices/system/cpu/[X]/cpuidle/state[X]/time` to get the total time each specific CPU core has spent in each idle state since the system was started.
method_description: ""
supported_platforms:
include: []
include: [ "linux" ]
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
auto_detection:
description: ""
description: |
The collector auto-detects all metrics. No configuration is needed.
limits:
description: ""
performance_impact:
description: ""
description: |
The collector disables cpu frequency and idle state monitoring when there are more than 128 CPU cores available.
setup:
prerequisites:
list: []
configuration:
file:
name: ""
section_name: "plugin:proc:/proc/stat"
name: "netdata.conf"
description: ""
options:
description: ""
@ -187,12 +210,43 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: ""
metrics_description: |
Entropy, a measure of the randomness or unpredictability of data.
In the context of cryptography, entropy is used to generate random numbers or keys that are essential for
secure communication and encryption. Without a good source of entropy, cryptographic protocols can become
vulnerable to attacks that exploit the predictability of the generated keys.
In most operating systems, entropy is generated by collecting random events from various sources, such as
hardware interrupts, mouse movements, keyboard presses, and disk activity. These events are fed into a pool
of entropy, which is then used to generate random numbers when needed.
The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs
to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device,
which blocks until enough entropy is available to generate the requested numbers. This ensures that the
generated numbers are truly random and not predictable.
However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing
programs that rely on random numbers to slow down or even freeze. This is especially problematic for
cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH.
To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality
entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or
radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than
software-based sources.
One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used
for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates
high-quality entropy, which can be used to seed the pool of entropy in the operating system.
Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by
exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions
can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions.
method_description: ""
supported_platforms:
include: []
include: [ "linux" ]
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
@ -264,12 +318,23 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "This integration provides the amount of time the system has been up (running)."
metrics_description: |
The amount of time the system has been up (running).
Uptime is a critical aspect of overall system performance:
- **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes.
- **Performance Monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends.
- **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems.
- **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause.
- **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others.
- **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention.
- **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability.
method_description: ""
supported_platforms:
include: []
include: [ "linux" ]
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
@ -340,12 +405,33 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "This integration provides information about process, memory, swap space utilization and many more."
metrics_description: |
Linux Virtual memory subsystem.
Information about memory management, indicating how effectively the kernel allocates and frees
memory resources in response to system demands.
Monitors page faults, which occur when a process requests a portion of its memory that isn't
immediately available. Monitoring these events can help diagnose inefficiencies in memory management and
provide insights into application behavior.
Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to
swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap,
a compressed cache for swap pages, and provides insights into its usage and performance implications.
In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance
memory resources between host and guest systems.
For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which
can impact the performance based on the memory access times.
The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out
of memory resources.
method_description: ""
supported_platforms:
include: []
include: [ "linux" ]
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
@ -535,7 +621,37 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "Monitor Interrupts metrics for efficient processor interrupt handling."
metrics_description: |
Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt.
The numbers reported are the counts of the interrupts that have occurred of each type.
An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs
immediate attention. The processor then interrupts its current activities and executes the interrupt handler
to deal with the event. This is part of the way a computer multitasks and handles concurrent processing.
The types of interrupts include:
- **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when
you type something on the keyboard, an interrupt is triggered so the processor can handle the new input.
- **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. It's primarily
used to switch the CPU among different tasks.
- **Software interrupts**: These are generated by a program requiring disk I/O operations, or other system resources.
- **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc.
Monitoring `/proc/interrupts` can be used for:
- **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not
configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system
performance degradation.
- **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem.
- **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you
understand what your system is doing. It can provide insights into the system's interaction with hardware,
drivers, and other parts of the kernel.
method_description: ""
supported_platforms:
include: []
@ -621,12 +737,35 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "This integration provides the system load average for the last 1, 5, and 15 minutes."
metrics_description: |
The `/proc/loadavg` file provides information about the system load average.
The load average is a measure of the amount of computational work that a system performs. It is a
representation of the average system load over a period of time.
This file contains three numbers representing the system load averages for the last 1, 5, and 15 minutes,
respectively. It also includes the currently running processes and the total number of processes.
Monitoring the load average can be used for:
- **System performance**: If the load average is too high, it may indicate that your system is overloaded.
On a system with a single CPU, if the load average is 1, it means the single CPU is fully utilized. If the
load averages are consistently higher than the number of CPUs/cores, it may indicate that your system is
overloaded and tasks are waiting for CPU time.
- **Troubleshooting**: If the load average is unexpectedly high, it can be a sign of a problem. This could be
due to a runaway process, a software bug, or a hardware issue.
- **Capacity planning**: By monitoring the load average over time, you can understand the trends in your
system's workload. This can help with capacity planning and scaling decisions.
Remember that load average not only considers CPU usage, but also includes processes waiting for disk I/O.
Therefore, high load averages could be due to I/O contention as well as CPU contention.
method_description: ""
supported_platforms:
include: []
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
@ -726,12 +865,35 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "This integration measures system pressure, which can indicate resource shortages in CPU, memory, or I/O."
metrics_description: |
Introduced in Linux kernel 4.20, `/proc/pressure` provides information about system pressure stall information
(PSI). PSI is a feature that allows the system to track the amount of time the system is stalled due to
resource contention, such as CPU, memory, or I/O.
The collector monitors the following files, one per tracked resource:
- **cpu**: Tracks the amount of time tasks are stalled due to CPU contention.
- **memory**: Tracks the amount of time tasks are stalled due to memory contention.
- **io**: Tracks the amount of time tasks are stalled due to I/O contention.
- **irq**: Tracks the amount of time tasks are stalled due to IRQ contention.
Each of them provides averages of stall time over the last 10 seconds, 1 minute, and 5 minutes, plus a cumulative total stall time.
Monitoring the /proc/pressure files can provide important insights into system performance and capacity planning:
- **Identifying resource contention**: If these metrics are consistently high, it indicates that tasks are
frequently being stalled due to lack of resources, which can significantly degrade system performance.
- **Troubleshooting performance issues**: If a system is experiencing performance issues, these metrics can
help identify whether resource contention is the cause.
- **Capacity planning**: By monitoring these metrics over time, you can understand trends in resource
utilization and make informed decisions about when to add more resources to your system.
method_description: ""
supported_platforms:
include: []
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
@ -878,7 +1040,29 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "Monitor SoftIRQs metrics for efficient software interrupt operations."
metrics_description: |
In the Linux kernel, handling of hardware interrupts is split into two halves: the top half and the bottom half.
The top half is the routine that responds immediately to an interrupt, while the bottom half is deferred to be processed later.
Softirqs are a mechanism in the Linux kernel used to handle the bottom halves of interrupts, which can be
deferred and processed later in a context where it's safe to enable interrupts.
The actual work of handling the interrupt is offloaded to a softirq and executed later when the system
decides it's a good time to process them. This helps to keep the system responsive by not blocking the top
half for too long, which could lead to missed interrupts.
Monitoring `/proc/softirqs` is useful for:
- **Performance tuning**: A high rate of softirqs could indicate a performance issue. For instance, a high
rate of network softirqs (`NET_RX` and `NET_TX`) could indicate a network performance issue.
- **Troubleshooting**: If a system is behaving unexpectedly, checking the softirqs could provide clues about
what is going on. For example, a sudden increase in block device softirqs (BLOCK) might indicate a problem
with a disk.
- **Understanding system behavior**: Knowing what types of softirqs are happening can help you understand what
your system is doing, particularly in terms of how it's interacting with hardware and how it's handling
interrupts.
method_description: ""
supported_platforms:
include: []
@ -963,7 +1147,29 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "This integration provides statistics on Softnet, such as processed events, dropped events and more."
metrics_description: |
`/proc/net/softnet_stat` provides statistics that relate to the handling of network packets by softirq.
It provides information about:
- Total number of processed packets (`processed`).
- Times ksoftirq ran out of quota (`dropped`).
- Times net_rx_action was rescheduled.
- Number of times processed all lists before quota.
- Number of times did not process all lists due to quota.
- Number of times net_rx_action was rescheduled for GRO (Generic Receive Offload) cells.
- Number of times GRO cells were processed.
Monitoring the /proc/net/softnet_stat file can be useful for:
- **Network performance monitoring**: By tracking the total number of processed packets and how many packets
were dropped, you can gain insights into your system's network performance.
- **Troubleshooting**: If you're experiencing network-related issues, this collector can provide valuable clues.
For instance, a high number of dropped packets may indicate a network problem.
- **Capacity planning**: If your system is consistently processing near its maximum capacity of network
packets, it might be time to consider upgrading your network infrastructure.
method_description: ""
supported_platforms:
include: []
@ -1069,12 +1275,28 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "Monitor memory usage metrics for efficient system memory management."
metrics_description: |
`/proc/meminfo` provides detailed information about the system's current memory usage. It includes information
about different types of memory, RAM, Swap, ZSwap, HugePages, Transparent HugePages (THP), Kernel memory,
SLAB memory, memory mappings, and more.
Monitoring /proc/meminfo can be useful for:
- **Performance Tuning**: Understanding your system's memory usage can help you make decisions about system
tuning and optimization. For example, if your system is frequently low on free memory, it might benefit
from more RAM.
- **Troubleshooting**: If your system is experiencing problems, `/proc/meminfo` can provide clues about
whether memory usage is a factor. For example, if your system is slow and cached swap is high, it could
mean that your system is swapping out a lot of memory to disk, which can degrade performance.
- **Capacity Planning**: By monitoring memory usage over time, you can understand trends and make informed
decisions about future capacity needs.
method_description: ""
supported_platforms:
include: []
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
@ -1152,13 +1374,26 @@ modules:
chart_type: area
dimensions:
- name: avail
- name: system.swap
- name: mem.swap
description: System Swap
unit: "MiB"
chart_type: stacked
dimensions:
- name: free
- name: used
- name: mem.swap_cached
description: Swap Memory Cached in RAM
unit: "MiB"
chart_type: stacked
dimensions:
- name: cached
- name: mem.zswap
description: Zswap Usage
unit: "MiB"
chart_type: stacked
dimensions:
- name: in-ram
- name: on-disk
- name: mem.hwcorrupt
description: Corrupted Memory detected by ECC
unit: "MiB"
@ -1198,7 +1433,7 @@ modules:
dimensions:
- name: reclaimable
- name: unreclaimable
- name: mem.hugepage
- name: mem.hugepages
description: Dedicated HugePages Memory
unit: "MiB"
chart_type: stacked
@ -1207,13 +1442,59 @@ modules:
- name: used
- name: surplus
- name: reserved
- name: mem.transparent_hugepages
- name: mem.thp
description: Transparent HugePages Memory
unit: "MiB"
chart_type: stacked
dimensions:
- name: anonymous
- name: shmem
- name: mem.thp_details
description: Details of Transparent HugePages Usage
unit: "MiB"
chart_type: line
dimensions:
- name: ShmemPmdMapped
- name: FileHugePages
- name: FilePmdMapped
- name: mem.reclaiming
description: Memory Reclaiming
unit: "MiB"
chart_type: line
dimensions:
- name: Active
- name: Inactive
- name: Active(anon)
- name: Inactive(anon)
- name: Active(file)
- name: Inactive(file)
- name: Unevictable
- name: Mlocked
- name: mem.high_low
description: High and Low Used and Free Memory Areas
unit: "MiB"
chart_type: stacked
dimensions:
- name: high_used
- name: low_used
- name: high_free
- name: low_free
- name: mem.cma
description: Contiguous Memory Allocator (CMA) Memory
unit: "MiB"
chart_type: stacked
dimensions:
- name: used
- name: free
- name: mem.directmaps
description: Direct Memory Mappings
unit: "MiB"
chart_type: stacked
dimensions:
- name: 4k
- name: 2m
- name: 4m
- name: 1g
- meta:
plugin_name: proc.plugin
module_name: /proc/pagetypeinfo
@ -1238,7 +1519,7 @@ modules:
supported_platforms:
include: []
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
@ -1326,7 +1607,20 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "This integration monitors system memory errors detected and corrected by ECC RAM."
metrics_description: |
The Error Detection and Correction (EDAC) subsystem is detecting and reporting errors in the system's memory,
primarily ECC (Error-Correcting Code) memory errors.
The collector provides data for:
- Per memory controller (MC): correctable and uncorrectable errors. These can be of 2 kinds:
- errors related to a DIMM
- errors that cannot be associated with a DIMM
- Per memory DIMM: correctable and uncorrectable errors. There are 2 kinds:
- memory controllers that can identify the physical DIMMs and report errors directly for them,
- memory controllers that report errors for memory address ranges that can be linked to DIMMs.
In this case the DIMMs reported may be more than the physical DIMMs installed.
method_description: ""
supported_platforms:
include: []
@ -1423,7 +1717,7 @@ modules:
- name: dimm_location
description: Location of the memory module.
- name: dimm_mem_type
description: Type of the memory module. Usually either buffered or unbuffered memory.
description: Type of the memory module.
- name: size
description: The amount of memory in megabytes that this memory module manages.
metrics:
@ -1453,7 +1747,19 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "Monitor NUMA metrics for efficient non-uniform memory access operations."
metrics_description: |
Information about NUMA (Non-Uniform Memory Access) nodes on the system.
NUMA is a method of configuring a cluster of microprocessors in a multiprocessing system so that they can
share memory locally, improving performance and the ability of the system to be expanded. NUMA is used in a
symmetric multiprocessing (SMP) system.
In a NUMA system, processors, memory, and I/O devices are grouped together into cells, also known as nodes.
Each node has its own memory and set of I/O devices, and one or more processors. While a processor can access
memory in any of the nodes, it does so faster when accessing memory within its own node.
The collector provides statistics on memory allocations for processes running on the NUMA nodes, revealing the
efficiency of memory allocations in multi-node systems.
method_description: ""
supported_platforms:
include: []
@ -1530,15 +1836,23 @@ modules:
description: ""
keywords:
- ksm
- samepage
- merging
most_popular: false
overview:
data_collection:
metrics_description: "Examine KSM metrics for insights into memory deduplication operations."
metrics_description: |
Kernel Samepage Merging (KSM) is a memory-saving feature in Linux that enables the kernel to examine the
memory of different processes and identify identical pages. It then merges these identical pages into a
single page that the processes share. This is particularly useful for virtualization, where multiple virtual
machines might be running the same operating system or applications and have many identical pages.
The collector provides information about the operation and effectiveness of KSM on your system.
method_description: ""
supported_platforms:
include: []
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
@ -1622,7 +1936,11 @@ modules:
most_popular: false
overview:
data_collection:
metrics_description: "This integration monitors ZRAM usage, compression ratios, and more."
metrics_description: |
zRAM, or compressed RAM, is a block device that uses a portion of your system's RAM as a block device.
The data written to this block device is compressed and stored in memory.
The collector provides information about the operation and the effectiveness of zRAM on your system.
method_description: ""
supported_platforms:
include: []
@ -1715,15 +2033,30 @@ modules:
keywords:
- ipc
- semaphores
- shared memory
most_popular: false
overview:
data_collection:
metrics_description: "Monitor Inter Process Communication performance for optimal process interaction."
metrics_description: |
IPC stands for Inter-Process Communication. It is a mechanism which allows processes to communicate with each
other and synchronize their actions.
This collector exposes information about:
- Message Queues: This allows messages to be exchanged between processes. It's a more flexible method that
allows messages to be placed onto a queue and read at a later time.
- Shared Memory: This method allows for the fastest form of IPC because processes can exchange data by
reading/writing into shared memory segments.
- Semaphores: They are used to synchronize the operations performed by independent processes. So, if multiple
processes are trying to access a single shared resource, semaphores can ensure that only one process
accesses the resource at a given time.
method_description: ""
supported_platforms:
include: []
exclude: []
multi_instance: true
multi_instance: false
additional_permissions:
description: ""
default_behavior:
@ -1828,13 +2161,18 @@ modules:
description: ""
keywords:
- disk
- disks
- io
- bcache
- block devices
most_popular: false
overview:
data_collection:
metrics_description: "This integration provides statistics about disk and Bcache I/O."
metrics_description: |
Detailed statistics for each of your system's disk devices and partitions.
The data is reported by the kernel and can be used to monitor disk activity on a Linux system.
Get valuable insight into how your disks are performing and where potential bottlenecks might be.
method_description: ""
supported_platforms:
include: []

View file

@ -9,58 +9,92 @@ int do_proc_meminfo(int update_every, usec_t dt) {
(void)dt;
static procfile *ff = NULL;
static int do_ram = -1, do_swap = -1, do_hwcorrupt = -1, do_committed = -1, do_writeback = -1, do_kernel = -1, do_slab = -1, do_hugepages = -1, do_transparent_hugepages = -1;
static int do_percpu = 0;
static int do_ram = -1
, do_swap = -1
, do_hwcorrupt = -1
, do_committed = -1
, do_writeback = -1
, do_kernel = -1
, do_slab = -1
, do_hugepages = -1
, do_transparent_hugepages = -1
, do_reclaiming = -1
, do_high_low = -1
, do_cma = -1
, do_directmap = -1;
static ARL_BASE *arl_base = NULL;
static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL;
static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL, *arl_hugepages_total = NULL,
*arl_zswapped = NULL, *arl_high_low = NULL, *arl_cma_total = NULL,
*arl_directmap4k = NULL, *arl_directmap2m = NULL, *arl_directmap4m = NULL, *arl_directmap1g = NULL;
static unsigned long long
MemTotal = 0,
MemFree = 0,
MemAvailable = 0,
Buffers = 0,
Cached = 0,
//SwapCached = 0,
//Active = 0,
//Inactive = 0,
//ActiveAnon = 0,
//InactiveAnon = 0,
//ActiveFile = 0,
//InactiveFile = 0,
//Unevictable = 0,
//Mlocked = 0,
SwapTotal = 0,
SwapFree = 0,
Dirty = 0,
Writeback = 0,
//AnonPages = 0,
//Mapped = 0,
Shmem = 0,
Slab = 0,
SReclaimable = 0,
SUnreclaim = 0,
KernelStack = 0,
PageTables = 0,
NFS_Unstable = 0,
Bounce = 0,
WritebackTmp = 0,
//CommitLimit = 0,
Committed_AS = 0,
//VmallocTotal = 0,
VmallocUsed = 0,
//VmallocChunk = 0,
Percpu = 0,
AnonHugePages = 0,
ShmemHugePages = 0,
HugePages_Total = 0,
HugePages_Free = 0,
HugePages_Rsvd = 0,
HugePages_Surp = 0,
Hugepagesize = 0,
//DirectMap4k = 0,
//DirectMap2M = 0,
HardwareCorrupted = 0;
MemTotal = 0
, MemFree = 0
, MemAvailable = 0
, Buffers = 0
, Cached = 0
, SwapCached = 0
, Active = 0
, Inactive = 0
, ActiveAnon = 0
, InactiveAnon = 0
, ActiveFile = 0
, InactiveFile = 0
, Unevictable = 0
, Mlocked = 0
, HighTotal = 0
, HighFree = 0
, LowTotal = 0
, LowFree = 0
, MmapCopy = 0
, SwapTotal = 0
, SwapFree = 0
, Zswap = 0
, Zswapped = 0
, Dirty = 0
, Writeback = 0
, AnonPages = 0
, Mapped = 0
, Shmem = 0
, KReclaimable = 0
, Slab = 0
, SReclaimable = 0
, SUnreclaim = 0
, KernelStack = 0
, ShadowCallStack = 0
, PageTables = 0
, SecPageTables = 0
, NFS_Unstable = 0
, Bounce = 0
, WritebackTmp = 0
, CommitLimit = 0
, Committed_AS = 0
, VmallocTotal = 0
, VmallocUsed = 0
, VmallocChunk = 0
, Percpu = 0
//, EarlyMemtestBad = 0
, HardwareCorrupted = 0
, AnonHugePages = 0
, ShmemHugePages = 0
, ShmemPmdMapped = 0
, FileHugePages = 0
, FilePmdMapped = 0
, CmaTotal = 0
, CmaFree = 0
//, Unaccepted = 0
, HugePages_Total = 0
, HugePages_Free = 0
, HugePages_Rsvd = 0
, HugePages_Surp = 0
, Hugepagesize = 0
//, Hugetlb = 0
, DirectMap4k = 0
, DirectMap2M = 0
, DirectMap4M = 0
, DirectMap1G = 0
;
if(unlikely(!arl_base)) {
do_ram = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "system ram", 1);
@ -72,6 +106,12 @@ int do_proc_meminfo(int update_every, usec_t dt) {
do_slab = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "slab memory", 1);
do_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "hugepages", CONFIG_BOOLEAN_AUTO);
do_transparent_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "transparent hugepages", CONFIG_BOOLEAN_AUTO);
do_reclaiming = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "memory reclaiming", CONFIG_BOOLEAN_AUTO);
do_high_low = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "high low memory", CONFIG_BOOLEAN_AUTO);
do_cma = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "cma memory", CONFIG_BOOLEAN_AUTO);
do_directmap = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "direct maps", CONFIG_BOOLEAN_AUTO);
// https://github.com/torvalds/linux/blob/master/fs/proc/meminfo.c
arl_base = arl_create("meminfo", NULL, 60);
arl_expect(arl_base, "MemTotal", &MemTotal);
@ -79,46 +119,90 @@ int do_proc_meminfo(int update_every, usec_t dt) {
arl_memavailable = arl_expect(arl_base, "MemAvailable", &MemAvailable);
arl_expect(arl_base, "Buffers", &Buffers);
arl_expect(arl_base, "Cached", &Cached);
//arl_expect(arl_base, "SwapCached", &SwapCached);
//arl_expect(arl_base, "Active", &Active);
//arl_expect(arl_base, "Inactive", &Inactive);
//arl_expect(arl_base, "ActiveAnon", &ActiveAnon);
//arl_expect(arl_base, "InactiveAnon", &InactiveAnon);
//arl_expect(arl_base, "ActiveFile", &ActiveFile);
//arl_expect(arl_base, "InactiveFile", &InactiveFile);
//arl_expect(arl_base, "Unevictable", &Unevictable);
//arl_expect(arl_base, "Mlocked", &Mlocked);
arl_expect(arl_base, "SwapCached", &SwapCached);
arl_expect(arl_base, "Active", &Active);
arl_expect(arl_base, "Inactive", &Inactive);
arl_expect(arl_base, "Active(anon)", &ActiveAnon);
arl_expect(arl_base, "Inactive(anon)", &InactiveAnon);
arl_expect(arl_base, "Active(file)", &ActiveFile);
arl_expect(arl_base, "Inactive(file)", &InactiveFile);
arl_expect(arl_base, "Unevictable", &Unevictable);
arl_expect(arl_base, "Mlocked", &Mlocked);
// CONFIG_HIGHMEM
arl_high_low = arl_expect(arl_base, "HighTotal", &HighTotal);
arl_expect(arl_base, "HighFree", &HighFree);
arl_expect(arl_base, "LowTotal", &LowTotal);
arl_expect(arl_base, "LowFree", &LowFree);
// CONFIG_MMU
arl_expect(arl_base, "MmapCopy", &MmapCopy);
arl_expect(arl_base, "SwapTotal", &SwapTotal);
arl_expect(arl_base, "SwapFree", &SwapFree);
// CONFIG_ZSWAP
arl_zswapped = arl_expect(arl_base, "Zswap", &Zswap);
arl_expect(arl_base, "Zswapped", &Zswapped);
arl_expect(arl_base, "Dirty", &Dirty);
arl_expect(arl_base, "Writeback", &Writeback);
//arl_expect(arl_base, "AnonPages", &AnonPages);
//arl_expect(arl_base, "Mapped", &Mapped);
arl_expect(arl_base, "AnonPages", &AnonPages);
arl_expect(arl_base, "Mapped", &Mapped);
arl_expect(arl_base, "Shmem", &Shmem);
arl_expect(arl_base, "KReclaimable", &KReclaimable);
arl_expect(arl_base, "Slab", &Slab);
arl_expect(arl_base, "SReclaimable", &SReclaimable);
arl_expect(arl_base, "SUnreclaim", &SUnreclaim);
arl_expect(arl_base, "KernelStack", &KernelStack);
// CONFIG_SHADOW_CALL_STACK
arl_expect(arl_base, "ShadowCallStack", &ShadowCallStack);
arl_expect(arl_base, "PageTables", &PageTables);
arl_expect(arl_base, "SecPageTables", &SecPageTables);
arl_expect(arl_base, "NFS_Unstable", &NFS_Unstable);
arl_expect(arl_base, "Bounce", &Bounce);
arl_expect(arl_base, "WritebackTmp", &WritebackTmp);
//arl_expect(arl_base, "CommitLimit", &CommitLimit);
arl_expect(arl_base, "CommitLimit", &CommitLimit);
arl_expect(arl_base, "Committed_AS", &Committed_AS);
//arl_expect(arl_base, "VmallocTotal", &VmallocTotal);
arl_expect(arl_base, "VmallocTotal", &VmallocTotal);
arl_expect(arl_base, "VmallocUsed", &VmallocUsed);
//arl_expect(arl_base, "VmallocChunk", &VmallocChunk);
arl_expect(arl_base, "VmallocChunk", &VmallocChunk);
arl_expect(arl_base, "Percpu", &Percpu);
// CONFIG_MEMTEST
//arl_expect(arl_base, "EarlyMemtestBad", &EarlyMemtestBad);
// CONFIG_MEMORY_FAILURE
arl_hwcorrupted = arl_expect(arl_base, "HardwareCorrupted", &HardwareCorrupted);
// CONFIG_TRANSPARENT_HUGEPAGE
arl_expect(arl_base, "AnonHugePages", &AnonHugePages);
arl_expect(arl_base, "ShmemHugePages", &ShmemHugePages);
arl_expect(arl_base, "HugePages_Total", &HugePages_Total);
arl_expect(arl_base, "ShmemPmdMapped", &ShmemPmdMapped);
arl_expect(arl_base, "FileHugePages", &FileHugePages);
arl_expect(arl_base, "FilePmdMapped", &FilePmdMapped);
// CONFIG_CMA
arl_cma_total = arl_expect(arl_base, "CmaTotal", &CmaTotal);
arl_expect(arl_base, "CmaFree", &CmaFree);
// CONFIG_UNACCEPTED_MEMORY
//arl_expect(arl_base, "Unaccepted", &Unaccepted);
// these appear only when hugepages are supported
arl_hugepages_total = arl_expect(arl_base, "HugePages_Total", &HugePages_Total);
arl_expect(arl_base, "HugePages_Free", &HugePages_Free);
arl_expect(arl_base, "HugePages_Rsvd", &HugePages_Rsvd);
arl_expect(arl_base, "HugePages_Surp", &HugePages_Surp);
arl_expect(arl_base, "Hugepagesize", &Hugepagesize);
//arl_expect(arl_base, "DirectMap4k", &DirectMap4k);
//arl_expect(arl_base, "DirectMap2M", &DirectMap2M);
//arl_expect(arl_base, "Hugetlb", &Hugetlb);
arl_directmap4k = arl_expect(arl_base, "DirectMap4k", &DirectMap4k);
arl_directmap2m = arl_expect(arl_base, "DirectMap2M", &DirectMap2M);
arl_directmap4m = arl_expect(arl_base, "DirectMap4M", &DirectMap4M);
arl_directmap1g = arl_expect(arl_base, "DirectMap1G", &DirectMap1G);
}
if(unlikely(!ff)) {
@ -136,26 +220,17 @@ int do_proc_meminfo(int update_every, usec_t dt) {
size_t lines = procfile_lines(ff), l;
arl_begin(arl_base);
static int first_ff_read = 1;
for(l = 0; l < lines ;l++) {
size_t words = procfile_linewords(ff, l);
if(unlikely(words < 2)) continue;
if (first_ff_read && !strcmp(procfile_lineword(ff, l, 0), "Percpu"))
do_percpu = 1;
if(unlikely(arl_check(arl_base,
procfile_lineword(ff, l, 0),
procfile_lineword(ff, l, 1)))) break;
}
if (first_ff_read)
first_ff_read = 0;
// http://calimeroteknik.free.fr/blag/?article20/really-used-memory-on-gnu-linux
unsigned long long MemCached = Cached + SReclaimable - Shmem;
unsigned long long MemCached = Cached + SReclaimable + KReclaimable - Shmem;
unsigned long long MemUsed = MemTotal - MemFree - MemCached - Buffers;
// The Linux kernel doesn't report ZFS ARC usage as cache memory (the ARC is included in the total used system memory)
if (!inside_lxc_container) {
@ -207,7 +282,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
"mem"
, "available"
, NULL
, "system"
, "overview"
, NULL
, "Available RAM for applications"
, "MiB"
@ -238,7 +313,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
if(unlikely(!st_system_swap)) {
st_system_swap = rrdset_create_localhost(
"system"
"mem"
, "swap"
, NULL
, "swap"
@ -247,7 +322,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_SYSTEM_SWAP
, NETDATA_CHART_PRIO_MEM_SWAP
, update_every
, RRDSET_TYPE_STACKED
);
@ -261,6 +336,62 @@ int do_proc_meminfo(int update_every, usec_t dt) {
rrddim_set_by_pointer(st_system_swap, rd_used, SwapUsed);
rrddim_set_by_pointer(st_system_swap, rd_free, SwapFree);
rrdset_done(st_system_swap);
{
static RRDSET *st_mem_swap_cached = NULL;
static RRDDIM *rd_cached = NULL;
if (unlikely(!st_mem_swap_cached)) {
st_mem_swap_cached = rrdset_create_localhost(
"mem"
, "swap_cached"
, NULL
, "swap"
, NULL
, "Swap Memory Cached in RAM"
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_MEM_SWAP + 1
, update_every
, RRDSET_TYPE_AREA
);
rd_cached = rrddim_add(st_mem_swap_cached, "cached", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
}
rrddim_set_by_pointer(st_mem_swap_cached, rd_cached, SwapCached);
rrdset_done(st_mem_swap_cached);
}
if(arl_zswapped->flags & ARL_ENTRY_FLAG_FOUND) {
static RRDSET *st_mem_zswap = NULL;
static RRDDIM *rd_zswap = NULL, *rd_zswapped = NULL;
if (unlikely(!st_mem_zswap)) {
st_mem_zswap = rrdset_create_localhost(
"mem"
, "zswap"
, NULL
, "zswap"
, NULL
, "Zswap Usage"
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_MEM_ZSWAP
, update_every
, RRDSET_TYPE_STACKED
);
rd_zswap = rrddim_add(st_mem_zswap, "zswap", "in-ram", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_zswapped = rrddim_add(st_mem_zswap, "zswapped", "on-disk", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
}
rrddim_set_by_pointer(st_mem_zswap, rd_zswap, Zswap);
rrddim_set_by_pointer(st_mem_zswap, rd_zswapped, Zswapped);
rrdset_done(st_mem_zswap);
}
}
if(arl_hwcorrupted->flags & ARL_ENTRY_FLAG_FOUND &&
@ -306,7 +437,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
"mem"
, "committed"
, NULL
, "system"
, "overview"
, NULL
, "Committed (Allocated) Memory"
, "MiB"
@ -335,7 +466,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
"mem"
, "writeback"
, NULL
, "kernel"
, "writeback"
, NULL
, "Writeback Memory"
, "MiB"
@ -367,7 +498,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
if(do_kernel) {
static RRDSET *st_mem_kernel = NULL;
static RRDDIM *rd_slab = NULL, *rd_kernelstack = NULL, *rd_pagetables = NULL, *rd_vmallocused = NULL,
*rd_percpu = NULL;
*rd_percpu = NULL, *rd_kreclaimable = NULL;
if(unlikely(!st_mem_kernel)) {
st_mem_kernel = rrdset_create_localhost(
@ -391,16 +522,16 @@ int do_proc_meminfo(int update_every, usec_t dt) {
rd_kernelstack = rrddim_add(st_mem_kernel, "KernelStack", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_pagetables = rrddim_add(st_mem_kernel, "PageTables", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_vmallocused = rrddim_add(st_mem_kernel, "VmallocUsed", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
if (do_percpu)
rd_percpu = rrddim_add(st_mem_kernel, "Percpu", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_kreclaimable = rrddim_add(st_mem_kernel, "KReclaimable", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
}
rrddim_set_by_pointer(st_mem_kernel, rd_slab, Slab);
rrddim_set_by_pointer(st_mem_kernel, rd_kernelstack, KernelStack);
rrddim_set_by_pointer(st_mem_kernel, rd_pagetables, PageTables);
rrddim_set_by_pointer(st_mem_kernel, rd_vmallocused, VmallocUsed);
if (do_percpu)
rrddim_set_by_pointer(st_mem_kernel, rd_percpu, Percpu);
rrddim_set_by_pointer(st_mem_kernel, rd_kreclaimable, KReclaimable);
rrdset_done(st_mem_kernel);
}
@ -436,9 +567,10 @@ int do_proc_meminfo(int update_every, usec_t dt) {
rrdset_done(st_mem_slab);
}
if(do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO &&
if(arl_hugepages_total->flags & ARL_ENTRY_FLAG_FOUND &&
(do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO &&
((Hugepagesize && HugePages_Total) ||
netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)))) {
do_hugepages = CONFIG_BOOLEAN_YES;
static RRDSET *st_mem_hugepages = NULL;
@ -455,7 +587,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1
, NETDATA_CHART_PRIO_MEM_HUGEPAGES
, update_every
, RRDSET_TYPE_STACKED
);
@ -487,7 +619,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
if(unlikely(!st_mem_transparent_hugepages)) {
st_mem_transparent_hugepages = rrdset_create_localhost(
"mem"
, "transparent_hugepages"
, "thp"
, NULL
, "hugepages"
, NULL
@ -495,7 +627,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_MEM_HUGEPAGES
, NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1
, update_every
, RRDSET_TYPE_STACKED
);
@ -509,6 +641,206 @@ int do_proc_meminfo(int update_every, usec_t dt) {
rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_anonymous, AnonHugePages);
rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_shared, ShmemHugePages);
rrdset_done(st_mem_transparent_hugepages);
{
static RRDSET *st_mem_thp_details = NULL;
static RRDDIM *rd_shmem_pmd_mapped = NULL, *rd_file_huge_pages = NULL, *rd_file_pmd_mapped = NULL;
if(unlikely(!st_mem_thp_details)) {
st_mem_thp_details = rrdset_create_localhost(
"mem"
, "thp_details"
, NULL
, "hugepages"
, NULL
, "Details of Transparent HugePages Usage"
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_MEM_HUGEPAGES_DETAILS
, update_every
, RRDSET_TYPE_LINE
);
rrdset_flag_set(st_mem_thp_details, RRDSET_FLAG_DETAIL);
rd_shmem_pmd_mapped = rrddim_add(st_mem_thp_details, "shmem_pmd", "ShmemPmdMapped", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_file_huge_pages = rrddim_add(st_mem_thp_details, "file", "FileHugePages", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_file_pmd_mapped = rrddim_add(st_mem_thp_details, "file_pmd", "FilePmdMapped", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
}
rrddim_set_by_pointer(st_mem_thp_details, rd_shmem_pmd_mapped, ShmemPmdMapped);
rrddim_set_by_pointer(st_mem_thp_details, rd_file_huge_pages, FileHugePages);
rrddim_set_by_pointer(st_mem_thp_details, rd_file_pmd_mapped, FilePmdMapped);
rrdset_done(st_mem_thp_details);
}
}
if(do_reclaiming != CONFIG_BOOLEAN_NO) {
static RRDSET *st_mem_reclaiming = NULL;
static RRDDIM *rd_active = NULL, *rd_inactive = NULL,
*rd_active_anon = NULL, *rd_inactive_anon = NULL,
*rd_active_file = NULL, *rd_inactive_file = NULL,
*rd_unevictable = NULL, *rd_mlocked = NULL;
if(unlikely(!st_mem_reclaiming)) {
st_mem_reclaiming = rrdset_create_localhost(
"mem"
, "reclaiming"
, NULL
, "reclaiming"
, NULL
, "Memory Reclaiming"
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_MEM_RECLAIMING
, update_every
, RRDSET_TYPE_LINE
);
rrdset_flag_set(st_mem_reclaiming, RRDSET_FLAG_DETAIL);
rd_active = rrddim_add(st_mem_reclaiming, "active", "Active", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_inactive = rrddim_add(st_mem_reclaiming, "inactive", "Inactive", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_active_anon = rrddim_add(st_mem_reclaiming, "active_anon", "Active(anon)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_inactive_anon = rrddim_add(st_mem_reclaiming, "inactive_anon", "Inactive(anon)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_active_file = rrddim_add(st_mem_reclaiming, "active_file", "Active(file)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_inactive_file = rrddim_add(st_mem_reclaiming, "inactive_file", "Inactive(file)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_unevictable = rrddim_add(st_mem_reclaiming, "unevictable", "Unevictable", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_mlocked = rrddim_add(st_mem_reclaiming, "mlocked", "Mlocked", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
}
rrddim_set_by_pointer(st_mem_reclaiming, rd_active, Active);
rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive, Inactive);
rrddim_set_by_pointer(st_mem_reclaiming, rd_active_anon, ActiveAnon);
rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive_anon, InactiveAnon);
rrddim_set_by_pointer(st_mem_reclaiming, rd_active_file, ActiveFile);
rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive_file, InactiveFile);
rrddim_set_by_pointer(st_mem_reclaiming, rd_unevictable, Unevictable);
rrddim_set_by_pointer(st_mem_reclaiming, rd_mlocked, Mlocked);
rrdset_done(st_mem_reclaiming);
}
if(do_high_low != CONFIG_BOOLEAN_NO && (arl_high_low->flags & ARL_ENTRY_FLAG_FOUND)) {
static RRDSET *st_mem_high_low = NULL;
static RRDDIM *rd_high_used = NULL, *rd_low_used = NULL;
static RRDDIM *rd_high_free = NULL, *rd_low_free = NULL;
if(unlikely(!st_mem_high_low)) {
st_mem_high_low = rrdset_create_localhost(
"mem"
, "high_low"
, NULL
, "high_low"
, NULL
, "High and Low Used and Free Memory Areas"
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_MEM_HIGH_LOW
, update_every
, RRDSET_TYPE_STACKED
);
rrdset_flag_set(st_mem_high_low, RRDSET_FLAG_DETAIL);
rd_high_used = rrddim_add(st_mem_high_low, "high_used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_low_used = rrddim_add(st_mem_high_low, "low_used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_high_free = rrddim_add(st_mem_high_low, "high_free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_low_free = rrddim_add(st_mem_high_low, "low_free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
}
rrddim_set_by_pointer(st_mem_high_low, rd_high_used, HighTotal - HighFree);
rrddim_set_by_pointer(st_mem_high_low, rd_low_used, LowTotal - LowFree);
rrddim_set_by_pointer(st_mem_high_low, rd_high_free, HighFree);
rrddim_set_by_pointer(st_mem_high_low, rd_low_free, LowFree);
rrdset_done(st_mem_high_low);
}
if(do_cma == CONFIG_BOOLEAN_YES || (do_cma == CONFIG_BOOLEAN_AUTO && (arl_cma_total->flags & ARL_ENTRY_FLAG_FOUND) && CmaTotal)) {
do_cma = CONFIG_BOOLEAN_YES;
static RRDSET *st_mem_cma = NULL;
static RRDDIM *rd_used = NULL, *rd_free = NULL;
if(unlikely(!st_mem_cma)) {
st_mem_cma = rrdset_create_localhost(
"mem"
, "cma"
, NULL
, "cma"
, NULL
, "Contiguous Memory Allocator (CMA) Memory"
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_MEM_CMA
, update_every
, RRDSET_TYPE_STACKED
);
rd_used = rrddim_add(st_mem_cma, "used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
rd_free = rrddim_add(st_mem_cma, "free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
}
rrddim_set_by_pointer(st_mem_cma, rd_used, CmaTotal - CmaFree);
rrddim_set_by_pointer(st_mem_cma, rd_free, CmaFree);
rrdset_done(st_mem_cma);
}
if(do_directmap != CONFIG_BOOLEAN_NO &&
((arl_directmap4k->flags & ARL_ENTRY_FLAG_FOUND) ||
(arl_directmap2m->flags & ARL_ENTRY_FLAG_FOUND) ||
(arl_directmap4m->flags & ARL_ENTRY_FLAG_FOUND) ||
(arl_directmap1g->flags & ARL_ENTRY_FLAG_FOUND)))
{
static RRDSET *st_mem_directmap = NULL;
static RRDDIM *rd_4k = NULL, *rd_2m = NULL, *rd_1g = NULL, *rd_4m = NULL;
if(unlikely(!st_mem_directmap)) {
st_mem_directmap = rrdset_create_localhost(
"mem"
, "directmaps"
, NULL
, "overview"
, NULL
, "Direct Memory Mappings"
, "MiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_MEMINFO_NAME
, NETDATA_CHART_PRIO_MEM_DIRECTMAP
, update_every
, RRDSET_TYPE_STACKED
);
if(arl_directmap4k->flags & ARL_ENTRY_FLAG_FOUND)
rd_4k = rrddim_add(st_mem_directmap, "4k", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
if(arl_directmap2m->flags & ARL_ENTRY_FLAG_FOUND)
rd_2m = rrddim_add(st_mem_directmap, "2m", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
if(arl_directmap4m->flags & ARL_ENTRY_FLAG_FOUND)
rd_4m = rrddim_add(st_mem_directmap, "4m", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
if(arl_directmap1g->flags & ARL_ENTRY_FLAG_FOUND)
rd_1g = rrddim_add(st_mem_directmap, "1g", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
}
if(rd_4k)
rrddim_set_by_pointer(st_mem_directmap, rd_4k, DirectMap4k);
if(rd_2m)
rrddim_set_by_pointer(st_mem_directmap, rd_2m, DirectMap2M);
if(rd_4m)
rrddim_set_by_pointer(st_mem_directmap, rd_4m, DirectMap4M);
if(rd_1g)
rrddim_set_by_pointer(st_mem_directmap, rd_1g, DirectMap1G);
rrdset_done(st_mem_directmap);
}
return 0;

View file

@ -12,28 +12,55 @@ static int pressure_update_every = 0;
static struct pressure resources[PRESSURE_NUM_RESOURCES] = {
{
.some =
{.share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"},
.total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}},
.full =
{.share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"},
.total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}},
.some = {
.available = true,
.share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"},
.total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}
},
.full = {
// Disable CPU full pressure.
// See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8
.available = false,
.share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"},
.total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}
},
},
{
.some =
{.share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"},
.total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}},
.full =
{.share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"},
.total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}},
.some = {
.available = true,
.share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"},
.total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}
},
.full = {
.available = true,
.share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"},
.total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}
},
},
{
.some =
{.share_time = {.id = "io_some_pressure", .title = "I/O some pressure"},
.total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}},
.full =
{.share_time = {.id = "io_full_pressure", .title = "I/O full pressure"},
.total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}},
.some = {
.available = true,
.share_time = {.id = "io_some_pressure", .title = "I/O some pressure"},
.total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}
},
.full = {
.available = true,
.share_time = {.id = "io_full_pressure", .title = "I/O full pressure"},
.total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}
},
},
{
.some = {
// this is not available
.available = false,
.share_time = {.id = "irq_some_pressure", .title = "IRQ some pressure"},
.total_time = {.id = "irq_some_pressure_stall_time", .title = "IRQ some pressure stall time"}
},
.full = {
.available = true,
.share_time = {.id = "irq_full_pressure", .title = "IRQ full pressure"},
.total_time = {.id = "irq_full_pressure_stall_time", .title = "IRQ full pressure stall time"}
},
},
};
@ -46,6 +73,7 @@ static struct resource_info {
{ .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU },
{ .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM },
{ .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO },
{ .name = "irq", .family = "interrupts", .section_priority = NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS },
};
void update_pressure_charts(struct pressure_charts *pcs) {
@ -65,7 +93,7 @@ void update_pressure_charts(struct pressure_charts *pcs) {
}
}
static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
static void proc_pressure_do_resource(procfile *ff, int res_idx, size_t line, bool some) {
struct pressure_charts *pcs;
struct resource_info ri;
pcs = some ? &resources[res_idx].some : &resources[res_idx].full;
@ -93,9 +121,9 @@ static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
rrddim_add(pcs->share_time.st, some ? "some 300" : "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
}
pcs->share_time.value10 = strtod(procfile_lineword(ff, some ? 0 : 1, 2), NULL);
pcs->share_time.value60 = strtod(procfile_lineword(ff, some ? 0 : 1, 4), NULL);
pcs->share_time.value300 = strtod(procfile_lineword(ff, some ? 0 : 1, 6), NULL);
pcs->share_time.value10 = strtod(procfile_lineword(ff, line, 2), NULL);
pcs->share_time.value60 = strtod(procfile_lineword(ff, line, 4), NULL);
pcs->share_time.value300 = strtod(procfile_lineword(ff, line, 6), NULL);
if (unlikely(!pcs->total_time.st)) {
pcs->total_time.st = rrdset_create_localhost(
@ -114,19 +142,19 @@ static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
pcs->total_time.rdtotal = rrddim_add(pcs->total_time.st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
pcs->total_time.value_total = str2ull(procfile_lineword(ff, some ? 0 : 1, 8), NULL) / 1000;
pcs->total_time.value_total = str2ull(procfile_lineword(ff, line, 8), NULL) / 1000;
}
static void proc_pressure_do_resource_some(procfile *ff, int res_idx) {
proc_pressure_do_resource(ff, res_idx, 1);
static void proc_pressure_do_resource_some(procfile *ff, int res_idx, size_t line) {
proc_pressure_do_resource(ff, res_idx, line, true);
}
static void proc_pressure_do_resource_full(procfile *ff, int res_idx) {
proc_pressure_do_resource(ff, res_idx, 0);
static void proc_pressure_do_resource_full(procfile *ff, int res_idx, size_t line) {
proc_pressure_do_resource(ff, res_idx, line, false);
}
int do_proc_pressure(int update_every, usec_t dt) {
int fail_count = 0;
int ok_count = 0;
int i;
static usec_t next_pressure_dt = 0;
@ -161,56 +189,59 @@ int do_proc_pressure(int update_every, usec_t dt) {
, base_path
, resource_info[i].name);
do_some = resources[i].some.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
do_full = resources[i].full.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name);
do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_some);
resources[i].some.enabled = do_some;
// Disable CPU full pressure.
// See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8
if (i == 0) {
do_full = CONFIG_BOOLEAN_NO;
resources[i].full.enabled = do_full;
} else {
snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name);
do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_full);
resources[i].full.enabled = do_full;
}
if(!do_full && !do_some)
continue;
ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT);
if (unlikely(!ff)) {
collector_error("Cannot read pressure information from %s.", filename);
fail_count++;
continue;
}
}
ff = procfile_readall(ff);
resource_info[i].pf = ff;
if (unlikely(!ff)) {
fail_count++;
if (unlikely(!ff))
continue;
}
size_t lines = procfile_lines(ff);
if (unlikely(lines < 1)) {
collector_error("%s has no lines.", procfile_filename(ff));
fail_count++;
continue;
}
for(size_t l = 0; l < lines ;l++) {
const char *key = procfile_lineword(ff, l, 0);
if(strcmp(key, "some") == 0) {
if(do_some) {
proc_pressure_do_resource_some(ff, i);
proc_pressure_do_resource_some(ff, i, l);
update_pressure_charts(&resources[i].some);
ok_count++;
}
if (do_full && lines > 2) {
proc_pressure_do_resource_full(ff, i);
}
else if(strcmp(key, "full") == 0) {
if(do_full) {
proc_pressure_do_resource_full(ff, i, l);
update_pressure_charts(&resources[i].full);
ok_count++;
}
}
}
}
if (PRESSURE_NUM_RESOURCES == fail_count) {
if(!ok_count)
return 1;
}
return 0;
}

View file

@ -3,13 +3,14 @@
#ifndef NETDATA_PROC_PRESSURE_H
#define NETDATA_PROC_PRESSURE_H
#define PRESSURE_NUM_RESOURCES 3
#define PRESSURE_NUM_RESOURCES 4
struct pressure {
int updated;
char *filename;
struct pressure_charts {
bool available;
int enabled;
struct pressure_share_time_chart {

View file

@ -494,7 +494,7 @@ int do_proc_stat(int update_every, usec_t dt) {
do_processes = config_get_boolean("plugin:proc:/proc/stat", "processes running", CONFIG_BOOLEAN_YES);
// give sane defaults based on the number of processors
if(unlikely(get_system_cpus() > 50)) {
if(unlikely(get_system_cpus() > 128)) {
// the system has too many processors
keep_per_core_fds_open = CONFIG_BOOLEAN_NO;
do_core_throttle_count = CONFIG_BOOLEAN_NO;

View file

@ -271,7 +271,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
if(unlikely(!st_swapio)) {
st_swapio = rrdset_create_localhost(
"system"
"mem"
, "swapio"
, NULL
, "swap"
@ -280,7 +280,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
, "KiB/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_VMSTAT_NAME
, NETDATA_CHART_PRIO_SYSTEM_SWAPIO
, NETDATA_CHART_PRIO_MEM_SWAPIO
, update_every
, RRDSET_TYPE_AREA
);
@ -336,7 +336,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
"mem"
, "pgfaults"
, NULL
, "system"
, "page faults"
, NULL
, "Memory Page Faults"
, "faults/s"
@ -372,7 +372,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
"mem"
, "oom_kill"
, NULL
, "system"
, "OOM kills"
, NULL
, "Out of Memory Kills"
, "kills/s"
@ -505,7 +505,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
if(unlikely(!st_zswapio)) {
st_zswapio = rrdset_create_localhost(
"system"
"mem"
, "zswapio"
, NULL
, "zswap"
@ -514,7 +514,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
, "KiB/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_VMSTAT_NAME
, NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO
, NETDATA_CHART_PRIO_MEM_ZSWAPIO
, update_every
, RRDSET_TYPE_AREA
);

View file

@ -2,7 +2,7 @@
# you can disable an alarm notification by setting the 'to' line to: silent
alarm: 30min_ram_swapped_out
on: system.swapio
on: mem.swapio
class: Workload
type: System
component: Memory
@ -19,7 +19,7 @@ component: Memory
to: sysadmin
alarm: used_swap
on: system.swap
on: mem.swap
class: Utilization
type: System
component: Memory