mirror of
https://github.com/netdata/netdata.git
synced 2025-04-28 14:42:31 +00:00
parent
accc426c8a
commit
065091c3f5
12 changed files with 1089 additions and 253 deletions
|
@ -23,18 +23,6 @@
|
|||
#define NETDATA_CHART_PRIO_SYSTEM_IO 150
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_PGPGIO 151
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_RAM 200
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_SWAP 201
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS 202
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_SWAPIO 250
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO 300
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO 301
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE 302
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE 303
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS 304
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT 305
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES 306
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE 307
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY 308
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_NET 500
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_IPV4 500 // freebsd only
|
||||
#define NETDATA_CHART_PRIO_SYSTEM_IP 501
|
||||
|
@ -87,17 +75,35 @@
|
|||
#define NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE 1010
|
||||
#define NETDATA_CHART_PRIO_MEM_SYSTEM_OOM_KILL 1020
|
||||
#define NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED 1030
|
||||
#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1040
|
||||
#define NETDATA_CHART_PRIO_MEM_SWAP 1035
|
||||
#define NETDATA_CHART_PRIO_MEM_SWAP_CALLS 1037
|
||||
#define NETDATA_CHART_PRIO_MEM_SWAPIO 1038
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAP 1036
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAPIO 1037
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO 1038
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE 1039
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE 1040
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS 1041
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT 1042
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES 1043
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE 1044
|
||||
#define NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY 1045
|
||||
#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1050
|
||||
#define NETDATA_CHART_PRIO_MEM_KERNEL 1100
|
||||
#define NETDATA_CHART_PRIO_MEM_SLAB 1200
|
||||
#define NETDATA_CHART_PRIO_MEM_RECLAIMING 1210
|
||||
#define NETDATA_CHART_PRIO_MEM_HIGH_LOW 1211
|
||||
#define NETDATA_CHART_PRIO_MEM_CMA 1212
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES 1250
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS 1251
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE 1252
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO 1253
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED 1254
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS 1255
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT 1256
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT 1257
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_DETAILS 1251
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS 1252
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE 1253
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO 1254
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED 1255
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS 1256
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT 1257
|
||||
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT 1258
|
||||
#define NETDATA_CHART_PRIO_MEM_DIRECTMAP 1260
|
||||
#define NETDATA_CHART_PRIO_MEM_KSM 1300
|
||||
#define NETDATA_CHART_PRIO_MEM_KSM_SAVINGS 1301
|
||||
#define NETDATA_CHART_PRIO_MEM_KSM_RATIOS 1302
|
||||
|
|
|
@ -62,6 +62,8 @@ static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
|
|||
static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
|
||||
static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
|
||||
static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
|
||||
static int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO;
|
||||
static int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO;
|
||||
|
||||
static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
|
||||
static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
|
||||
|
@ -828,6 +830,7 @@ struct cgroup {
|
|||
struct pressure cpu_pressure;
|
||||
struct pressure io_pressure;
|
||||
struct pressure memory_pressure;
|
||||
struct pressure irq_pressure;
|
||||
|
||||
// per cgroup charts
|
||||
RRDSET *st_cpu;
|
||||
|
@ -1451,28 +1454,33 @@ static inline void cgroup2_read_pressure(struct pressure *res) {
|
|||
return;
|
||||
}
|
||||
|
||||
res->some.share_time.value10 = strtod(procfile_lineword(ff, 0, 2), NULL);
|
||||
res->some.share_time.value60 = strtod(procfile_lineword(ff, 0, 4), NULL);
|
||||
res->some.share_time.value300 = strtod(procfile_lineword(ff, 0, 6), NULL);
|
||||
res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8), NULL) / 1000; // us->ms
|
||||
bool did_some = false, did_full = false;
|
||||
|
||||
if (lines > 2) {
|
||||
res->full.share_time.value10 = strtod(procfile_lineword(ff, 1, 2), NULL);
|
||||
res->full.share_time.value60 = strtod(procfile_lineword(ff, 1, 4), NULL);
|
||||
res->full.share_time.value300 = strtod(procfile_lineword(ff, 1, 6), NULL);
|
||||
res->full.total_time.value_total = str2ull(procfile_lineword(ff, 1, 8), NULL) / 1000; // us->ms
|
||||
for(size_t l = 0; l < lines ;l++) {
|
||||
const char *key = procfile_lineword(ff, l, 0);
|
||||
if(strcmp(key, "some") == 0) {
|
||||
res->some.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL);
|
||||
res->some.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL);
|
||||
res->some.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL);
|
||||
res->some.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms
|
||||
did_some = true;
|
||||
}
|
||||
|
||||
res->updated = 1;
|
||||
|
||||
if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) {
|
||||
res->some.enabled = CONFIG_BOOLEAN_YES;
|
||||
if (lines > 2) {
|
||||
res->full.enabled = CONFIG_BOOLEAN_YES;
|
||||
} else {
|
||||
res->full.enabled = CONFIG_BOOLEAN_NO;
|
||||
else if(strcmp(key, "full") == 0) {
|
||||
res->full.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL);
|
||||
res->full.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL);
|
||||
res->full.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL);
|
||||
res->full.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms
|
||||
did_full = true;
|
||||
}
|
||||
}
|
||||
|
||||
res->updated = (did_full || did_some) ? 1 : 0;
|
||||
|
||||
if(unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO))
|
||||
res->some.enabled = (did_some) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
|
||||
|
||||
if(unlikely(res->full.enabled == CONFIG_BOOLEAN_AUTO))
|
||||
res->full.enabled = (did_full) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1637,6 +1645,7 @@ static inline void read_cgroup(struct cgroup *cg) {
|
|||
cgroup2_read_pressure(&cg->cpu_pressure);
|
||||
cgroup2_read_pressure(&cg->io_pressure);
|
||||
cgroup2_read_pressure(&cg->memory_pressure);
|
||||
cgroup2_read_pressure(&cg->irq_pressure);
|
||||
cgroup_read_memory(&cg->memory, 1);
|
||||
}
|
||||
}
|
||||
|
@ -1851,6 +1860,7 @@ static inline void cgroup_free(struct cgroup *cg) {
|
|||
free_pressure(&cg->cpu_pressure);
|
||||
free_pressure(&cg->io_pressure);
|
||||
free_pressure(&cg->memory_pressure);
|
||||
free_pressure(&cg->irq_pressure);
|
||||
|
||||
freez(cg->id);
|
||||
freez(cg->intermediate_id);
|
||||
|
@ -2465,6 +2475,18 @@ static inline void discovery_update_filenames() {
|
|||
netdata_log_debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) {
|
||||
snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id);
|
||||
if (likely(stat(filename, &buf) != -1)) {
|
||||
cg->irq_pressure.filename = strdupz(filename);
|
||||
cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some;
|
||||
cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full;
|
||||
netdata_log_debug(D_CGROUP, "irq.pressure filename for cgroup '%s': '%s'", cg->id, cg->irq_pressure.filename);
|
||||
} else {
|
||||
netdata_log_debug(D_CGROUP, "irq.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4643,6 +4665,112 @@ void update_cgroup_charts(int update_every) {
|
|||
update_pressure_charts(pcs);
|
||||
}
|
||||
|
||||
res = &cg->irq_pressure;
|
||||
|
||||
if (likely(res->updated && res->some.enabled)) {
|
||||
struct pressure_charts *pcs;
|
||||
pcs = &res->some;
|
||||
|
||||
if (unlikely(!pcs->share_time.st)) {
|
||||
RRDSET *chart;
|
||||
snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure");
|
||||
chart = pcs->share_time.st = rrdset_create_localhost(
|
||||
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
|
||||
, "irq_some_pressure"
|
||||
, NULL
|
||||
, "interrupts"
|
||||
, k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure"
|
||||
, title
|
||||
, "percentage"
|
||||
, PLUGIN_CGROUPS_NAME
|
||||
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
|
||||
, cgroup_containers_chart_priority + 2310
|
||||
, update_every
|
||||
, RRDSET_TYPE_LINE
|
||||
);
|
||||
rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
|
||||
pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||
pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||
pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
if (unlikely(!pcs->total_time.st)) {
|
||||
RRDSET *chart;
|
||||
snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure stall time");
|
||||
chart = pcs->total_time.st = rrdset_create_localhost(
|
||||
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
|
||||
, "irq_some_pressure_stall_time"
|
||||
, NULL
|
||||
, "interrupts"
|
||||
, k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" : "cgroup.irq_some_pressure_stall_time"
|
||||
, title
|
||||
, "ms"
|
||||
, PLUGIN_CGROUPS_NAME
|
||||
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
|
||||
, cgroup_containers_chart_priority + 2330
|
||||
, update_every
|
||||
, RRDSET_TYPE_LINE
|
||||
);
|
||||
rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
|
||||
pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
|
||||
}
|
||||
|
||||
update_pressure_charts(pcs);
|
||||
}
|
||||
|
||||
if (likely(res->updated && res->full.enabled)) {
|
||||
struct pressure_charts *pcs;
|
||||
pcs = &res->full;
|
||||
|
||||
if (unlikely(!pcs->share_time.st)) {
|
||||
RRDSET *chart;
|
||||
snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure");
|
||||
|
||||
chart = pcs->share_time.st = rrdset_create_localhost(
|
||||
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
|
||||
, "irq_full_pressure"
|
||||
, NULL
|
||||
, "interrupts"
|
||||
, k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure"
|
||||
, title
|
||||
, "percentage"
|
||||
, PLUGIN_CGROUPS_NAME
|
||||
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
|
||||
, cgroup_containers_chart_priority + 2350
|
||||
, update_every
|
||||
, RRDSET_TYPE_LINE
|
||||
);
|
||||
|
||||
rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
|
||||
pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||
pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||
pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
if (unlikely(!pcs->total_time.st)) {
|
||||
RRDSET *chart;
|
||||
snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure stall time");
|
||||
chart = pcs->total_time.st = rrdset_create_localhost(
|
||||
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
|
||||
, "irq_full_pressure_stall_time"
|
||||
, NULL
|
||||
, "interrupts"
|
||||
, k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" : "cgroup.irq_full_pressure_stall_time"
|
||||
, title
|
||||
, "ms"
|
||||
, PLUGIN_CGROUPS_NAME
|
||||
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
|
||||
, cgroup_containers_chart_priority + 2370
|
||||
, update_every
|
||||
, RRDSET_TYPE_LINE
|
||||
);
|
||||
rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
|
||||
pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
|
||||
}
|
||||
|
||||
update_pressure_charts(pcs);
|
||||
}
|
||||
|
||||
res = &cg->io_pressure;
|
||||
|
||||
if (likely(res->updated && res->some.enabled)) {
|
||||
|
|
|
@ -38,7 +38,7 @@ static struct netdata_zswap_metric zswap_calculated_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_LINE,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO,
|
||||
.divisor = 100,
|
||||
.convertv = NULL,
|
||||
.value = -1},
|
||||
|
@ -71,7 +71,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_AREA,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE,
|
||||
.divisor = 1,
|
||||
.convertv = NULL,
|
||||
.value = -1},
|
||||
|
@ -84,7 +84,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_AREA,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE,
|
||||
.divisor = 1,
|
||||
.convertv = pages_to_bytes,
|
||||
.value = -1},
|
||||
|
@ -97,7 +97,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_LINE,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT,
|
||||
.divisor = 1,
|
||||
.convertv = NULL,
|
||||
.value = -1},
|
||||
|
@ -110,7 +110,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_AREA,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES,
|
||||
.divisor = 1,
|
||||
.convertv = pages_to_bytes,
|
||||
.value = -1},
|
||||
|
@ -123,7 +123,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_AREA,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE,
|
||||
.divisor = 1,
|
||||
.convertv = pages_to_bytes,
|
||||
.value = -1},
|
||||
|
@ -136,7 +136,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_LINE,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY,
|
||||
.divisor = 1,
|
||||
.convertv = NULL,
|
||||
.value = -1},
|
||||
|
@ -175,7 +175,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_STACKED,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
|
||||
.divisor = 1,
|
||||
.convertv = NULL,
|
||||
.value = -1},
|
||||
|
@ -188,7 +188,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_STACKED,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
|
||||
.divisor = 1,
|
||||
.convertv = NULL,
|
||||
.value = -1},
|
||||
|
@ -201,7 +201,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_STACKED,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
|
||||
.divisor = 1,
|
||||
.convertv = NULL,
|
||||
.value = -1},
|
||||
|
@ -214,7 +214,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_STACKED,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
|
||||
.divisor = 1,
|
||||
.convertv = NULL,
|
||||
.value = -1},
|
||||
|
@ -227,7 +227,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
|
|||
.charttype = RRDSET_TYPE_STACKED,
|
||||
.enabled = CONFIG_BOOLEAN_YES,
|
||||
.chart_created = CONFIG_BOOLEAN_NO,
|
||||
.prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
|
||||
.prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
|
||||
.divisor = 1,
|
||||
.convertv = NULL,
|
||||
.value = -1},
|
||||
|
@ -266,7 +266,7 @@ zswap_send_chart(struct netdata_zswap_metric *metric, int update_every, const ch
|
|||
{
|
||||
fprintf(
|
||||
stdout,
|
||||
"CHART system.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n",
|
||||
"CHART mem.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n",
|
||||
metric->chart_id,
|
||||
metric->title,
|
||||
metric->units,
|
||||
|
@ -291,7 +291,7 @@ static void zswap_send_dimension(struct netdata_zswap_metric *metric)
|
|||
|
||||
static void zswap_send_begin(struct netdata_zswap_metric *metric)
|
||||
{
|
||||
fprintf(stdout, "BEGIN system.zswap_%s\n", metric->chart_id);
|
||||
fprintf(stdout, "BEGIN mem.zswap_%s\n", metric->chart_id);
|
||||
}
|
||||
|
||||
static void zswap_send_set(struct netdata_zswap_metric *metric)
|
||||
|
|
|
@ -322,13 +322,13 @@ void ebpf_obsolete_swap_apps_charts(struct ebpf_module *em)
|
|||
*/
|
||||
static void ebpf_obsolete_swap_global(ebpf_module_t *em)
|
||||
{
|
||||
ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP,
|
||||
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
|
||||
NETDATA_MEM_SWAP_CHART,
|
||||
"Calls to access swap memory",
|
||||
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU,
|
||||
NETDATA_EBPF_CHART_TYPE_LINE,
|
||||
NULL,
|
||||
NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS,
|
||||
NETDATA_CHART_PRIO_MEM_SWAP_CALLS,
|
||||
em->update_every);
|
||||
}
|
||||
|
||||
|
@ -914,12 +914,12 @@ static void ebpf_swap_allocate_global_vectors(int apps)
|
|||
*/
|
||||
static void ebpf_create_swap_charts(int update_every)
|
||||
{
|
||||
ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_MEM_SWAP_CHART,
|
||||
ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_MEM_SWAP_CHART,
|
||||
"Calls to access swap memory",
|
||||
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU,
|
||||
NULL,
|
||||
NETDATA_EBPF_CHART_TYPE_LINE,
|
||||
NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS,
|
||||
NETDATA_CHART_PRIO_MEM_SWAP_CALLS,
|
||||
ebpf_create_global_dimension,
|
||||
swap_publish_aggregated, NETDATA_SWAP_END,
|
||||
update_every, NETDATA_EBPF_MODULE_NAME_SWAP);
|
||||
|
|
|
@ -1035,7 +1035,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) {
|
|||
|
||||
if (unlikely(!st)) {
|
||||
st = rrdset_create_localhost(
|
||||
"system",
|
||||
"mem",
|
||||
"swapio",
|
||||
NULL,
|
||||
"swap",
|
||||
|
@ -1044,7 +1044,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) {
|
|||
"KiB/s",
|
||||
"freebsd.plugin",
|
||||
"vm.stats.vm.v_swappgs",
|
||||
NETDATA_CHART_PRIO_SYSTEM_SWAPIO,
|
||||
NETDATA_CHART_PRIO_MEM_SWAPIO,
|
||||
update_every,
|
||||
RRDSET_TYPE_AREA
|
||||
);
|
||||
|
|
|
@ -20,27 +20,50 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "This integration provides a collection of statistics about the system such as CPU utilization, process counts and more."
|
||||
metrics_description: |
|
||||
CPU utilization, states, and frequencies, along with key Linux system performance metrics.
|
||||
|
||||
The `/proc/stat` file provides various types of system statistics:
|
||||
|
||||
- The overall system CPU usage statistics
|
||||
- Per CPU core statistics
|
||||
- The total context switching of the system
|
||||
- The total number of processes running
|
||||
- The total CPU interrupts
|
||||
- The total CPU softirqs
|
||||
|
||||
The collector also reads:
|
||||
|
||||
- `/proc/schedstat` for statistics about the process scheduler in the Linux kernel.
|
||||
- `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems.
|
||||
- `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system.
|
||||
- `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core.
|
||||
- `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states.
|
||||
- `/sys/devices/system/cpu/[X]/cpuidle/state[X]/name` to get the names of the idle states for each CPU core in a Linux system.
|
||||
- `/sys/devices/system/cpu/[X]/cpuidle/state[X]/time` to get the total time each specific CPU core has spent in each idle state since the system was started.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
include: [ "linux" ]
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
auto_detection:
|
||||
description: ""
|
||||
description: |
|
||||
The collector auto-detects all metrics. No configuration is needed.
|
||||
limits:
|
||||
description: ""
|
||||
performance_impact:
|
||||
description: ""
|
||||
description: |
|
||||
The collector disables CPU frequency and idle state monitoring when there are more than 128 CPU cores available.
|
||||
setup:
|
||||
prerequisites:
|
||||
list: []
|
||||
configuration:
|
||||
file:
|
||||
name: ""
|
||||
section_name: "plugin:proc:/proc/stat"
|
||||
name: "netdata.conf"
|
||||
description: ""
|
||||
options:
|
||||
description: ""
|
||||
|
@ -187,12 +210,43 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: ""
|
||||
metrics_description: |
|
||||
Entropy, a measure of the randomness or unpredictability of data.
|
||||
|
||||
In the context of cryptography, entropy is used to generate random numbers or keys that are essential for
|
||||
secure communication and encryption. Without a good source of entropy, cryptographic protocols can become
|
||||
vulnerable to attacks that exploit the predictability of the generated keys.
|
||||
|
||||
In most operating systems, entropy is generated by collecting random events from various sources, such as
|
||||
hardware interrupts, mouse movements, keyboard presses, and disk activity. These events are fed into a pool
|
||||
of entropy, which is then used to generate random numbers when needed.
|
||||
|
||||
The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs
|
||||
to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device,
|
||||
which blocks until enough entropy is available to generate the requested numbers. This ensures that the
|
||||
generated numbers are truly random and not predictable.
|
||||
|
||||
However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing
|
||||
programs that rely on random numbers to slow down or even freeze. This is especially problematic for
|
||||
cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH.
|
||||
|
||||
To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality
|
||||
entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or
|
||||
radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than
|
||||
software-based sources.
|
||||
|
||||
One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used
|
||||
for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates
|
||||
high-quality entropy, which can be used to seed the pool of entropy in the operating system.
|
||||
|
||||
Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by
|
||||
exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions
|
||||
can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
include: [ "linux" ]
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
|
@ -264,12 +318,23 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "This integration provides the amount of time the system has been up (running)."
|
||||
metrics_description: |
|
||||
The amount of time the system has been up (running).
|
||||
|
||||
Uptime is a critical aspect of overall system performance:
|
||||
|
||||
- **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes.
|
||||
- **Performance Monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends.
|
||||
- **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems.
|
||||
- **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause.
|
||||
- **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others.
|
||||
- **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention.
|
||||
- **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
include: [ "linux" ]
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
|
@ -340,12 +405,33 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "This integration provides information about process, memory, swap space utilization and many more."
|
||||
metrics_description: |
|
||||
Linux Virtual memory subsystem.
|
||||
|
||||
Information about memory management, indicating how effectively the kernel allocates and frees
|
||||
memory resources in response to system demands.
|
||||
|
||||
Monitors page faults, which occur when a process requests a portion of its memory that isn't
|
||||
immediately available. Monitoring these events can help diagnose inefficiencies in memory management and
|
||||
provide insights into application behavior.
|
||||
|
||||
Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to
|
||||
swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap,
|
||||
a compressed cache for swap pages, and provides insights into its usage and performance implications.
|
||||
|
||||
In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance
|
||||
memory resources between host and guest systems.
|
||||
|
||||
For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which
|
||||
can impact the performance based on the memory access times.
|
||||
|
||||
The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out
|
||||
of memory resources.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
include: [ "linux" ]
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
|
@ -535,7 +621,37 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "Monitor Interrupts metrics for efficient processor interrupt handling."
|
||||
metrics_description: |
|
||||
Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt.
|
||||
The numbers reported are the counts of the interrupts that have occurred of each type.
|
||||
|
||||
An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs
|
||||
immediate attention. The processor then interrupts its current activities and executes the interrupt handler
|
||||
to deal with the event. This is part of the way a computer multitasks and handles concurrent processing.
|
||||
|
||||
The types of interrupts include:
|
||||
|
||||
- **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when
|
||||
you type something on the keyboard, an interrupt is triggered so the processor can handle the new input.
|
||||
|
||||
- **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. They are primarily
|
||||
used to switch the CPU among different tasks.
|
||||
|
||||
- **Software interrupts**: These are generated by a program requiring disk I/O operations, or other system resources.
|
||||
|
||||
- **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc.
|
||||
|
||||
Monitoring `/proc/interrupts` can be used for:
|
||||
|
||||
- **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not
|
||||
configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system
|
||||
performance degradation.
|
||||
|
||||
- **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem.
|
||||
|
||||
- **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you
|
||||
understand what your system is doing. It can provide insights into the system's interaction with hardware,
|
||||
drivers, and other parts of the kernel.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
|
@ -621,12 +737,35 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "This integration provides the system load average for the last 1, 5, and 15 minutes."
|
||||
metrics_description: |
|
||||
The `/proc/loadavg` file provides information about the system load average.
|
||||
|
||||
The load average is a measure of the amount of computational work that a system performs. It is a
|
||||
representation of the average system load over a period of time.
|
||||
|
||||
This file contains three numbers representing the system load averages for the last 1, 5, and 15 minutes,
|
||||
respectively. It also includes the number of currently running processes and the total number of processes.
|
||||
|
||||
Monitoring the load average can be used for:
|
||||
|
||||
- **System performance**: If the load average is too high, it may indicate that your system is overloaded.
|
||||
On a system with a single CPU, if the load average is 1, it means the single CPU is fully utilized. If the
|
||||
load averages are consistently higher than the number of CPUs/cores, it may indicate that your system is
|
||||
overloaded and tasks are waiting for CPU time.
|
||||
|
||||
- **Troubleshooting**: If the load average is unexpectedly high, it can be a sign of a problem. This could be
|
||||
due to a runaway process, a software bug, or a hardware issue.
|
||||
|
||||
- **Capacity planning**: By monitoring the load average over time, you can understand the trends in your
|
||||
system's workload. This can help with capacity planning and scaling decisions.
|
||||
|
||||
Remember that load average not only considers CPU usage, but also includes processes waiting for disk I/O.
|
||||
Therefore, high load averages could be due to I/O contention as well as CPU contention.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
|
@ -726,12 +865,35 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "This integration measures system pressure, which can indicate resource shortages in CPU, memory, or I/O."
|
||||
metrics_description: |
|
||||
Introduced in Linux kernel 4.20, `/proc/pressure` exposes the kernel's pressure stall information
|
||||
(PSI). PSI is a feature that allows the system to track the amount of time the system is stalled due to
|
||||
resource contention, such as CPU, memory, or I/O.
|
||||
|
||||
The collector monitors 4 separate files for CPU, memory, I/O, and IRQ:
|
||||
|
||||
- **cpu**: Tracks the amount of time tasks are stalled due to CPU contention.
|
||||
- **memory**: Tracks the amount of time tasks are stalled due to memory contention.
|
||||
- **io**: Tracks the amount of time tasks are stalled due to I/O contention.
|
||||
- **irq**: Tracks the amount of time tasks are stalled due to IRQ contention.
|
||||
|
||||
Each of them provides metrics for stall time over the last 10 seconds, 1 minute, and 5 minutes.
|
||||
|
||||
Monitoring the /proc/pressure files can provide important insights into system performance and capacity planning:
|
||||
|
||||
- **Identifying resource contention**: If these metrics are consistently high, it indicates that tasks are
|
||||
frequently being stalled due to lack of resources, which can significantly degrade system performance.
|
||||
|
||||
- **Troubleshooting performance issues**: If a system is experiencing performance issues, these metrics can
|
||||
help identify whether resource contention is the cause.
|
||||
|
||||
- **Capacity planning**: By monitoring these metrics over time, you can understand trends in resource
|
||||
utilization and make informed decisions about when to add more resources to your system.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
|
@ -878,7 +1040,29 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "Monitor SoftIRQs metrics for efficient software interrupt operations."
|
||||
metrics_description: |
|
||||
In the Linux kernel, handling of hardware interrupts is split into two halves: the top half and the bottom half.
|
||||
The top half is the routine that responds immediately to an interrupt, while the bottom half is deferred to be processed later.
|
||||
|
||||
Softirqs are a mechanism in the Linux kernel used to handle the bottom halves of interrupts, which can be
|
||||
deferred and processed later in a context where it's safe to enable interrupts.
|
||||
|
||||
The actual work of handling the interrupt is offloaded to a softirq and executed later when the system
|
||||
decides it's a good time to process them. This helps to keep the system responsive by not blocking the top
|
||||
half for too long, which could lead to missed interrupts.
|
||||
|
||||
Monitoring `/proc/softirqs` is useful for:
|
||||
|
||||
- **Performance tuning**: A high rate of softirqs could indicate a performance issue. For instance, a high
|
||||
rate of network softirqs (`NET_RX` and `NET_TX`) could indicate a network performance issue.
|
||||
|
||||
- **Troubleshooting**: If a system is behaving unexpectedly, checking the softirqs could provide clues about
|
||||
what is going on. For example, a sudden increase in block device softirqs (BLOCK) might indicate a problem
|
||||
with a disk.
|
||||
|
||||
- **Understanding system behavior**: Knowing what types of softirqs are happening can help you understand what
|
||||
your system is doing, particularly in terms of how it's interacting with hardware and how it's handling
|
||||
interrupts.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
|
@ -963,7 +1147,29 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "This integration provides statistics on Softnet, such as processed events, dropped events and more."
|
||||
metrics_description: |
|
||||
`/proc/net/softnet_stat` provides statistics that relate to the handling of network packets by softirq.
|
||||
|
||||
It provides information about:
|
||||
|
||||
- Total number of processed packets (`processed`).
|
||||
- Times ksoftirq ran out of quota (`dropped`).
|
||||
- Times net_rx_action was rescheduled.
|
||||
- Number of times processed all lists before quota.
|
||||
- Number of times did not process all lists due to quota.
|
||||
- Number of times net_rx_action was rescheduled for GRO (Generic Receive Offload) cells.
|
||||
- Number of times GRO cells were processed.
|
||||
|
||||
Monitoring the /proc/net/softnet_stat file can be useful for:
|
||||
|
||||
- **Network performance monitoring**: By tracking the total number of processed packets and how many packets
|
||||
were dropped, you can gain insights into your system's network performance.
|
||||
|
||||
- **Troubleshooting**: If you're experiencing network-related issues, this collector can provide valuable clues.
|
||||
For instance, a high number of dropped packets may indicate a network problem.
|
||||
|
||||
- **Capacity planning**: If your system is consistently processing near its maximum capacity of network
|
||||
packets, it might be time to consider upgrading your network infrastructure.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
|
@ -1069,12 +1275,28 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "Monitor memory usage metrics for efficient system memory management."
|
||||
metrics_description: |
|
||||
`/proc/meminfo` provides detailed information about the system's current memory usage. It includes information
|
||||
about different types of memory, RAM, Swap, ZSwap, HugePages, Transparent HugePages (THP), Kernel memory,
|
||||
SLAB memory, memory mappings, and more.
|
||||
|
||||
Monitoring /proc/meminfo can be useful for:
|
||||
|
||||
- **Performance Tuning**: Understanding your system's memory usage can help you make decisions about system
|
||||
tuning and optimization. For example, if your system is frequently low on free memory, it might benefit
|
||||
from more RAM.
|
||||
|
||||
- **Troubleshooting**: If your system is experiencing problems, `/proc/meminfo` can provide clues about
|
||||
whether memory usage is a factor. For example, if your system is slow and cached swap is high, it could
|
||||
mean that your system is swapping out a lot of memory to disk, which can degrade performance.
|
||||
|
||||
- **Capacity Planning**: By monitoring memory usage over time, you can understand trends and make informed
|
||||
decisions about future capacity needs.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
|
@ -1152,13 +1374,26 @@ modules:
|
|||
chart_type: area
|
||||
dimensions:
|
||||
- name: avail
|
||||
- name: system.swap
|
||||
- name: mem.swap
|
||||
description: System Swap
|
||||
unit: "MiB"
|
||||
chart_type: stacked
|
||||
dimensions:
|
||||
- name: free
|
||||
- name: used
|
||||
- name: mem.swap_cached
|
||||
description: Swap Memory Cached in RAM
|
||||
unit: "MiB"
|
||||
chart_type: stacked
|
||||
dimensions:
|
||||
- name: cached
|
||||
- name: mem.zswap
|
||||
description: Zswap Usage
|
||||
unit: "MiB"
|
||||
chart_type: stacked
|
||||
dimensions:
|
||||
- name: in-ram
|
||||
- name: on-disk
|
||||
- name: mem.hwcorrupt
|
||||
description: Corrupted Memory detected by ECC
|
||||
unit: "MiB"
|
||||
|
@ -1198,7 +1433,7 @@ modules:
|
|||
dimensions:
|
||||
- name: reclaimable
|
||||
- name: unreclaimable
|
||||
- name: mem.hugepage
|
||||
- name: mem.hugepages
|
||||
description: Dedicated HugePages Memory
|
||||
unit: "MiB"
|
||||
chart_type: stacked
|
||||
|
@ -1207,13 +1442,59 @@ modules:
|
|||
- name: used
|
||||
- name: surplus
|
||||
- name: reserved
|
||||
- name: mem.transparent_hugepages
|
||||
- name: mem.thp
|
||||
description: Transparent HugePages Memory
|
||||
unit: "MiB"
|
||||
chart_type: stacked
|
||||
dimensions:
|
||||
- name: anonymous
|
||||
- name: shmem
|
||||
- name: mem.thp_details
|
||||
description: Details of Transparent HugePages Usage
|
||||
unit: "MiB"
|
||||
chart_type: line
|
||||
dimensions:
|
||||
- name: ShmemPmdMapped
|
||||
- name: FileHugePages
|
||||
- name: FilePmdMapped
|
||||
- name: mem.reclaiming
|
||||
description: Memory Reclaiming
|
||||
unit: "MiB"
|
||||
chart_type: line
|
||||
dimensions:
|
||||
- name: Active
|
||||
- name: Inactive
|
||||
- name: Active(anon)
|
||||
- name: Inactive(anon)
|
||||
- name: Active(file)
|
||||
- name: Inactive(file)
|
||||
- name: Unevictable
|
||||
- name: Mlocked
|
||||
- name: mem.high_low
|
||||
description: High and Low Used and Free Memory Areas
|
||||
unit: "MiB"
|
||||
chart_type: stacked
|
||||
dimensions:
|
||||
- name: high_used
|
||||
- name: low_used
|
||||
- name: high_free
|
||||
- name: low_free
|
||||
- name: mem.cma
|
||||
description: Contiguous Memory Allocator (CMA) Memory
|
||||
unit: "MiB"
|
||||
chart_type: stacked
|
||||
dimensions:
|
||||
- name: used
|
||||
- name: free
|
||||
- name: mem.directmaps
|
||||
description: Direct Memory Mappings
|
||||
unit: "MiB"
|
||||
chart_type: stacked
|
||||
dimensions:
|
||||
- name: 4k
|
||||
- name: 2m
|
||||
- name: 4m
|
||||
- name: 1g
|
||||
- meta:
|
||||
plugin_name: proc.plugin
|
||||
module_name: /proc/pagetypeinfo
|
||||
|
@ -1238,7 +1519,7 @@ modules:
|
|||
supported_platforms:
|
||||
include: []
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
|
@ -1326,7 +1607,20 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "This integration monitors system memory errors detected and corrected by ECC RAM."
|
||||
metrics_description: |
|
||||
The Error Detection and Correction (EDAC) subsystem detects and reports errors in the system's memory,
|
||||
primarily ECC (Error-Correcting Code) memory errors.
|
||||
|
||||
The collector provides data for:
|
||||
|
||||
- Per memory controller (MC): correctable and uncorrectable errors. These can be of 2 kinds:
|
||||
- errors related to a DIMM
|
||||
- errors that cannot be associated with a DIMM
|
||||
|
||||
- Per memory DIMM: correctable and uncorrectable errors. There are 2 kinds:
|
||||
- memory controllers that can identify the physical DIMMs and report errors directly for them,
|
||||
- memory controllers that report errors for memory address ranges that can be linked to DIMMs.
|
||||
In this case the DIMMs reported may be more than the physical DIMMs installed.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
|
@ -1423,7 +1717,7 @@ modules:
|
|||
- name: dimm_location
|
||||
description: Location of the memory module.
|
||||
- name: dimm_mem_type
|
||||
description: Type of the memory module. Usually either buffered or unbuffered memory.
|
||||
description: Type of the memory module.
|
||||
- name: size
|
||||
description: The amount of memory in megabytes that this memory module manages.
|
||||
metrics:
|
||||
|
@ -1453,7 +1747,19 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "Monitor NUMA metrics for efficient non-uniform memory access operations."
|
||||
metrics_description: |
|
||||
Information about NUMA (Non-Uniform Memory Access) nodes on the system.
|
||||
|
||||
NUMA is a method of configuring a cluster of microprocessors in a multiprocessing system so that they can
|
||||
share memory locally, improving performance and the ability of the system to be expanded. NUMA is used in a
|
||||
symmetric multiprocessing (SMP) system.
|
||||
|
||||
In a NUMA system, processors, memory, and I/O devices are grouped together into cells, also known as nodes.
|
||||
Each node has its own memory and set of I/O devices, and one or more processors. While a processor can access
|
||||
memory in any of the nodes, it does so faster when accessing memory within its own node.
|
||||
|
||||
The collector provides statistics on memory allocations for processes running on the NUMA nodes, revealing the
|
||||
efficiency of memory allocations in multi-node systems.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
|
@ -1530,15 +1836,23 @@ modules:
|
|||
description: ""
|
||||
keywords:
|
||||
- ksm
|
||||
- samepage
|
||||
- merging
|
||||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "Examine KSM metrics for insights into memory deduplication operations."
|
||||
metrics_description: |
|
||||
Kernel Samepage Merging (KSM) is a memory-saving feature in Linux that enables the kernel to examine the
|
||||
memory of different processes and identify identical pages. It then merges these identical pages into a
|
||||
single page that the processes share. This is particularly useful for virtualization, where multiple virtual
|
||||
machines might be running the same operating system or applications and have many identical pages.
|
||||
|
||||
The collector provides information about the operation and effectiveness of KSM on your system.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
|
@ -1622,7 +1936,11 @@ modules:
|
|||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "This integration monitors ZRAM usage, compression ratios, and more."
|
||||
metrics_description: |
|
||||
zRAM, or compressed RAM, is a block device backed by a portion of your system's RAM.
|
||||
The data written to this block device is compressed and stored in memory.
|
||||
|
||||
The collector provides information about the operation and the effectiveness of zRAM on your system.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
|
@ -1715,15 +2033,30 @@ modules:
|
|||
keywords:
|
||||
- ipc
|
||||
- semaphores
|
||||
- shared memory
|
||||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "Monitor Inter Process Communication performance for optimal process interaction."
|
||||
metrics_description: |
|
||||
IPC stands for Inter-Process Communication. It is a mechanism which allows processes to communicate with each
|
||||
other and synchronize their actions.
|
||||
|
||||
This collector exposes information about:
|
||||
|
||||
- Message Queues: This allows messages to be exchanged between processes. It's a more flexible method that
|
||||
allows messages to be placed onto a queue and read at a later time.
|
||||
|
||||
- Shared Memory: This method allows for the fastest form of IPC because processes can exchange data by
|
||||
reading/writing into shared memory segments.
|
||||
|
||||
- Semaphores: They are used to synchronize the operations performed by independent processes. So, if multiple
|
||||
processes are trying to access a single shared resource, semaphores can ensure that only one process
|
||||
accesses the resource at a given time.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
exclude: []
|
||||
multi_instance: true
|
||||
multi_instance: false
|
||||
additional_permissions:
|
||||
description: ""
|
||||
default_behavior:
|
||||
|
@ -1828,13 +2161,18 @@ modules:
|
|||
description: ""
|
||||
keywords:
|
||||
- disk
|
||||
- disks
|
||||
- io
|
||||
- bcache
|
||||
- block devices
|
||||
most_popular: false
|
||||
overview:
|
||||
data_collection:
|
||||
metrics_description: "This integration provides statistics about disk and Bcache I/O."
|
||||
metrics_description: |
|
||||
Detailed statistics for each of your system's disk devices and partitions.
|
||||
The data is reported by the kernel and can be used to monitor disk activity on a Linux system.
|
||||
|
||||
Get valuable insight into how your disks are performing and where potential bottlenecks might be.
|
||||
method_description: ""
|
||||
supported_platforms:
|
||||
include: []
|
||||
|
|
|
@ -9,58 +9,92 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
(void)dt;
|
||||
|
||||
static procfile *ff = NULL;
|
||||
static int do_ram = -1, do_swap = -1, do_hwcorrupt = -1, do_committed = -1, do_writeback = -1, do_kernel = -1, do_slab = -1, do_hugepages = -1, do_transparent_hugepages = -1;
|
||||
static int do_percpu = 0;
|
||||
static int do_ram = -1
|
||||
, do_swap = -1
|
||||
, do_hwcorrupt = -1
|
||||
, do_committed = -1
|
||||
, do_writeback = -1
|
||||
, do_kernel = -1
|
||||
, do_slab = -1
|
||||
, do_hugepages = -1
|
||||
, do_transparent_hugepages = -1
|
||||
, do_reclaiming = -1
|
||||
, do_high_low = -1
|
||||
, do_cma = -1
|
||||
, do_directmap = -1;
|
||||
|
||||
static ARL_BASE *arl_base = NULL;
|
||||
static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL;
|
||||
static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL, *arl_hugepages_total = NULL,
|
||||
*arl_zswapped = NULL, *arl_high_low = NULL, *arl_cma_total = NULL,
|
||||
*arl_directmap4k = NULL, *arl_directmap2m = NULL, *arl_directmap4m = NULL, *arl_directmap1g = NULL;
|
||||
|
||||
static unsigned long long
|
||||
MemTotal = 0,
|
||||
MemFree = 0,
|
||||
MemAvailable = 0,
|
||||
Buffers = 0,
|
||||
Cached = 0,
|
||||
//SwapCached = 0,
|
||||
//Active = 0,
|
||||
//Inactive = 0,
|
||||
//ActiveAnon = 0,
|
||||
//InactiveAnon = 0,
|
||||
//ActiveFile = 0,
|
||||
//InactiveFile = 0,
|
||||
//Unevictable = 0,
|
||||
//Mlocked = 0,
|
||||
SwapTotal = 0,
|
||||
SwapFree = 0,
|
||||
Dirty = 0,
|
||||
Writeback = 0,
|
||||
//AnonPages = 0,
|
||||
//Mapped = 0,
|
||||
Shmem = 0,
|
||||
Slab = 0,
|
||||
SReclaimable = 0,
|
||||
SUnreclaim = 0,
|
||||
KernelStack = 0,
|
||||
PageTables = 0,
|
||||
NFS_Unstable = 0,
|
||||
Bounce = 0,
|
||||
WritebackTmp = 0,
|
||||
//CommitLimit = 0,
|
||||
Committed_AS = 0,
|
||||
//VmallocTotal = 0,
|
||||
VmallocUsed = 0,
|
||||
//VmallocChunk = 0,
|
||||
Percpu = 0,
|
||||
AnonHugePages = 0,
|
||||
ShmemHugePages = 0,
|
||||
HugePages_Total = 0,
|
||||
HugePages_Free = 0,
|
||||
HugePages_Rsvd = 0,
|
||||
HugePages_Surp = 0,
|
||||
Hugepagesize = 0,
|
||||
//DirectMap4k = 0,
|
||||
//DirectMap2M = 0,
|
||||
HardwareCorrupted = 0;
|
||||
MemTotal = 0
|
||||
, MemFree = 0
|
||||
, MemAvailable = 0
|
||||
, Buffers = 0
|
||||
, Cached = 0
|
||||
, SwapCached = 0
|
||||
, Active = 0
|
||||
, Inactive = 0
|
||||
, ActiveAnon = 0
|
||||
, InactiveAnon = 0
|
||||
, ActiveFile = 0
|
||||
, InactiveFile = 0
|
||||
, Unevictable = 0
|
||||
, Mlocked = 0
|
||||
, HighTotal = 0
|
||||
, HighFree = 0
|
||||
, LowTotal = 0
|
||||
, LowFree = 0
|
||||
, MmapCopy = 0
|
||||
, SwapTotal = 0
|
||||
, SwapFree = 0
|
||||
, Zswap = 0
|
||||
, Zswapped = 0
|
||||
, Dirty = 0
|
||||
, Writeback = 0
|
||||
, AnonPages = 0
|
||||
, Mapped = 0
|
||||
, Shmem = 0
|
||||
, KReclaimable = 0
|
||||
, Slab = 0
|
||||
, SReclaimable = 0
|
||||
, SUnreclaim = 0
|
||||
, KernelStack = 0
|
||||
, ShadowCallStack = 0
|
||||
, PageTables = 0
|
||||
, SecPageTables = 0
|
||||
, NFS_Unstable = 0
|
||||
, Bounce = 0
|
||||
, WritebackTmp = 0
|
||||
, CommitLimit = 0
|
||||
, Committed_AS = 0
|
||||
, VmallocTotal = 0
|
||||
, VmallocUsed = 0
|
||||
, VmallocChunk = 0
|
||||
, Percpu = 0
|
||||
//, EarlyMemtestBad = 0
|
||||
, HardwareCorrupted = 0
|
||||
, AnonHugePages = 0
|
||||
, ShmemHugePages = 0
|
||||
, ShmemPmdMapped = 0
|
||||
, FileHugePages = 0
|
||||
, FilePmdMapped = 0
|
||||
, CmaTotal = 0
|
||||
, CmaFree = 0
|
||||
//, Unaccepted = 0
|
||||
, HugePages_Total = 0
|
||||
, HugePages_Free = 0
|
||||
, HugePages_Rsvd = 0
|
||||
, HugePages_Surp = 0
|
||||
, Hugepagesize = 0
|
||||
//, Hugetlb = 0
|
||||
, DirectMap4k = 0
|
||||
, DirectMap2M = 0
|
||||
, DirectMap4M = 0
|
||||
, DirectMap1G = 0
|
||||
;
|
||||
|
||||
if(unlikely(!arl_base)) {
|
||||
do_ram = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "system ram", 1);
|
||||
|
@ -72,6 +106,12 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
do_slab = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "slab memory", 1);
|
||||
do_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "hugepages", CONFIG_BOOLEAN_AUTO);
|
||||
do_transparent_hugepages = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "transparent hugepages", CONFIG_BOOLEAN_AUTO);
|
||||
do_reclaiming = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "memory reclaiming", CONFIG_BOOLEAN_AUTO);
|
||||
do_high_low = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "high low memory", CONFIG_BOOLEAN_AUTO);
|
||||
do_cma = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "cma memory", CONFIG_BOOLEAN_AUTO);
|
||||
do_directmap = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_MEMINFO, "direct maps", CONFIG_BOOLEAN_AUTO);
|
||||
|
||||
// https://github.com/torvalds/linux/blob/master/fs/proc/meminfo.c
|
||||
|
||||
arl_base = arl_create("meminfo", NULL, 60);
|
||||
arl_expect(arl_base, "MemTotal", &MemTotal);
|
||||
|
@ -79,46 +119,90 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
arl_memavailable = arl_expect(arl_base, "MemAvailable", &MemAvailable);
|
||||
arl_expect(arl_base, "Buffers", &Buffers);
|
||||
arl_expect(arl_base, "Cached", &Cached);
|
||||
//arl_expect(arl_base, "SwapCached", &SwapCached);
|
||||
//arl_expect(arl_base, "Active", &Active);
|
||||
//arl_expect(arl_base, "Inactive", &Inactive);
|
||||
//arl_expect(arl_base, "ActiveAnon", &ActiveAnon);
|
||||
//arl_expect(arl_base, "InactiveAnon", &InactiveAnon);
|
||||
//arl_expect(arl_base, "ActiveFile", &ActiveFile);
|
||||
//arl_expect(arl_base, "InactiveFile", &InactiveFile);
|
||||
//arl_expect(arl_base, "Unevictable", &Unevictable);
|
||||
//arl_expect(arl_base, "Mlocked", &Mlocked);
|
||||
arl_expect(arl_base, "SwapCached", &SwapCached);
|
||||
arl_expect(arl_base, "Active", &Active);
|
||||
arl_expect(arl_base, "Inactive", &Inactive);
|
||||
arl_expect(arl_base, "Active(anon)", &ActiveAnon);
|
||||
arl_expect(arl_base, "Inactive(anon)", &InactiveAnon);
|
||||
arl_expect(arl_base, "Active(file)", &ActiveFile);
|
||||
arl_expect(arl_base, "Inactive(file)", &InactiveFile);
|
||||
arl_expect(arl_base, "Unevictable", &Unevictable);
|
||||
arl_expect(arl_base, "Mlocked", &Mlocked);
|
||||
|
||||
// CONFIG_HIGHMEM
|
||||
arl_high_low = arl_expect(arl_base, "HighTotal", &HighTotal);
|
||||
arl_expect(arl_base, "HighFree", &HighFree);
|
||||
arl_expect(arl_base, "LowTotal", &LowTotal);
|
||||
arl_expect(arl_base, "LowFree", &LowFree);
|
||||
|
||||
// !CONFIG_MMU (MmapCopy is reported only by kernels built without an MMU)
|
||||
arl_expect(arl_base, "MmapCopy", &MmapCopy);
|
||||
|
||||
arl_expect(arl_base, "SwapTotal", &SwapTotal);
|
||||
arl_expect(arl_base, "SwapFree", &SwapFree);
|
||||
|
||||
// CONFIG_ZSWAP
|
||||
arl_zswapped = arl_expect(arl_base, "Zswap", &Zswap);
|
||||
arl_expect(arl_base, "Zswapped", &Zswapped);
|
||||
|
||||
arl_expect(arl_base, "Dirty", &Dirty);
|
||||
arl_expect(arl_base, "Writeback", &Writeback);
|
||||
//arl_expect(arl_base, "AnonPages", &AnonPages);
|
||||
//arl_expect(arl_base, "Mapped", &Mapped);
|
||||
arl_expect(arl_base, "AnonPages", &AnonPages);
|
||||
arl_expect(arl_base, "Mapped", &Mapped);
|
||||
arl_expect(arl_base, "Shmem", &Shmem);
|
||||
arl_expect(arl_base, "KReclaimable", &KReclaimable);
|
||||
arl_expect(arl_base, "Slab", &Slab);
|
||||
arl_expect(arl_base, "SReclaimable", &SReclaimable);
|
||||
arl_expect(arl_base, "SUnreclaim", &SUnreclaim);
|
||||
arl_expect(arl_base, "KernelStack", &KernelStack);
|
||||
|
||||
// CONFIG_SHADOW_CALL_STACK
|
||||
arl_expect(arl_base, "ShadowCallStack", &ShadowCallStack);
|
||||
|
||||
arl_expect(arl_base, "PageTables", &PageTables);
|
||||
arl_expect(arl_base, "SecPageTables", &SecPageTables);
|
||||
arl_expect(arl_base, "NFS_Unstable", &NFS_Unstable);
|
||||
arl_expect(arl_base, "Bounce", &Bounce);
|
||||
arl_expect(arl_base, "WritebackTmp", &WritebackTmp);
|
||||
//arl_expect(arl_base, "CommitLimit", &CommitLimit);
|
||||
arl_expect(arl_base, "CommitLimit", &CommitLimit);
|
||||
arl_expect(arl_base, "Committed_AS", &Committed_AS);
|
||||
//arl_expect(arl_base, "VmallocTotal", &VmallocTotal);
|
||||
arl_expect(arl_base, "VmallocTotal", &VmallocTotal);
|
||||
arl_expect(arl_base, "VmallocUsed", &VmallocUsed);
|
||||
//arl_expect(arl_base, "VmallocChunk", &VmallocChunk);
|
||||
arl_expect(arl_base, "VmallocChunk", &VmallocChunk);
|
||||
arl_expect(arl_base, "Percpu", &Percpu);
|
||||
|
||||
// CONFIG_MEMTEST
|
||||
//arl_expect(arl_base, "EarlyMemtestBad", &EarlyMemtestBad);
|
||||
|
||||
// CONFIG_MEMORY_FAILURE
|
||||
arl_hwcorrupted = arl_expect(arl_base, "HardwareCorrupted", &HardwareCorrupted);
|
||||
|
||||
// CONFIG_TRANSPARENT_HUGEPAGE
|
||||
arl_expect(arl_base, "AnonHugePages", &AnonHugePages);
|
||||
arl_expect(arl_base, "ShmemHugePages", &ShmemHugePages);
|
||||
arl_expect(arl_base, "HugePages_Total", &HugePages_Total);
|
||||
arl_expect(arl_base, "ShmemPmdMapped", &ShmemPmdMapped);
|
||||
arl_expect(arl_base, "FileHugePages", &FileHugePages);
|
||||
arl_expect(arl_base, "FilePmdMapped", &FilePmdMapped);
|
||||
|
||||
// CONFIG_CMA
|
||||
arl_cma_total = arl_expect(arl_base, "CmaTotal", &CmaTotal);
|
||||
arl_expect(arl_base, "CmaFree", &CmaFree);
|
||||
|
||||
// CONFIG_UNACCEPTED_MEMORY
|
||||
//arl_expect(arl_base, "Unaccepted", &Unaccepted);
|
||||
|
||||
// these appear only when hugepages are supported
|
||||
arl_hugepages_total = arl_expect(arl_base, "HugePages_Total", &HugePages_Total);
|
||||
arl_expect(arl_base, "HugePages_Free", &HugePages_Free);
|
||||
arl_expect(arl_base, "HugePages_Rsvd", &HugePages_Rsvd);
|
||||
arl_expect(arl_base, "HugePages_Surp", &HugePages_Surp);
|
||||
arl_expect(arl_base, "Hugepagesize", &Hugepagesize);
|
||||
//arl_expect(arl_base, "DirectMap4k", &DirectMap4k);
|
||||
//arl_expect(arl_base, "DirectMap2M", &DirectMap2M);
|
||||
//arl_expect(arl_base, "Hugetlb", &Hugetlb);
|
||||
|
||||
arl_directmap4k = arl_expect(arl_base, "DirectMap4k", &DirectMap4k);
|
||||
arl_directmap2m = arl_expect(arl_base, "DirectMap2M", &DirectMap2M);
|
||||
arl_directmap4m = arl_expect(arl_base, "DirectMap4M", &DirectMap4M);
|
||||
arl_directmap1g = arl_expect(arl_base, "DirectMap1G", &DirectMap1G);
|
||||
}
|
||||
|
||||
if(unlikely(!ff)) {
|
||||
|
@ -136,26 +220,17 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
size_t lines = procfile_lines(ff), l;
|
||||
|
||||
arl_begin(arl_base);
|
||||
|
||||
static int first_ff_read = 1;
|
||||
|
||||
for(l = 0; l < lines ;l++) {
|
||||
size_t words = procfile_linewords(ff, l);
|
||||
if(unlikely(words < 2)) continue;
|
||||
|
||||
if (first_ff_read && !strcmp(procfile_lineword(ff, l, 0), "Percpu"))
|
||||
do_percpu = 1;
|
||||
|
||||
if(unlikely(arl_check(arl_base,
|
||||
procfile_lineword(ff, l, 0),
|
||||
procfile_lineword(ff, l, 1)))) break;
|
||||
}
|
||||
|
||||
if (first_ff_read)
|
||||
first_ff_read = 0;
|
||||
|
||||
// http://calimeroteknik.free.fr/blag/?article20/really-used-memory-on-gnu-linux
|
||||
unsigned long long MemCached = Cached + SReclaimable - Shmem;
|
||||
unsigned long long MemCached = Cached + SReclaimable + KReclaimable - Shmem;
|
||||
unsigned long long MemUsed = MemTotal - MemFree - MemCached - Buffers;
|
||||
// The Linux kernel doesn't report ZFS ARC usage as cache memory (the ARC is included in the total used system memory)
|
||||
if (!inside_lxc_container) {
|
||||
|
@ -207,7 +282,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
"mem"
|
||||
, "available"
|
||||
, NULL
|
||||
, "system"
|
||||
, "overview"
|
||||
, NULL
|
||||
, "Available RAM for applications"
|
||||
, "MiB"
|
||||
|
@ -238,7 +313,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
|
||||
if(unlikely(!st_system_swap)) {
|
||||
st_system_swap = rrdset_create_localhost(
|
||||
"system"
|
||||
"mem"
|
||||
, "swap"
|
||||
, NULL
|
||||
, "swap"
|
||||
|
@ -247,7 +322,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_SYSTEM_SWAP
|
||||
, NETDATA_CHART_PRIO_MEM_SWAP
|
||||
, update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
@ -261,6 +336,62 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
rrddim_set_by_pointer(st_system_swap, rd_used, SwapUsed);
|
||||
rrddim_set_by_pointer(st_system_swap, rd_free, SwapFree);
|
||||
rrdset_done(st_system_swap);
|
||||
|
||||
{
|
||||
static RRDSET *st_mem_swap_cached = NULL;
|
||||
static RRDDIM *rd_cached = NULL;
|
||||
|
||||
if (unlikely(!st_mem_swap_cached)) {
|
||||
st_mem_swap_cached = rrdset_create_localhost(
|
||||
"mem"
|
||||
, "swap_cached"
|
||||
, NULL
|
||||
, "swap"
|
||||
, NULL
|
||||
, "Swap Memory Cached in RAM"
|
||||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_MEM_SWAP + 1
|
||||
, update_every
|
||||
, RRDSET_TYPE_AREA
|
||||
);
|
||||
|
||||
rd_cached = rrddim_add(st_mem_swap_cached, "cached", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_mem_swap_cached, rd_cached, SwapCached);
|
||||
rrdset_done(st_mem_swap_cached);
|
||||
}
|
||||
|
||||
if(arl_zswapped->flags & ARL_ENTRY_FLAG_FOUND) {
|
||||
static RRDSET *st_mem_zswap = NULL;
|
||||
static RRDDIM *rd_zswap = NULL, *rd_zswapped = NULL;
|
||||
|
||||
if (unlikely(!st_mem_zswap)) {
|
||||
st_mem_zswap = rrdset_create_localhost(
|
||||
"mem"
|
||||
, "zswap"
|
||||
, NULL
|
||||
, "zswap"
|
||||
, NULL
|
||||
, "Zswap Usage"
|
||||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_MEM_ZSWAP
|
||||
, update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
||||
rd_zswap = rrddim_add(st_mem_zswap, "zswap", "in-ram", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_zswapped = rrddim_add(st_mem_zswap, "zswapped", "on-disk", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_mem_zswap, rd_zswap, Zswap);
|
||||
rrddim_set_by_pointer(st_mem_zswap, rd_zswapped, Zswapped);
|
||||
rrdset_done(st_mem_zswap);
|
||||
}
|
||||
}
|
||||
|
||||
if(arl_hwcorrupted->flags & ARL_ENTRY_FLAG_FOUND &&
|
||||
|
@ -306,7 +437,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
"mem"
|
||||
, "committed"
|
||||
, NULL
|
||||
, "system"
|
||||
, "overview"
|
||||
, NULL
|
||||
, "Committed (Allocated) Memory"
|
||||
, "MiB"
|
||||
|
@ -335,7 +466,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
"mem"
|
||||
, "writeback"
|
||||
, NULL
|
||||
, "kernel"
|
||||
, "writeback"
|
||||
, NULL
|
||||
, "Writeback Memory"
|
||||
, "MiB"
|
||||
|
@ -367,7 +498,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
if(do_kernel) {
|
||||
static RRDSET *st_mem_kernel = NULL;
|
||||
static RRDDIM *rd_slab = NULL, *rd_kernelstack = NULL, *rd_pagetables = NULL, *rd_vmallocused = NULL,
|
||||
*rd_percpu = NULL;
|
||||
*rd_percpu = NULL, *rd_kreclaimable = NULL;
|
||||
|
||||
if(unlikely(!st_mem_kernel)) {
|
||||
st_mem_kernel = rrdset_create_localhost(
|
||||
|
@ -391,16 +522,16 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
rd_kernelstack = rrddim_add(st_mem_kernel, "KernelStack", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_pagetables = rrddim_add(st_mem_kernel, "PageTables", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_vmallocused = rrddim_add(st_mem_kernel, "VmallocUsed", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
if (do_percpu)
|
||||
rd_percpu = rrddim_add(st_mem_kernel, "Percpu", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_kreclaimable = rrddim_add(st_mem_kernel, "KReclaimable", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_mem_kernel, rd_slab, Slab);
|
||||
rrddim_set_by_pointer(st_mem_kernel, rd_kernelstack, KernelStack);
|
||||
rrddim_set_by_pointer(st_mem_kernel, rd_pagetables, PageTables);
|
||||
rrddim_set_by_pointer(st_mem_kernel, rd_vmallocused, VmallocUsed);
|
||||
if (do_percpu)
|
||||
rrddim_set_by_pointer(st_mem_kernel, rd_percpu, Percpu);
|
||||
rrddim_set_by_pointer(st_mem_kernel, rd_kreclaimable, KReclaimable);
|
||||
|
||||
rrdset_done(st_mem_kernel);
|
||||
}
|
||||
|
@ -436,9 +567,10 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
rrdset_done(st_mem_slab);
|
||||
}
|
||||
|
||||
if(do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO &&
|
||||
if(arl_hugepages_total->flags & ARL_ENTRY_FLAG_FOUND &&
|
||||
(do_hugepages == CONFIG_BOOLEAN_YES || (do_hugepages == CONFIG_BOOLEAN_AUTO &&
|
||||
((Hugepagesize && HugePages_Total) ||
|
||||
netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
|
||||
netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES)))) {
|
||||
do_hugepages = CONFIG_BOOLEAN_YES;
|
||||
|
||||
static RRDSET *st_mem_hugepages = NULL;
|
||||
|
@ -455,7 +587,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1
|
||||
, NETDATA_CHART_PRIO_MEM_HUGEPAGES
|
||||
, update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
@ -487,7 +619,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
if(unlikely(!st_mem_transparent_hugepages)) {
|
||||
st_mem_transparent_hugepages = rrdset_create_localhost(
|
||||
"mem"
|
||||
, "transparent_hugepages"
|
||||
, "thp"
|
||||
, NULL
|
||||
, "hugepages"
|
||||
, NULL
|
||||
|
@ -495,7 +627,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_MEM_HUGEPAGES
|
||||
, NETDATA_CHART_PRIO_MEM_HUGEPAGES + 1
|
||||
, update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
@ -509,6 +641,206 @@ int do_proc_meminfo(int update_every, usec_t dt) {
|
|||
rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_anonymous, AnonHugePages);
|
||||
rrddim_set_by_pointer(st_mem_transparent_hugepages, rd_shared, ShmemHugePages);
|
||||
rrdset_done(st_mem_transparent_hugepages);
|
||||
|
||||
{
|
||||
static RRDSET *st_mem_thp_details = NULL;
|
||||
static RRDDIM *rd_shmem_pmd_mapped = NULL, *rd_file_huge_pages = NULL, *rd_file_pmd_mapped = NULL;
|
||||
|
||||
if(unlikely(!st_mem_thp_details)) {
|
||||
st_mem_thp_details = rrdset_create_localhost(
|
||||
"mem"
|
||||
, "thp_details"
|
||||
, NULL
|
||||
, "hugepages"
|
||||
, NULL
|
||||
, "Details of Transparent HugePages Usage"
|
||||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_MEM_HUGEPAGES_DETAILS
|
||||
, update_every
|
||||
, RRDSET_TYPE_LINE
|
||||
);
|
||||
|
||||
rrdset_flag_set(st_mem_thp_details, RRDSET_FLAG_DETAIL);
|
||||
|
||||
rd_shmem_pmd_mapped = rrddim_add(st_mem_thp_details, "shmem_pmd", "ShmemPmdMapped", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_file_huge_pages = rrddim_add(st_mem_thp_details, "file", "FileHugePages", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_file_pmd_mapped = rrddim_add(st_mem_thp_details, "file_pmd", "FilePmdMapped", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_mem_thp_details, rd_shmem_pmd_mapped, ShmemPmdMapped);
|
||||
rrddim_set_by_pointer(st_mem_thp_details, rd_file_huge_pages, FileHugePages);
|
||||
rrddim_set_by_pointer(st_mem_thp_details, rd_file_pmd_mapped, FilePmdMapped);
|
||||
rrdset_done(st_mem_thp_details);
|
||||
}
|
||||
}
|
||||
|
||||
if(do_reclaiming != CONFIG_BOOLEAN_NO) {
|
||||
static RRDSET *st_mem_reclaiming = NULL;
|
||||
static RRDDIM *rd_active = NULL, *rd_inactive = NULL,
|
||||
*rd_active_anon = NULL, *rd_inactive_anon = NULL,
|
||||
*rd_active_file = NULL, *rd_inactive_file = NULL,
|
||||
*rd_unevictable = NULL, *rd_mlocked = NULL;
|
||||
|
||||
if(unlikely(!st_mem_reclaiming)) {
|
||||
st_mem_reclaiming = rrdset_create_localhost(
|
||||
"mem"
|
||||
, "reclaiming"
|
||||
, NULL
|
||||
, "reclaiming"
|
||||
, NULL
|
||||
, "Memory Reclaiming"
|
||||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_MEM_RECLAIMING
|
||||
, update_every
|
||||
, RRDSET_TYPE_LINE
|
||||
);
|
||||
|
||||
rrdset_flag_set(st_mem_reclaiming, RRDSET_FLAG_DETAIL);
|
||||
|
||||
rd_active = rrddim_add(st_mem_reclaiming, "active", "Active", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_inactive = rrddim_add(st_mem_reclaiming, "inactive", "Inactive", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_active_anon = rrddim_add(st_mem_reclaiming, "active_anon", "Active(anon)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_inactive_anon = rrddim_add(st_mem_reclaiming, "inactive_anon", "Inactive(anon)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_active_file = rrddim_add(st_mem_reclaiming, "active_file", "Active(file)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_inactive_file = rrddim_add(st_mem_reclaiming, "inactive_file", "Inactive(file)", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_unevictable = rrddim_add(st_mem_reclaiming, "unevictable", "Unevictable", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_mlocked = rrddim_add(st_mem_reclaiming, "mlocked", "Mlocked", 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_mem_reclaiming, rd_active, Active);
|
||||
rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive, Inactive);
|
||||
rrddim_set_by_pointer(st_mem_reclaiming, rd_active_anon, ActiveAnon);
|
||||
rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive_anon, InactiveAnon);
|
||||
rrddim_set_by_pointer(st_mem_reclaiming, rd_active_file, ActiveFile);
|
||||
rrddim_set_by_pointer(st_mem_reclaiming, rd_inactive_file, InactiveFile);
|
||||
rrddim_set_by_pointer(st_mem_reclaiming, rd_unevictable, Unevictable);
|
||||
rrddim_set_by_pointer(st_mem_reclaiming, rd_mlocked, Mlocked);
|
||||
|
||||
rrdset_done(st_mem_reclaiming);
|
||||
}
|
||||
|
||||
if(do_high_low != CONFIG_BOOLEAN_NO && (arl_high_low->flags & ARL_ENTRY_FLAG_FOUND)) {
|
||||
static RRDSET *st_mem_high_low = NULL;
|
||||
static RRDDIM *rd_high_used = NULL, *rd_low_used = NULL;
|
||||
static RRDDIM *rd_high_free = NULL, *rd_low_free = NULL;
|
||||
|
||||
if(unlikely(!st_mem_high_low)) {
|
||||
st_mem_high_low = rrdset_create_localhost(
|
||||
"mem"
|
||||
, "high_low"
|
||||
, NULL
|
||||
, "high_low"
|
||||
, NULL
|
||||
, "High and Low Used and Free Memory Areas"
|
||||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_MEM_HIGH_LOW
|
||||
, update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
||||
rrdset_flag_set(st_mem_high_low, RRDSET_FLAG_DETAIL);
|
||||
|
||||
rd_high_used = rrddim_add(st_mem_high_low, "high_used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_low_used = rrddim_add(st_mem_high_low, "low_used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_high_free = rrddim_add(st_mem_high_low, "high_free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_low_free = rrddim_add(st_mem_high_low, "low_free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_mem_high_low, rd_high_used, HighTotal - HighFree);
|
||||
rrddim_set_by_pointer(st_mem_high_low, rd_low_used, LowTotal - LowFree);
|
||||
rrddim_set_by_pointer(st_mem_high_low, rd_high_free, HighFree);
|
||||
rrddim_set_by_pointer(st_mem_high_low, rd_low_free, LowFree);
|
||||
rrdset_done(st_mem_high_low);
|
||||
}
|
||||
|
||||
if(do_cma == CONFIG_BOOLEAN_YES || (do_cma == CONFIG_BOOLEAN_AUTO && (arl_cma_total->flags & ARL_ENTRY_FLAG_FOUND) && CmaTotal)) {
|
||||
do_cma = CONFIG_BOOLEAN_YES;
|
||||
|
||||
static RRDSET *st_mem_cma = NULL;
|
||||
static RRDDIM *rd_used = NULL, *rd_free = NULL;
|
||||
|
||||
if(unlikely(!st_mem_cma)) {
|
||||
st_mem_cma = rrdset_create_localhost(
|
||||
"mem"
|
||||
, "cma"
|
||||
, NULL
|
||||
, "cma"
|
||||
, NULL
|
||||
, "Contiguous Memory Allocator (CMA) Memory"
|
||||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_MEM_CMA
|
||||
, update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
||||
rd_used = rrddim_add(st_mem_cma, "used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
rd_free = rrddim_add(st_mem_cma, "free", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
rrddim_set_by_pointer(st_mem_cma, rd_used, CmaTotal - CmaFree);
|
||||
rrddim_set_by_pointer(st_mem_cma, rd_free, CmaFree);
|
||||
rrdset_done(st_mem_cma);
|
||||
}
|
||||
|
||||
if(do_directmap != CONFIG_BOOLEAN_NO &&
|
||||
((arl_directmap4k->flags & ARL_ENTRY_FLAG_FOUND) ||
|
||||
(arl_directmap2m->flags & ARL_ENTRY_FLAG_FOUND) ||
|
||||
(arl_directmap4m->flags & ARL_ENTRY_FLAG_FOUND) ||
|
||||
(arl_directmap1g->flags & ARL_ENTRY_FLAG_FOUND)))
|
||||
{
|
||||
static RRDSET *st_mem_directmap = NULL;
|
||||
static RRDDIM *rd_4k = NULL, *rd_2m = NULL, *rd_1g = NULL, *rd_4m = NULL;
|
||||
|
||||
if(unlikely(!st_mem_directmap)) {
|
||||
st_mem_directmap = rrdset_create_localhost(
|
||||
"mem"
|
||||
, "directmaps"
|
||||
, NULL
|
||||
, "overview"
|
||||
, NULL
|
||||
, "Direct Memory Mappings"
|
||||
, "MiB"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_MEMINFO_NAME
|
||||
, NETDATA_CHART_PRIO_MEM_DIRECTMAP
|
||||
, update_every
|
||||
, RRDSET_TYPE_STACKED
|
||||
);
|
||||
|
||||
if(arl_directmap4k->flags & ARL_ENTRY_FLAG_FOUND)
|
||||
rd_4k = rrddim_add(st_mem_directmap, "4k", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
|
||||
if(arl_directmap2m->flags & ARL_ENTRY_FLAG_FOUND)
|
||||
rd_2m = rrddim_add(st_mem_directmap, "2m", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
|
||||
if(arl_directmap4m->flags & ARL_ENTRY_FLAG_FOUND)
|
||||
rd_4m = rrddim_add(st_mem_directmap, "4m", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
|
||||
if(arl_directmap1g->flags & ARL_ENTRY_FLAG_FOUND)
|
||||
rd_1g = rrddim_add(st_mem_directmap, "1g", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
if(rd_4k)
|
||||
rrddim_set_by_pointer(st_mem_directmap, rd_4k, DirectMap4k);
|
||||
|
||||
if(rd_2m)
|
||||
rrddim_set_by_pointer(st_mem_directmap, rd_2m, DirectMap2M);
|
||||
|
||||
if(rd_4m)
|
||||
rrddim_set_by_pointer(st_mem_directmap, rd_4m, DirectMap4M);
|
||||
|
||||
if(rd_1g)
|
||||
rrddim_set_by_pointer(st_mem_directmap, rd_1g, DirectMap1G);
|
||||
|
||||
rrdset_done(st_mem_directmap);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -12,28 +12,55 @@ static int pressure_update_every = 0;
|
|||
|
||||
static struct pressure resources[PRESSURE_NUM_RESOURCES] = {
|
||||
{
|
||||
.some =
|
||||
{.share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"},
|
||||
.total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}},
|
||||
.full =
|
||||
{.share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"},
|
||||
.total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}},
|
||||
.some = {
|
||||
.available = true,
|
||||
.share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"},
|
||||
.total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}
|
||||
},
|
||||
.full = {
|
||||
// Disable CPU full pressure.
|
||||
// See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8
|
||||
.available = false,
|
||||
.share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"},
|
||||
.total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}
|
||||
},
|
||||
},
|
||||
{
|
||||
.some =
|
||||
{.share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"},
|
||||
.total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}},
|
||||
.full =
|
||||
{.share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"},
|
||||
.total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}},
|
||||
.some = {
|
||||
.available = true,
|
||||
.share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"},
|
||||
.total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}
|
||||
},
|
||||
.full = {
|
||||
.available = true,
|
||||
.share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"},
|
||||
.total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}
|
||||
},
|
||||
},
|
||||
{
|
||||
.some =
|
||||
{.share_time = {.id = "io_some_pressure", .title = "I/O some pressure"},
|
||||
.total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}},
|
||||
.full =
|
||||
{.share_time = {.id = "io_full_pressure", .title = "I/O full pressure"},
|
||||
.total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}},
|
||||
.some = {
|
||||
.available = true,
|
||||
.share_time = {.id = "io_some_pressure", .title = "I/O some pressure"},
|
||||
.total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}
|
||||
},
|
||||
.full = {
|
||||
.available = true,
|
||||
.share_time = {.id = "io_full_pressure", .title = "I/O full pressure"},
|
||||
.total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}
|
||||
},
|
||||
},
|
||||
{
|
||||
.some = {
|
||||
// this is not available
|
||||
.available = false,
|
||||
.share_time = {.id = "irq_some_pressure", .title = "IRQ some pressure"},
|
||||
.total_time = {.id = "irq_some_pressure_stall_time", .title = "IRQ some pressure stall time"}
|
||||
},
|
||||
.full = {
|
||||
.available = true,
|
||||
.share_time = {.id = "irq_full_pressure", .title = "IRQ full pressure"},
|
||||
.total_time = {.id = "irq_full_pressure_stall_time", .title = "IRQ full pressure stall time"}
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -46,6 +73,7 @@ static struct resource_info {
|
|||
{ .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU },
|
||||
{ .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM },
|
||||
{ .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO },
|
||||
{ .name = "irq", .family = "interrupts", .section_priority = NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS },
|
||||
};
|
||||
|
||||
void update_pressure_charts(struct pressure_charts *pcs) {
|
||||
|
@ -65,7 +93,7 @@ void update_pressure_charts(struct pressure_charts *pcs) {
|
|||
}
|
||||
}
|
||||
|
||||
static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
|
||||
static void proc_pressure_do_resource(procfile *ff, int res_idx, size_t line, bool some) {
|
||||
struct pressure_charts *pcs;
|
||||
struct resource_info ri;
|
||||
pcs = some ? &resources[res_idx].some : &resources[res_idx].full;
|
||||
|
@ -93,9 +121,9 @@ static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
|
|||
rrddim_add(pcs->share_time.st, some ? "some 300" : "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
|
||||
}
|
||||
|
||||
pcs->share_time.value10 = strtod(procfile_lineword(ff, some ? 0 : 1, 2), NULL);
|
||||
pcs->share_time.value60 = strtod(procfile_lineword(ff, some ? 0 : 1, 4), NULL);
|
||||
pcs->share_time.value300 = strtod(procfile_lineword(ff, some ? 0 : 1, 6), NULL);
|
||||
pcs->share_time.value10 = strtod(procfile_lineword(ff, line, 2), NULL);
|
||||
pcs->share_time.value60 = strtod(procfile_lineword(ff, line, 4), NULL);
|
||||
pcs->share_time.value300 = strtod(procfile_lineword(ff, line, 6), NULL);
|
||||
|
||||
if (unlikely(!pcs->total_time.st)) {
|
||||
pcs->total_time.st = rrdset_create_localhost(
|
||||
|
@ -114,19 +142,19 @@ static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
|
|||
pcs->total_time.rdtotal = rrddim_add(pcs->total_time.st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
|
||||
}
|
||||
|
||||
pcs->total_time.value_total = str2ull(procfile_lineword(ff, some ? 0 : 1, 8), NULL) / 1000;
|
||||
pcs->total_time.value_total = str2ull(procfile_lineword(ff, line, 8), NULL) / 1000;
|
||||
}
|
||||
|
||||
static void proc_pressure_do_resource_some(procfile *ff, int res_idx) {
|
||||
proc_pressure_do_resource(ff, res_idx, 1);
|
||||
static void proc_pressure_do_resource_some(procfile *ff, int res_idx, size_t line) {
|
||||
proc_pressure_do_resource(ff, res_idx, line, true);
|
||||
}
|
||||
|
||||
static void proc_pressure_do_resource_full(procfile *ff, int res_idx) {
|
||||
proc_pressure_do_resource(ff, res_idx, 0);
|
||||
static void proc_pressure_do_resource_full(procfile *ff, int res_idx, size_t line) {
|
||||
proc_pressure_do_resource(ff, res_idx, line, false);
|
||||
}
|
||||
|
||||
int do_proc_pressure(int update_every, usec_t dt) {
|
||||
int fail_count = 0;
|
||||
int ok_count = 0;
|
||||
int i;
|
||||
|
||||
static usec_t next_pressure_dt = 0;
|
||||
|
@ -161,56 +189,59 @@ int do_proc_pressure(int update_every, usec_t dt) {
|
|||
, base_path
|
||||
, resource_info[i].name);
|
||||
|
||||
do_some = resources[i].some.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
|
||||
do_full = resources[i].full.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
|
||||
|
||||
snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name);
|
||||
do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
|
||||
do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_some);
|
||||
resources[i].some.enabled = do_some;
|
||||
|
||||
// Disable CPU full pressure.
|
||||
// See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8
|
||||
if (i == 0) {
|
||||
do_full = CONFIG_BOOLEAN_NO;
|
||||
resources[i].full.enabled = do_full;
|
||||
} else {
|
||||
snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name);
|
||||
do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
|
||||
do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_full);
|
||||
resources[i].full.enabled = do_full;
|
||||
}
|
||||
|
||||
if(!do_full && !do_some)
|
||||
continue;
|
||||
|
||||
ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT);
|
||||
if (unlikely(!ff)) {
|
||||
collector_error("Cannot read pressure information from %s.", filename);
|
||||
fail_count++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
ff = procfile_readall(ff);
|
||||
resource_info[i].pf = ff;
|
||||
if (unlikely(!ff)) {
|
||||
fail_count++;
|
||||
if (unlikely(!ff))
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t lines = procfile_lines(ff);
|
||||
if (unlikely(lines < 1)) {
|
||||
collector_error("%s has no lines.", procfile_filename(ff));
|
||||
fail_count++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (do_some) {
|
||||
proc_pressure_do_resource_some(ff, i);
|
||||
for(size_t l = 0; l < lines ;l++) {
|
||||
const char *key = procfile_lineword(ff, l, 0);
|
||||
if(strcmp(key, "some") == 0) {
|
||||
if(do_some) {
|
||||
proc_pressure_do_resource_some(ff, i, l);
|
||||
update_pressure_charts(&resources[i].some);
|
||||
ok_count++;
|
||||
}
|
||||
if (do_full && lines > 2) {
|
||||
proc_pressure_do_resource_full(ff, i);
|
||||
}
|
||||
else if(strcmp(key, "full") == 0) {
|
||||
if(do_full) {
|
||||
proc_pressure_do_resource_full(ff, i, l);
|
||||
update_pressure_charts(&resources[i].full);
|
||||
ok_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (PRESSURE_NUM_RESOURCES == fail_count) {
|
||||
if(!ok_count)
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -3,13 +3,14 @@
|
|||
#ifndef NETDATA_PROC_PRESSURE_H
|
||||
#define NETDATA_PROC_PRESSURE_H
|
||||
|
||||
#define PRESSURE_NUM_RESOURCES 3
|
||||
#define PRESSURE_NUM_RESOURCES 4
|
||||
|
||||
struct pressure {
|
||||
int updated;
|
||||
char *filename;
|
||||
|
||||
struct pressure_charts {
|
||||
bool available;
|
||||
int enabled;
|
||||
|
||||
struct pressure_share_time_chart {
|
||||
|
|
|
@ -494,7 +494,7 @@ int do_proc_stat(int update_every, usec_t dt) {
|
|||
do_processes = config_get_boolean("plugin:proc:/proc/stat", "processes running", CONFIG_BOOLEAN_YES);
|
||||
|
||||
// give sane defaults based on the number of processors
|
||||
if(unlikely(get_system_cpus() > 50)) {
|
||||
if(unlikely(get_system_cpus() > 128)) {
|
||||
// the system has too many processors
|
||||
keep_per_core_fds_open = CONFIG_BOOLEAN_NO;
|
||||
do_core_throttle_count = CONFIG_BOOLEAN_NO;
|
||||
|
|
|
@ -271,7 +271,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
|
|||
|
||||
if(unlikely(!st_swapio)) {
|
||||
st_swapio = rrdset_create_localhost(
|
||||
"system"
|
||||
"mem"
|
||||
, "swapio"
|
||||
, NULL
|
||||
, "swap"
|
||||
|
@ -280,7 +280,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
|
|||
, "KiB/s"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_VMSTAT_NAME
|
||||
, NETDATA_CHART_PRIO_SYSTEM_SWAPIO
|
||||
, NETDATA_CHART_PRIO_MEM_SWAPIO
|
||||
, update_every
|
||||
, RRDSET_TYPE_AREA
|
||||
);
|
||||
|
@ -336,7 +336,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
|
|||
"mem"
|
||||
, "pgfaults"
|
||||
, NULL
|
||||
, "system"
|
||||
, "page faults"
|
||||
, NULL
|
||||
, "Memory Page Faults"
|
||||
, "faults/s"
|
||||
|
@ -372,7 +372,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
|
|||
"mem"
|
||||
, "oom_kill"
|
||||
, NULL
|
||||
, "system"
|
||||
, "OOM kills"
|
||||
, NULL
|
||||
, "Out of Memory Kills"
|
||||
, "kills/s"
|
||||
|
@ -505,7 +505,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
|
|||
|
||||
if(unlikely(!st_zswapio)) {
|
||||
st_zswapio = rrdset_create_localhost(
|
||||
"system"
|
||||
"mem"
|
||||
, "zswapio"
|
||||
, NULL
|
||||
, "zswap"
|
||||
|
@ -514,7 +514,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
|
|||
, "KiB/s"
|
||||
, PLUGIN_PROC_NAME
|
||||
, PLUGIN_PROC_MODULE_VMSTAT_NAME
|
||||
, NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO
|
||||
, NETDATA_CHART_PRIO_MEM_ZSWAPIO
|
||||
, update_every
|
||||
, RRDSET_TYPE_AREA
|
||||
);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# you can disable an alarm notification by setting the 'to' line to: silent
|
||||
|
||||
alarm: 30min_ram_swapped_out
|
||||
on: system.swapio
|
||||
on: mem.swapio
|
||||
class: Workload
|
||||
type: System
|
||||
component: Memory
|
||||
|
@ -19,7 +19,7 @@ component: Memory
|
|||
to: sysadmin
|
||||
|
||||
alarm: used_swap
|
||||
on: system.swap
|
||||
on: mem.swap
|
||||
class: Utilization
|
||||
type: System
|
||||
component: Memory
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue