0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-14 09:38:34 +00:00
netdata_netdata/collectors/proc.plugin/plugin_proc.c
Costa Tsaousis 3e508c8f95
New logging layer ()
* cleanup of logging - wip

* first working iteration

* add errno annotator

* replace old logging functions with netdata_logger()

* cleanup

* update error_limit

* fix remaining error_limit references

* work on fatal()

* started working on structured logs

* full cleanup

* default logging to files; fix all plugins initialization

* fix formatting of numbers

* cleanup and reorg

* fix coverity issues

* cleanup obsolete code

* fix formatting of numbers

* fix log rotation

* fix for older systems

* add detection of systemd journal via stderr

* finished on access.log

* remove left-over transport

* do not add empty fields to the logs

* journal gets compact uuids; X-Transaction-ID header is added in web responses

* allow compiling on systems without memfd sealing

* added libnetdata/uuid directory

* move datetime formatters to libnetdata

* add missing files

* link the makefiles in libnetdata

* added uuid_parse_flexi() to parse UUIDs with and without hyphens; the web server now read X-Transaction-ID and uses it for functions and web responses

* added stream receiver, sender, proc plugin and pluginsd log stack

* iso8601 advanced usage; line_splitter module in libnetdata; code cleanup

* add message ids to streaming inbound and outbound connections

* cleanup line_splitter between lines to avoid logging garbage; when killing children, kill them with SIGABRT if internal checks are enabled

* send SIGABRT to external plugins only if we are not shutting down

* fix cross cleanup in pluginsd parser

* fatal when there is a stack error in logs

* compile netdata with -fexceptions

* do not kill external plugins with SIGABRT

* metasync info logs to debug level

* added severity to logs

* added json output; added options per log output; added documentation; fixed issues mentioned

* allow memfd only on linux

* moved journal low level functions to journal.c/h

* move health logs to daemon.log with proper priorities

* fixed a couple of bugs; health log in journal

* updated docs

* systemd-cat-native command to push structured logs to journal from the command line

* fix makefiles

* restored NETDATA_LOG_SEVERITY_LEVEL

* fix makefiles

* systemd-cat-native can also work as the logger of Netdata scripts

* do not require a socket to systemd-journal to log-as-netdata

* alarm notify logs in native format

* properly compare log ids

* fatals log alerts; alarm-notify.sh working

* fix overflow warning

* alarm-notify.sh now logs the request (command line)

* annotate external plugins logs with the function cmd they run

* added context, component and type to alarm-notify.sh; shell sanitization removes control characters and characters that may be expanded by bash

* reformatted alarm-notify logs

* unify cgroup-network-helper.sh

* added quotes around params

* charts.d.plugin switched logging to journal native

* quotes for logfmt

* unify the status codes of streaming receivers and senders

* alarm-notify: don't log anything, if there is nothing to do

* all external plugins log to stderr when running outside netdata; alarm-notify now shows an error when notification methods are needed but are not available

* migrate cgroup-name.sh to new logging

* systemd-cat-native now supports messages with newlines

* socket.c logs use priority

* cleanup log field types

* inherit the systemd set INVOCATION_ID if found

* allow systemd-cat-native to send messages to a systemd-journal-remote URL

* log2journal command that can convert structured logs to journal export format

* various fixes and documentation of log2journal

* updated log2journal docs

* updated log2journal docs

* updated documentation of fields

* allow compiling without libcurl

* do not use socket as format string

* added version information to newly added tools

* updated documentation and help messages

* fix the namespace socket path

* print errno with error

* do not timeout

* updated docs

* updated docs

* updated docs

* log2journal updated docs and params

* when talking to a remote journal, systemd-cat-native batches the messages

* enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote

* Revert "enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote"

This reverts commit b079d53c11.

* note about uncompressed traffic

* log2journal: code reorg and cleanup to make modular

* finished rewriting log2journal

* more comments

* rewriting rules support

* increased limits

* updated docs

* updated docs

* fix old log call

* use journal only when stderr is connected to journal

* update netdata.spec for libcurl, libpcre2 and log2journal

* pcre2-devel

* do not require pcre2 in centos < 8, amazonlinux < 2023, open suse

* log2journal only on systems pcre2 is available

* ignore log2journal in .gitignore

* avoid log2journal on centos 7, amazonlinux 2 and opensuse

* add pcre2-8 to static build

* undo last commit

* Bundle to static

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Add build deps for deb packages

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Add dependencies; build from source

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Test build for amazon linux and centos expect to fail for suse

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* fix minor oversight

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Reorg code

* Add the install from source (deps) as a TODO
* Not enable the build on suse ecosystem

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

---------

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud>
2023-11-22 10:27:25 +02:00

249 lines
9 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "plugin_proc.h"
// Table of the data collection modules driven by proc_main().
// Each entry describes one data source; the array ends with an entry whose
// name is NULL, which is what the loops in proc_main() test for.
static struct proc_module {
// key looked up in the "plugin:proc" config section to enable/disable the
// module; also printed in the log module field as "proc.plugin[<name>]"
const char *name;
// registered as the worker job name for this entry's array index
const char *dim;
// loaded from the config by proc_main() (default: yes); cleared as soon as
// func returns non-zero, so the module is skipped on later iterations
int enabled;
// collector callback; proc_main() calls it with localhost->rrd_update_every
// and the value returned by heartbeat_next(); a non-zero return disables it
int (*func)(int update_every, usec_t dt);
// reset to NULL by proc_main(); not otherwise referenced in this part of the file
RRDDIM *rd;
} proc_modules[] = {
// system metrics
{.name = "/proc/stat", .dim = "stat", .func = do_proc_stat},
{.name = "/proc/uptime", .dim = "uptime", .func = do_proc_uptime},
{.name = "/proc/loadavg", .dim = "loadavg", .func = do_proc_loadavg},
{.name = "/proc/sys/fs/file-nr", .dim = "file-nr", .func = do_proc_sys_fs_file_nr},
{.name = "/proc/sys/kernel/random/entropy_avail", .dim = "entropy", .func = do_proc_sys_kernel_random_entropy_avail},
// pressure metrics
{.name = "/proc/pressure", .dim = "pressure", .func = do_proc_pressure},
// CPU metrics
{.name = "/proc/interrupts", .dim = "interrupts", .func = do_proc_interrupts},
{.name = "/proc/softirqs", .dim = "softirqs", .func = do_proc_softirqs},
// memory metrics
{.name = "/proc/vmstat", .dim = "vmstat", .func = do_proc_vmstat},
{.name = "/proc/meminfo", .dim = "meminfo", .func = do_proc_meminfo},
{.name = "/sys/kernel/mm/ksm", .dim = "ksm", .func = do_sys_kernel_mm_ksm},
{.name = "/sys/block/zram", .dim = "zram", .func = do_sys_block_zram},
{.name = "/sys/devices/system/edac/mc", .dim = "edac", .func = do_proc_sys_devices_system_edac_mc},
{.name = "/sys/devices/pci/aer", .dim = "pci_aer", .func = do_proc_sys_devices_pci_aer},
{.name = "/sys/devices/system/node", .dim = "numa", .func = do_proc_sys_devices_system_node},
{.name = "/proc/pagetypeinfo", .dim = "pagetypeinfo", .func = do_proc_pagetypeinfo},
// network metrics
{.name = "/proc/net/wireless", .dim = "netwireless", .func = do_proc_net_wireless},
{.name = "/proc/net/sockstat", .dim = "sockstat", .func = do_proc_net_sockstat},
{.name = "/proc/net/sockstat6", .dim = "sockstat6", .func = do_proc_net_sockstat6},
{.name = "/proc/net/netstat", .dim = "netstat", .func = do_proc_net_netstat},
{.name = "/proc/net/sctp/snmp", .dim = "sctp", .func = do_proc_net_sctp_snmp},
{.name = "/proc/net/softnet_stat", .dim = "softnet", .func = do_proc_net_softnet_stat},
{.name = "/proc/net/ip_vs/stats", .dim = "ipvs", .func = do_proc_net_ip_vs_stats},
{.name = "/sys/class/infiniband", .dim = "infiniband", .func = do_sys_class_infiniband},
// firewall metrics
{.name = "/proc/net/stat/conntrack", .dim = "conntrack", .func = do_proc_net_stat_conntrack},
{.name = "/proc/net/stat/synproxy", .dim = "synproxy", .func = do_proc_net_stat_synproxy},
// disk metrics
{.name = "/proc/diskstats", .dim = "diskstats", .func = do_proc_diskstats},
{.name = "/proc/mdstat", .dim = "mdstat", .func = do_proc_mdstat},
// NFS metrics
{.name = "/proc/net/rpc/nfsd", .dim = "nfsd", .func = do_proc_net_rpc_nfsd},
{.name = "/proc/net/rpc/nfs", .dim = "nfs", .func = do_proc_net_rpc_nfs},
// ZFS metrics
{.name = "/proc/spl/kstat/zfs/arcstats", .dim = "zfs_arcstats", .func = do_proc_spl_kstat_zfs_arcstats},
{.name = "/proc/spl/kstat/zfs/pool/state",.dim = "zfs_pool_state",.func = do_proc_spl_kstat_zfs_pool_state},
// BTRFS metrics
{.name = "/sys/fs/btrfs", .dim = "btrfs", .func = do_sys_fs_btrfs},
// IPC metrics
{.name = "ipc", .dim = "ipc", .func = do_ipc},
// linux power supply metrics
{.name = "/sys/class/power_supply", .dim = "power_supply", .func = do_sys_class_power_supply},
// GPU metrics
{.name = "/sys/class/drm", .dim = "drm", .func = do_sys_class_drm},
// the terminator of this array
{.name = NULL, .dim = NULL, .func = NULL}
};
// proc_main() registers every entry above as a worker job, using the array
// index as the job id. The table currently holds 36 modules, so the worker
// job type limit must be at least 36; update this check when adding modules.
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 36
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 36
#endif
// Handle of the /proc/net/dev collector thread. proc_main() allocates it only
// when that collector is enabled in the config; it stays NULL otherwise.
static netdata_thread_t *netdev_thread = NULL;

/*
 * Cleanup handler installed by proc_main().
 *
 * ptr is the struct netdata_static_thread describing the proc plugin thread.
 * The handler marks it as exiting, joins and releases the /proc/net/dev
 * collector thread (if one was started), marks the thread as exited and
 * finally unregisters the worker.
 */
static void proc_main_cleanup(void *ptr)
{
    struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
    static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;

    collector_info("cleaning up...");

    if (netdev_thread) {
        netdata_thread_join(*netdev_thread, NULL);
        freez(netdev_thread);
        // netdev_thread has file scope: reset it so no later code path can
        // join or free the released handle a second time
        netdev_thread = NULL;
    }

    static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;

    worker_unregister();
}
// Set by proc_main() from is_lxcfs_proc_mounted(). It has external linkage,
// so it is presumably read by other proc.plugin modules (not visible here).
bool inside_lxc_container = false;

/*
 * Scan /proc/self/mounts for an lxcfs mount on /proc.
 *
 * Returns true when a line has "lxcfs" as its first word and a second word
 * (the mount point) that starts with "/proc"; returns false otherwise, or
 * when the mounts file cannot be opened or read.
 */
static bool is_lxcfs_proc_mounted(void) {
    procfile *ff = procfile_open("/proc/self/mounts", " \t", PROCFILE_FLAG_DEFAULT);
    if (unlikely(!ff))
        return false;

    // NOTE(review): as in the original code, ff is not closed on this path;
    // presumably procfile_readall() releases it when it fails - confirm.
    ff = procfile_readall(ff);
    if (unlikely(!ff))
        return false;

    bool found = false;
    size_t lines = procfile_lines(ff);

    for (size_t l = 0; l < lines && !found; l++) {
        // a mount entry needs at least a source and a mount point
        if (procfile_linewords(ff, l) < 2)
            continue;

        found = !strcmp(procfile_lineword(ff, l, 0), "lxcfs") &&
                !strncmp(procfile_lineword(ff, l, 1), "/proc", 5);
    }

    // single exit path: the file is closed exactly once, found or not
    procfile_close(ff);
    return found;
}
// Log field callback: writes "proc.plugin[<module name>]" into wb.
// data is the struct proc_module currently being run by proc_main().
// Always returns true.
static bool log_proc_module(BUFFER *wb, void *data) {
    const struct proc_module *module = data;

    buffer_sprintf(wb, "proc.plugin[%s]", module->name);

    return true;
}
// Entry point of the proc plugin thread.
//
// ptr is handed unchanged to proc_main_cleanup(), which casts it to
// struct netdata_static_thread *.
//
// The function optionally starts the /proc/net/dev collector thread, loads the
// enabled flag of every entry in proc_modules[] from the "plugin:proc" config
// section, and then calls each enabled module once per heartbeat for as long as
// service_running(SERVICE_COLLECTORS) is true. Always returns NULL.
void *proc_main(void *ptr)
{
worker_register("PROC");
rrd_collector_started();
// /proc/net/dev is handled by netdev_main() in its own joinable thread,
// unless it is disabled in the config
// NOTE(review): the result of netdata_thread_create() is not checked, yet
// proc_main_cleanup() joins the thread whenever netdev_thread is non-NULL -
// confirm that a failed creation cannot reach the join.
if (config_get_boolean("plugin:proc", "/proc/net/dev", CONFIG_BOOLEAN_YES)) {
netdev_thread = mallocz(sizeof(netdata_thread_t));
netdata_log_debug(D_SYSTEM, "Starting thread %s.", THREAD_NETDEV_NAME);
netdata_thread_create(
netdev_thread, THREAD_NETDEV_NAME, NETDATA_THREAD_OPTION_JOINABLE, netdev_main, netdev_thread);
}
netdata_thread_cleanup_push(proc_main_cleanup, ptr);
{
// the result is deliberately discarded; presumably this call registers "no"
// as the default for /proc/pagetypeinfo before the generic loop below looks
// it up with a default of "yes" - TODO confirm against the config API
config_get_boolean("plugin:proc", "/proc/pagetypeinfo", CONFIG_BOOLEAN_NO);
// check the enabled status for each module
int i;
for(i = 0; proc_modules[i].name; i++) {
struct proc_module *pm = &proc_modules[i];
pm->enabled = config_get_boolean("plugin:proc", pm->name, CONFIG_BOOLEAN_YES);
pm->rd = NULL;
// the array index doubles as the worker job id (see worker_is_busy(i) below)
worker_register_job_name(i, proc_modules[i].dim);
}
// heartbeat period: the host's update interval converted to microseconds
usec_t step = localhost->rrd_update_every * USEC_PER_SEC;
heartbeat_t hb;
heartbeat_init(&hb);
// evaluated once, before the collection loop starts
inside_lxc_container = is_lxcfs_proc_mounted();
// index of the module field inside the log stack below
#define LGS_MODULE_ID 0
ND_LOG_STACK lgs[] = {
[LGS_MODULE_ID] = ND_LOG_FIELD_TXT(NDF_MODULE, "proc.plugin"),
ND_LOG_FIELD_END(),
};
ND_LOG_STACK_PUSH(lgs);
while(service_running(SERVICE_COLLECTORS)) {
worker_is_idle();
// wait for the next heartbeat; the returned value is what modules receive as dt
usec_t hb_dt = heartbeat_next(&hb, step);
// the service may have been stopped while waiting
if(unlikely(!service_running(SERVICE_COLLECTORS)))
break;
for(i = 0; proc_modules[i].name; i++) {
if(unlikely(!service_running(SERVICE_COLLECTORS)))
break;
struct proc_module *pm = &proc_modules[i];
if(unlikely(!pm->enabled))
continue;
worker_is_busy(i);
// while the module runs, the log module field is produced by
// log_proc_module() so it carries the module name
lgs[LGS_MODULE_ID] = ND_LOG_FIELD_CB(NDF_MODULE, log_proc_module, pm);
// a non-zero return from the module disables it for all later iterations
pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt);
// restore the plain plugin name for anything logged outside a module
lgs[LGS_MODULE_ID] = ND_LOG_FIELD_TXT(NDF_MODULE, "proc.plugin");
}
}
}
// presumably, as with pthread_cleanup_pop(), the non-zero argument makes
// proc_main_cleanup() run here on normal exit - confirm
netdata_thread_cleanup_pop(1);
return NULL;
}
/*
 * Count the NUMA nodes exposed under the node directory.
 *
 * The directory defaults to <host prefix>/sys/devices/system/node and can be
 * overridden with the "directory to monitor" option of the
 * "plugin:proc:/sys/devices/system/node" config section. Every directory
 * entry of type DT_DIR whose name is "node" followed by a digit counts as
 * one node.
 *
 * The result is computed on the first call and cached in a function-local
 * static; later calls return the cached value. Returns 0 when the directory
 * cannot be opened.
 */
int get_numa_node_count(void)
{
    static int numa_node_count = -1;

    if (numa_node_count != -1)
        return numa_node_count;

    numa_node_count = 0;

    char name[FILENAME_MAX + 1];
    snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/node");
    char *dirname = config_get("plugin:proc:/sys/devices/system/node", "directory to monitor", name);

    DIR *dir = opendir(dirname);
    if (!dir)
        return numa_node_count;

    struct dirent *de;
    while ((de = readdir(dir))) {
        if (de->d_type != DT_DIR)
            continue;

        if (strncmp(de->d_name, "node", 4) != 0)
            continue;

        // <ctype.h> functions are undefined for negative values other than
        // EOF, and plain char may be signed: convert to unsigned char first
        if (!isdigit((unsigned char)de->d_name[4]))
            continue;

        numa_node_count++;
    }

    closedir(dir);

    return numa_node_count;
}