mirror of
https://github.com/netdata/netdata.git
synced 2025-04-14 09:38:34 +00:00

* cleanup of logging - wip
* first working iteration
* add errno annotator
* replace old logging functions with netdata_logger()
* cleanup
* update error_limit
* fix remaining error_limit references
* work on fatal()
* started working on structured logs
* full cleanup
* default logging to files; fix all plugins initialization
* fix formatting of numbers
* cleanup and reorg
* fix coverity issues
* cleanup obsolete code
* fix formatting of numbers
* fix log rotation
* fix for older systems
* add detection of systemd journal via stderr
* finished on access.log
* remove left-over transport
* do not add empty fields to the logs
* journal get compact uuids; X-Transaction-ID header is added in web responses
* allow compiling on systems without memfd sealing
* added libnetdata/uuid directory
* move datetime formatters to libnetdata
* add missing files
* link the makefiles in libnetdata
* added uuid_parse_flexi() to parse UUIDs with and without hyphens; the web server now read X-Transaction-ID and uses it for functions and web responses
* added stream receiver, sender, proc plugin and pluginsd log stack
* iso8601 advanced usage; line_splitter module in libnetdata; code cleanup
* add message ids to streaming inbound and outbound connections
* cleanup line_splitter between lines to avoid logging garbage; when killing children, kill them with SIGABRT if internal checks is enabled
* send SIGABRT to external plugins only if we are not shutting down
* fix cross cleanup in pluginsd parser
* fatal when there is a stack error in logs
* compile netdata with -fexceptions
* do not kill external plugins with SIGABRT
* metasync info logs to debug level
* added severity to logs
* added json output; added options per log output; added documentation; fixed issues mentioned
* allow memfd only on linux
* moved journal low level functions to journal.c/h
* move health logs to daemon.log with proper priorities
* fixed a couple of bugs; health log in journal
* updated docs
* systemd-cat-native command to push structured logs to journal from the command line
* fix makefiles
* restored NETDATA_LOG_SEVERITY_LEVEL
* fix makefiles
* systemd-cat-native can also work as the logger of Netdata scripts
* do not require a socket to systemd-journal to log-as-netdata
* alarm notify logs in native format
* properly compare log ids
* fatals log alerts; alarm-notify.sh working
* fix overflow warning
* alarm-notify.sh now logs the request (command line)
* annotate external plugins logs with the function cmd they run
* added context, component and type to alarm-notify.sh; shell sanitization removes control character and characters that may be expanded by bash
* reformatted alarm-notify logs
* unify cgroup-network-helper.sh
* added quotes around params
* charts.d.plugin switched logging to journal native
* quotes for logfmt
* unify the status codes of streaming receivers and senders
* alarm-notify: dont log anything, if there is nothing to do
* all external plugins log to stderr when running outside netdata; alarm-notify now shows an error when notification methods are needed but are not available
* migrate cgroup-name.sh to new logging
* systemd-cat-native now supports messages with newlines
* socket.c logs use priority
* cleanup log field types
* inherit the systemd set INVOCATION_ID if found
* allow systemd-cat-native to send messages to a systemd-journal-remote URL
* log2journal command that can convert structured logs to journal export format
* various fixes and documentation of log2journal
* updated log2journal docs
* updated log2journal docs
* updated documentation of fields
* allow compiling without libcurl
* do not use socket as format string
* added version information to newly added tools
* updated documentation and help messages
* fix the namespace socket path
* print errno with error
* do not timeout
* updated docs
* updated docs
* updated docs
* log2journal updated docs and params
* when talking to a remote journal, systemd-cat-native batches the messages
* enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote
* Revert "enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote"
This reverts commit b079d53c11.
* note about uncompressed traffic
* log2journal: code reorg and cleanup to make modular
* finished rewriting log2journal
* more comments
* rewriting rules support
* increased limits
* updated docs
* updated docs
* fix old log call
* use journal only when stderr is connected to journal
* update netdata.spec for libcurl, libpcre2 and log2journal
* pcre2-devel
* do not require pcre2 in centos < 8, amazonlinux < 2023, open suse
* log2journal only on systems pcre2 is available
* ignore log2journal in .gitignore
* avoid log2journal on centos 7, amazonlinux 2 and opensuse
* add pcre2-8 to static build
* undo last commit
* Bundle to static
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Add build deps for deb packages
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Add dependencies; build from source
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Test build for amazon linux and centos expect to fail for suse
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* fix minor oversight
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Reorg code
* Add the install from source (deps) as a TODO
* Not enable the build on suse ecosystem
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
---------
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud>
249 lines
9 KiB
C
249 lines
9 KiB
C
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include "plugin_proc.h"
|
|
|
|
static struct proc_module {
|
|
const char *name;
|
|
const char *dim;
|
|
|
|
int enabled;
|
|
|
|
int (*func)(int update_every, usec_t dt);
|
|
|
|
RRDDIM *rd;
|
|
|
|
} proc_modules[] = {
|
|
|
|
// system metrics
|
|
{.name = "/proc/stat", .dim = "stat", .func = do_proc_stat},
|
|
{.name = "/proc/uptime", .dim = "uptime", .func = do_proc_uptime},
|
|
{.name = "/proc/loadavg", .dim = "loadavg", .func = do_proc_loadavg},
|
|
{.name = "/proc/sys/fs/file-nr", .dim = "file-nr", .func = do_proc_sys_fs_file_nr},
|
|
{.name = "/proc/sys/kernel/random/entropy_avail", .dim = "entropy", .func = do_proc_sys_kernel_random_entropy_avail},
|
|
|
|
// pressure metrics
|
|
{.name = "/proc/pressure", .dim = "pressure", .func = do_proc_pressure},
|
|
|
|
// CPU metrics
|
|
{.name = "/proc/interrupts", .dim = "interrupts", .func = do_proc_interrupts},
|
|
{.name = "/proc/softirqs", .dim = "softirqs", .func = do_proc_softirqs},
|
|
|
|
// memory metrics
|
|
{.name = "/proc/vmstat", .dim = "vmstat", .func = do_proc_vmstat},
|
|
{.name = "/proc/meminfo", .dim = "meminfo", .func = do_proc_meminfo},
|
|
{.name = "/sys/kernel/mm/ksm", .dim = "ksm", .func = do_sys_kernel_mm_ksm},
|
|
{.name = "/sys/block/zram", .dim = "zram", .func = do_sys_block_zram},
|
|
{.name = "/sys/devices/system/edac/mc", .dim = "edac", .func = do_proc_sys_devices_system_edac_mc},
|
|
{.name = "/sys/devices/pci/aer", .dim = "pci_aer", .func = do_proc_sys_devices_pci_aer},
|
|
{.name = "/sys/devices/system/node", .dim = "numa", .func = do_proc_sys_devices_system_node},
|
|
{.name = "/proc/pagetypeinfo", .dim = "pagetypeinfo", .func = do_proc_pagetypeinfo},
|
|
|
|
// network metrics
|
|
{.name = "/proc/net/wireless", .dim = "netwireless", .func = do_proc_net_wireless},
|
|
{.name = "/proc/net/sockstat", .dim = "sockstat", .func = do_proc_net_sockstat},
|
|
{.name = "/proc/net/sockstat6", .dim = "sockstat6", .func = do_proc_net_sockstat6},
|
|
{.name = "/proc/net/netstat", .dim = "netstat", .func = do_proc_net_netstat},
|
|
{.name = "/proc/net/sctp/snmp", .dim = "sctp", .func = do_proc_net_sctp_snmp},
|
|
{.name = "/proc/net/softnet_stat", .dim = "softnet", .func = do_proc_net_softnet_stat},
|
|
{.name = "/proc/net/ip_vs/stats", .dim = "ipvs", .func = do_proc_net_ip_vs_stats},
|
|
{.name = "/sys/class/infiniband", .dim = "infiniband", .func = do_sys_class_infiniband},
|
|
|
|
// firewall metrics
|
|
{.name = "/proc/net/stat/conntrack", .dim = "conntrack", .func = do_proc_net_stat_conntrack},
|
|
{.name = "/proc/net/stat/synproxy", .dim = "synproxy", .func = do_proc_net_stat_synproxy},
|
|
|
|
// disk metrics
|
|
{.name = "/proc/diskstats", .dim = "diskstats", .func = do_proc_diskstats},
|
|
{.name = "/proc/mdstat", .dim = "mdstat", .func = do_proc_mdstat},
|
|
|
|
// NFS metrics
|
|
{.name = "/proc/net/rpc/nfsd", .dim = "nfsd", .func = do_proc_net_rpc_nfsd},
|
|
{.name = "/proc/net/rpc/nfs", .dim = "nfs", .func = do_proc_net_rpc_nfs},
|
|
|
|
// ZFS metrics
|
|
{.name = "/proc/spl/kstat/zfs/arcstats", .dim = "zfs_arcstats", .func = do_proc_spl_kstat_zfs_arcstats},
|
|
{.name = "/proc/spl/kstat/zfs/pool/state",.dim = "zfs_pool_state",.func = do_proc_spl_kstat_zfs_pool_state},
|
|
|
|
// BTRFS metrics
|
|
{.name = "/sys/fs/btrfs", .dim = "btrfs", .func = do_sys_fs_btrfs},
|
|
|
|
// IPC metrics
|
|
{.name = "ipc", .dim = "ipc", .func = do_ipc},
|
|
|
|
// linux power supply metrics
|
|
{.name = "/sys/class/power_supply", .dim = "power_supply", .func = do_sys_class_power_supply},
|
|
|
|
// GPU metrics
|
|
{.name = "/sys/class/drm", .dim = "drm", .func = do_sys_class_drm},
|
|
|
|
// the terminator of this array
|
|
{.name = NULL, .dim = NULL, .func = NULL}
|
|
};
|
|
|
|
#if WORKER_UTILIZATION_MAX_JOB_TYPES < 36
|
|
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 36
|
|
#endif
|
|
|
|
static netdata_thread_t *netdev_thread = NULL;
|
|
|
|
static void proc_main_cleanup(void *ptr)
|
|
{
|
|
struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
|
|
static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
|
|
|
|
collector_info("cleaning up...");
|
|
|
|
if (netdev_thread) {
|
|
netdata_thread_join(*netdev_thread, NULL);
|
|
freez(netdev_thread);
|
|
}
|
|
|
|
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
|
|
|
|
worker_unregister();
|
|
}
|
|
|
|
bool inside_lxc_container = false;
|
|
|
|
static bool is_lxcfs_proc_mounted() {
|
|
procfile *ff = NULL;
|
|
|
|
if (unlikely(!ff)) {
|
|
char filename[FILENAME_MAX + 1];
|
|
snprintfz(filename, FILENAME_MAX, "/proc/self/mounts");
|
|
ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT);
|
|
if (unlikely(!ff))
|
|
return false;
|
|
}
|
|
|
|
ff = procfile_readall(ff);
|
|
if (unlikely(!ff))
|
|
return false;
|
|
|
|
unsigned long l, lines = procfile_lines(ff);
|
|
|
|
for (l = 0; l < lines; l++) {
|
|
size_t words = procfile_linewords(ff, l);
|
|
if (words < 2) {
|
|
continue;
|
|
}
|
|
if (!strcmp(procfile_lineword(ff, l, 0), "lxcfs") && !strncmp(procfile_lineword(ff, l, 1), "/proc", 5)) {
|
|
procfile_close(ff);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
procfile_close(ff);
|
|
|
|
return false;
|
|
}
|
|
|
|
// Log field callback used by proc_main() while a module is running.
//
// data points to the struct proc_module being executed; the text
// "proc.plugin[<module name>]" is written into wb.
// Always returns true.
static bool log_proc_module(BUFFER *wb, void *data) {
    const struct proc_module *module = (const struct proc_module *)data;

    buffer_sprintf(wb, "proc.plugin[%s]", module->name);

    return true;
}
|
|
|
|
void *proc_main(void *ptr)
|
|
{
|
|
worker_register("PROC");
|
|
|
|
rrd_collector_started();
|
|
|
|
if (config_get_boolean("plugin:proc", "/proc/net/dev", CONFIG_BOOLEAN_YES)) {
|
|
netdev_thread = mallocz(sizeof(netdata_thread_t));
|
|
netdata_log_debug(D_SYSTEM, "Starting thread %s.", THREAD_NETDEV_NAME);
|
|
netdata_thread_create(
|
|
netdev_thread, THREAD_NETDEV_NAME, NETDATA_THREAD_OPTION_JOINABLE, netdev_main, netdev_thread);
|
|
}
|
|
|
|
netdata_thread_cleanup_push(proc_main_cleanup, ptr);
|
|
|
|
{
|
|
config_get_boolean("plugin:proc", "/proc/pagetypeinfo", CONFIG_BOOLEAN_NO);
|
|
|
|
// check the enabled status for each module
|
|
int i;
|
|
for(i = 0; proc_modules[i].name; i++) {
|
|
struct proc_module *pm = &proc_modules[i];
|
|
|
|
pm->enabled = config_get_boolean("plugin:proc", pm->name, CONFIG_BOOLEAN_YES);
|
|
pm->rd = NULL;
|
|
|
|
worker_register_job_name(i, proc_modules[i].dim);
|
|
}
|
|
|
|
usec_t step = localhost->rrd_update_every * USEC_PER_SEC;
|
|
heartbeat_t hb;
|
|
heartbeat_init(&hb);
|
|
|
|
inside_lxc_container = is_lxcfs_proc_mounted();
|
|
|
|
#define LGS_MODULE_ID 0
|
|
|
|
ND_LOG_STACK lgs[] = {
|
|
[LGS_MODULE_ID] = ND_LOG_FIELD_TXT(NDF_MODULE, "proc.plugin"),
|
|
ND_LOG_FIELD_END(),
|
|
};
|
|
ND_LOG_STACK_PUSH(lgs);
|
|
|
|
while(service_running(SERVICE_COLLECTORS)) {
|
|
worker_is_idle();
|
|
usec_t hb_dt = heartbeat_next(&hb, step);
|
|
|
|
if(unlikely(!service_running(SERVICE_COLLECTORS)))
|
|
break;
|
|
|
|
for(i = 0; proc_modules[i].name; i++) {
|
|
if(unlikely(!service_running(SERVICE_COLLECTORS)))
|
|
break;
|
|
|
|
struct proc_module *pm = &proc_modules[i];
|
|
if(unlikely(!pm->enabled))
|
|
continue;
|
|
|
|
worker_is_busy(i);
|
|
lgs[LGS_MODULE_ID] = ND_LOG_FIELD_CB(NDF_MODULE, log_proc_module, pm);
|
|
pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt);
|
|
lgs[LGS_MODULE_ID] = ND_LOG_FIELD_TXT(NDF_MODULE, "proc.plugin");
|
|
}
|
|
}
|
|
}
|
|
|
|
netdata_thread_cleanup_pop(1);
|
|
return NULL;
|
|
}
|
|
|
|
int get_numa_node_count(void)
|
|
{
|
|
static int numa_node_count = -1;
|
|
|
|
if (numa_node_count != -1)
|
|
return numa_node_count;
|
|
|
|
numa_node_count = 0;
|
|
|
|
char name[FILENAME_MAX + 1];
|
|
snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/node");
|
|
char *dirname = config_get("plugin:proc:/sys/devices/system/node", "directory to monitor", name);
|
|
|
|
DIR *dir = opendir(dirname);
|
|
if (dir) {
|
|
struct dirent *de = NULL;
|
|
while ((de = readdir(dir))) {
|
|
if (de->d_type != DT_DIR)
|
|
continue;
|
|
|
|
if (strncmp(de->d_name, "node", 4) != 0)
|
|
continue;
|
|
|
|
if (!isdigit(de->d_name[4]))
|
|
continue;
|
|
|
|
numa_node_count++;
|
|
}
|
|
closedir(dir);
|
|
}
|
|
|
|
return numa_node_count;
|
|
}
|