mirror of
https://github.com/netdata/netdata.git
synced 2025-04-14 01:29:11 +00:00

* cleanup of logging - wip
* first working iteration
* add errno annotator
* replace old logging functions with netdata_logger()
* cleanup
* update error_limit
* fix remaining error_limit references
* work on fatal()
* started working on structured logs
* full cleanup
* default logging to files; fix all plugins initialization
* fix formatting of numbers
* cleanup and reorg
* fix coverity issues
* cleanup obsolete code
* fix formatting of numbers
* fix log rotation
* fix for older systems
* add detection of systemd journal via stderr
* finished on access.log
* remove left-over transport
* do not add empty fields to the logs
* journal get compact uuids; X-Transaction-ID header is added in web responses
* allow compiling on systems without memfd sealing
* added libnetdata/uuid directory
* move datetime formatters to libnetdata
* add missing files
* link the makefiles in libnetdata
* added uuid_parse_flexi() to parse UUIDs with and without hyphens; the web server now reads X-Transaction-ID and uses it for functions and web responses
* added stream receiver, sender, proc plugin and pluginsd log stack
* iso8601 advanced usage; line_splitter module in libnetdata; code cleanup
* add message ids to streaming inbound and outbound connections
* cleanup line_splitter between lines to avoid logging garbage; when killing children, kill them with SIGABRT if internal checks is enabled
* send SIGABRT to external plugins only if we are not shutting down
* fix cross cleanup in pluginsd parser
* fatal when there is a stack error in logs
* compile netdata with -fexceptions
* do not kill external plugins with SIGABRT
* metasync info logs to debug level
* added severity to logs
* added json output; added options per log output; added documentation; fixed issues mentioned
* allow memfd only on linux
* moved journal low level functions to journal.c/h
* move health logs to daemon.log with proper priorities
* fixed a couple of bugs; health log in journal
* updated docs
* systemd-cat-native command to push structured logs to journal from the command line
* fix makefiles
* restored NETDATA_LOG_SEVERITY_LEVEL
* fix makefiles
* systemd-cat-native can also work as the logger of Netdata scripts
* do not require a socket to systemd-journal to log-as-netdata
* alarm notify logs in native format
* properly compare log ids
* fatals log alerts; alarm-notify.sh working
* fix overflow warning
* alarm-notify.sh now logs the request (command line)
* annotate external plugins logs with the function cmd they run
* added context, component and type to alarm-notify.sh; shell sanitization removes control character and characters that may be expanded by bash
* reformatted alarm-notify logs
* unify cgroup-network-helper.sh
* added quotes around params
* charts.d.plugin switched logging to journal native
* quotes for logfmt
* unify the status codes of streaming receivers and senders
* alarm-notify: dont log anything, if there is nothing to do
* all external plugins log to stderr when running outside netdata; alarm-notify now shows an error when notification methods are needed but are not available
* migrate cgroup-name.sh to new logging
* systemd-cat-native now supports messages with newlines
* socket.c logs use priority
* cleanup log field types
* inherit the systemd set INVOCATION_ID if found
* allow systemd-cat-native to send messages to a systemd-journal-remote URL
* log2journal command that can convert structured logs to journal export format
* various fixes and documentation of log2journal
* updated log2journal docs
* updated log2journal docs
* updated documentation of fields
* allow compiling without libcurl
* do not use socket as format string
* added version information to newly added tools
* updated documentation and help messages
* fix the namespace socket path
* print errno with error
* do not timeout
* updated docs
* updated docs
* updated docs
* log2journal updated docs and params
* when talking to a remote journal, systemd-cat-native batches the messages
* enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote
* Revert "enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote"
This reverts commit b079d53c11.
* note about uncompressed traffic
* log2journal: code reorg and cleanup to make modular
* finished rewriting log2journal
* more comments
* rewriting rules support
* increased limits
* updated docs
* updated docs
* fix old log call
* use journal only when stderr is connected to journal
* update netdata.spec for libcurl, libpcre2 and log2journal
* pcre2-devel
* do not require pcre2 in centos < 8, amazonlinux < 2023, open suse
* log2journal only on systems pcre2 is available
* ignore log2journal in .gitignore
* avoid log2journal on centos 7, amazonlinux 2 and opensuse
* add pcre2-8 to static build
* undo last commit
* Bundle to static
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Add build deps for deb packages
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Add dependencies; build from source
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Test build for amazon linux and centos expect to fail for suse
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* fix minor oversight
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Reorg code
* Add the install from source (deps) as a TODO
* Not enable the build on suse ecosystem
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
---------
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud>
253 lines
9.1 KiB
C
253 lines
9.1 KiB
C
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include "common.h"
|
|
|
|
// The action attached to each entry of signals_waiting[].
// signals_init() uses it to pick the handler to install and
// signals_handle() uses it to decide what to do once the signal arrived.
typedef enum signal_action {
    NETDATA_SIGNAL_END_OF_LIST   = 0, // marks the terminator entry of signals_waiting[]
    NETDATA_SIGNAL_IGNORE        = 1, // the signal is set to SIG_IGN
    NETDATA_SIGNAL_EXIT_CLEANLY  = 2, // clean up and exit with code 0
    NETDATA_SIGNAL_SAVE_DATABASE = 3, // run CMD_SAVE_DATABASE
    NETDATA_SIGNAL_REOPEN_LOGS   = 4, // run CMD_REOPEN_LOGS
    NETDATA_SIGNAL_RELOAD_HEALTH = 5, // run CMD_RELOAD_HEALTH
    NETDATA_SIGNAL_FATAL         = 6, // report on stderr and call fatal()
    NETDATA_SIGNAL_CHILD         = 7, // reap exited children
} SIGNAL_ACTION;
|
|
|
|
static struct {
|
|
int signo; // the signal
|
|
const char *name; // the name of the signal
|
|
size_t count; // the number of signals received
|
|
SIGNAL_ACTION action; // the action to take
|
|
} signals_waiting[] = {
|
|
{ SIGPIPE, "SIGPIPE", 0, NETDATA_SIGNAL_IGNORE },
|
|
{ SIGINT , "SIGINT", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
|
|
{ SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
|
|
{ SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
|
|
{ SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS },
|
|
{ SIGUSR1, "SIGUSR1", 0, NETDATA_SIGNAL_SAVE_DATABASE },
|
|
{ SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH },
|
|
{ SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL },
|
|
{ SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD },
|
|
|
|
// terminator
|
|
{ 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST }
|
|
};
|
|
|
|
static void signal_handler(int signo) {
|
|
// find the entry in the list
|
|
int i;
|
|
for(i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST ; i++) {
|
|
if(unlikely(signals_waiting[i].signo == signo)) {
|
|
signals_waiting[i].count++;
|
|
|
|
if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) {
|
|
char buffer[200 + 1];
|
|
snprintfz(buffer, 200, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name);
|
|
if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) {
|
|
// nothing to do - we cannot write but there is no way to complain about it
|
|
;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void signals_block(void) {
|
|
sigset_t sigset;
|
|
sigfillset(&sigset);
|
|
|
|
if(pthread_sigmask(SIG_BLOCK, &sigset, NULL) == -1)
|
|
netdata_log_error("SIGNAL: Could not block signals for threads");
|
|
}
|
|
|
|
void signals_unblock(void) {
|
|
sigset_t sigset;
|
|
sigfillset(&sigset);
|
|
|
|
if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1) {
|
|
netdata_log_error("SIGNAL: Could not unblock signals for threads");
|
|
}
|
|
}
|
|
|
|
void signals_init(void) {
|
|
// Catch signals which we want to use
|
|
struct sigaction sa;
|
|
sa.sa_flags = 0;
|
|
|
|
// ignore all signals while we run in a signal handler
|
|
sigfillset(&sa.sa_mask);
|
|
|
|
int i;
|
|
for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
|
|
switch (signals_waiting[i].action) {
|
|
case NETDATA_SIGNAL_IGNORE:
|
|
sa.sa_handler = SIG_IGN;
|
|
break;
|
|
default:
|
|
sa.sa_handler = signal_handler;
|
|
break;
|
|
}
|
|
|
|
if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
|
|
netdata_log_error("SIGNAL: Failed to change signal handler for: %s", signals_waiting[i].name);
|
|
}
|
|
}
|
|
|
|
void signals_restore_SIGCHLD(void)
|
|
{
|
|
struct sigaction sa;
|
|
|
|
sa.sa_flags = 0;
|
|
sigfillset(&sa.sa_mask);
|
|
sa.sa_handler = signal_handler;
|
|
|
|
if(sigaction(SIGCHLD, &sa, NULL) == -1)
|
|
netdata_log_error("SIGNAL: Failed to change signal handler for: SIGCHLD");
|
|
}
|
|
|
|
void signals_reset(void) {
|
|
struct sigaction sa;
|
|
sigemptyset(&sa.sa_mask);
|
|
sa.sa_handler = SIG_DFL;
|
|
sa.sa_flags = 0;
|
|
|
|
int i;
|
|
for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
|
|
if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
|
|
netdata_log_error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name);
|
|
}
|
|
}
|
|
|
|
// reap_child reaps the child identified by pid.
|
|
static void reap_child(pid_t pid) {
|
|
siginfo_t i;
|
|
|
|
errno = 0;
|
|
netdata_log_debug(D_CHILDS, "SIGNAL: reap_child(%d)...", pid);
|
|
if (netdata_waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) {
|
|
if (errno != ECHILD)
|
|
netdata_log_error("SIGNAL: waitid(%d): failed to wait for child", pid);
|
|
else
|
|
netdata_log_info("SIGNAL: waitid(%d): failed - it seems the child is already reaped", pid);
|
|
return;
|
|
}
|
|
else if (i.si_pid == 0) {
|
|
// Process didn't exit, this shouldn't happen.
|
|
netdata_log_error("SIGNAL: waitid(%d): reports pid 0 - child has not exited", pid);
|
|
return;
|
|
}
|
|
|
|
switch (i.si_code) {
|
|
case CLD_EXITED:
|
|
netdata_log_info("SIGNAL: reap_child(%d) exited with code: %d", pid, i.si_status);
|
|
break;
|
|
case CLD_KILLED:
|
|
netdata_log_info("SIGNAL: reap_child(%d) killed by signal: %d", pid, i.si_status);
|
|
break;
|
|
case CLD_DUMPED:
|
|
netdata_log_info("SIGNAL: reap_child(%d) dumped core by signal: %d", pid, i.si_status);
|
|
break;
|
|
case CLD_STOPPED:
|
|
netdata_log_info("SIGNAL: reap_child(%d) stopped by signal: %d", pid, i.si_status);
|
|
break;
|
|
case CLD_TRAPPED:
|
|
netdata_log_info("SIGNAL: reap_child(%d) trapped by signal: %d", pid, i.si_status);
|
|
break;
|
|
case CLD_CONTINUED:
|
|
netdata_log_info("SIGNAL: reap_child(%d) continued by signal: %d", pid, i.si_status);
|
|
break;
|
|
default:
|
|
netdata_log_info("SIGNAL: reap_child(%d) gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// reap_children reaps all pending children which are not managed by myp.
|
|
static void reap_children() {
|
|
siginfo_t i;
|
|
|
|
while(1) {
|
|
i.si_pid = 0;
|
|
if (netdata_waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1 || i.si_pid == 0)
|
|
// nothing to do
|
|
return;
|
|
|
|
reap_child(i.si_pid);
|
|
}
|
|
}
|
|
|
|
void signals_handle(void) {
|
|
while(1) {
|
|
|
|
// pause() causes the calling process (or thread) to sleep until a signal
|
|
// is delivered that either terminates the process or causes the invocation
|
|
// of a signal-catching function.
|
|
if(pause() == -1 && errno == EINTR) {
|
|
|
|
// loop once, but keep looping while signals are coming in
|
|
// this is needed because a few operations may take some time
|
|
// so we need to check for new signals before pausing again
|
|
int found = 1;
|
|
while(found) {
|
|
found = 0;
|
|
|
|
// execute the actions of the signals
|
|
int i;
|
|
for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
|
|
if (signals_waiting[i].count) {
|
|
found = 1;
|
|
signals_waiting[i].count = 0;
|
|
const char *name = signals_waiting[i].name;
|
|
|
|
switch (signals_waiting[i].action) {
|
|
case NETDATA_SIGNAL_RELOAD_HEALTH:
|
|
nd_log_limits_unlimited();
|
|
netdata_log_info("SIGNAL: Received %s. Reloading HEALTH configuration...", name);
|
|
nd_log_limits_reset();
|
|
execute_command(CMD_RELOAD_HEALTH, NULL, NULL);
|
|
break;
|
|
|
|
case NETDATA_SIGNAL_SAVE_DATABASE:
|
|
nd_log_limits_unlimited();
|
|
netdata_log_info("SIGNAL: Received %s. Saving databases...", name);
|
|
nd_log_limits_reset();
|
|
execute_command(CMD_SAVE_DATABASE, NULL, NULL);
|
|
break;
|
|
|
|
case NETDATA_SIGNAL_REOPEN_LOGS:
|
|
nd_log_limits_unlimited();
|
|
netdata_log_info("SIGNAL: Received %s. Reopening all log files...", name);
|
|
nd_log_limits_reset();
|
|
execute_command(CMD_REOPEN_LOGS, NULL, NULL);
|
|
break;
|
|
|
|
case NETDATA_SIGNAL_EXIT_CLEANLY:
|
|
nd_log_limits_unlimited();
|
|
netdata_log_info("SIGNAL: Received %s. Cleaning up to exit...", name);
|
|
commands_exit();
|
|
netdata_cleanup_and_exit(0);
|
|
exit(0);
|
|
break;
|
|
|
|
case NETDATA_SIGNAL_FATAL:
|
|
fatal("SIGNAL: Received %s. netdata now exits.", name);
|
|
break;
|
|
|
|
case NETDATA_SIGNAL_CHILD:
|
|
reap_children();
|
|
break;
|
|
|
|
default:
|
|
netdata_log_info("SIGNAL: Received %s. No signal handler configured. Ignoring it.", name);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
netdata_log_error("SIGNAL: pause() returned but it was not interrupted by a signal.");
|
|
}
|
|
}
|