0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-25 05:31:37 +00:00
netdata_netdata/daemon/signals.c
2021-04-14 12:31:40 +03:00

285 lines
9.9 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "common.h"
// Set to 1 by signals_init() when the process runs as pid 1 and child
// reaping has been enabled (myp_init() called); stays 0 otherwise.
// It gates SIGCHLD handler installation and the myp_free() call in
// signals_reset().
static int reaper_enabled = 0;
// What the daemon does when a given signal is received.
// Each entry of the signals_waiting[] table carries one of these values.
typedef enum signal_action {
    NETDATA_SIGNAL_END_OF_LIST = 0, // sentinel: marks the last entry of signals_waiting[]
    NETDATA_SIGNAL_IGNORE,          // signal is set to SIG_IGN by signals_init()
    NETDATA_SIGNAL_EXIT_CLEANLY,    // run the cleanup path and exit with status 0
    NETDATA_SIGNAL_SAVE_DATABASE,   // execute CMD_SAVE_DATABASE
    NETDATA_SIGNAL_REOPEN_LOGS,     // execute CMD_REOPEN_LOGS
    NETDATA_SIGNAL_RELOAD_HEALTH,   // execute CMD_RELOAD_HEALTH
    NETDATA_SIGNAL_FATAL,           // report on stderr from the handler, then fatal()
    NETDATA_SIGNAL_CHILD,           // reap children; handler installed only when the reaper is enabled
} SIGNAL_ACTION;
// Table of the signals the daemon cares about, terminated by an entry whose
// action is NETDATA_SIGNAL_END_OF_LIST.
//
// 'count' is incremented from inside the asynchronous signal handler and is
// read and reset to zero by the main signal loop. It is therefore declared
// 'volatile sig_atomic_t', the only object type ISO C guarantees can be
// safely shared between a signal handler and the interrupted code.
static struct {
    int signo;                      // the signal
    const char *name;               // the name of the signal
    volatile sig_atomic_t count;    // the number of signals received since last handled
    SIGNAL_ACTION action;           // the action to take
} signals_waiting[] = {
    { SIGPIPE, "SIGPIPE", 0, NETDATA_SIGNAL_IGNORE },
    { SIGINT , "SIGINT",  0, NETDATA_SIGNAL_EXIT_CLEANLY },
    { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
    { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
    { SIGHUP,  "SIGHUP",  0, NETDATA_SIGNAL_REOPEN_LOGS },
    { SIGUSR1, "SIGUSR1", 0, NETDATA_SIGNAL_SAVE_DATABASE },
    { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH },
    { SIGBUS,  "SIGBUS",  0, NETDATA_SIGNAL_FATAL },
    { SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD },

    // terminator
    { 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST }
};
// Asynchronous handler for the signals listed in signals_waiting[].
//
// It only records that the signal arrived by incrementing the matching
// entry's counter; the actual work is done later by signals_handle().
// For entries marked NETDATA_SIGNAL_FATAL a short notice is additionally
// written directly to stderr.
//
// errno is saved on entry and restored on exit, so that the formatting and
// write() calls below can never change the errno value seen by the code
// this handler interrupted.
//
// NOTE(review): snprintfz() is a project helper; confirm it is safe to call
// from signal context.
static void signal_handler(int signo) {
    const int saved_errno = errno;

    // find the entry in the list
    int i;
    for(i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST ; i++) {
        if(unlikely(signals_waiting[i].signo == signo)) {
            signals_waiting[i].count++;

            if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) {
                char buffer[200 + 1];
                snprintfz(buffer, 200, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name);
                if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) {
                    // nothing to do - we cannot write but there is no way to complain about it
                    ;
                }
            }

            // signal numbers are unique in the table - stop at the first match
            break;
        }
    }

    errno = saved_errno;
}
// Block every signal for the calling thread.
//
// pthread_sigmask() returns 0 on success and a positive error number on
// failure (it does not return -1), so any non-zero result is a failure.
void signals_block(void) {
    sigset_t sigset;
    sigfillset(&sigset);

    if(pthread_sigmask(SIG_BLOCK, &sigset, NULL) != 0)
        error("SIGNAL: Could not block signals for threads");
}
// Unblock every signal for the calling thread.
//
// pthread_sigmask() returns 0 on success and a positive error number on
// failure (it does not return -1), so any non-zero result is a failure.
void signals_unblock(void) {
    sigset_t sigset;
    sigfillset(&sigset);

    if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) != 0) {
        error("SIGNAL: Could not unblock signals for threads");
    }
}
void signals_init(void) {
// Catch signals which we want to use
struct sigaction sa;
sa.sa_flags = 0;
// Enable process tracking / reaper if running as init (pid == 1).
// This prevents zombie processes when running in a container.
if (getpid() == 1) {
info("SIGNAL: Enabling reaper");
myp_init();
reaper_enabled = 1;
} else {
info("SIGNAL: Not enabling reaper");
}
// ignore all signals while we run in a signal handler
sigfillset(&sa.sa_mask);
int i;
for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
switch (signals_waiting[i].action) {
case NETDATA_SIGNAL_IGNORE:
sa.sa_handler = SIG_IGN;
break;
case NETDATA_SIGNAL_CHILD:
if (reaper_enabled == 0)
continue;
// FALLTHROUGH
default:
sa.sa_handler = signal_handler;
break;
}
if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
error("SIGNAL: Failed to change signal handler for: %s", signals_waiting[i].name);
}
}
void signals_restore_SIGCHLD(void)
{
struct sigaction sa;
if (reaper_enabled == 0)
return;
sa.sa_flags = 0;
sigfillset(&sa.sa_mask);
sa.sa_handler = signal_handler;
if(sigaction(SIGCHLD, &sa, NULL) == -1)
error("SIGNAL: Failed to change signal handler for: SIGCHLD");
}
void signals_reset(void) {
struct sigaction sa;
sigemptyset(&sa.sa_mask);
sa.sa_handler = SIG_DFL;
sa.sa_flags = 0;
int i;
for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name);
}
if (reaper_enabled == 1)
myp_free();
}
// reap_child reaps the child identified by pid.
//
// The child is collected with a non-blocking waitid(); the reason it
// changed state is reported through debug(). A failure other than ECHILD
// is logged as an error; ECHILD means somebody else already reaped it.
static void reap_child(pid_t pid) {
    siginfo_t info;

    errno = 0;
    debug(D_CHILDS, "SIGNAL: Reaping pid: %d...", pid);

    // With WNOHANG, waitid() returns 0 even when no child is in a waitable
    // state, and some implementations leave the siginfo_t untouched in that
    // case. Zero si_pid up front so the check below is reliable everywhere.
    info.si_pid = 0;

    if (waitid(P_PID, (id_t)pid, &info, WEXITED|WNOHANG) == -1) {
        if (errno != ECHILD)
            error("SIGNAL: Failed to wait for: %d", pid);
        else
            debug(D_CHILDS, "SIGNAL: Already reaped: %d", pid);
        return;
    } else if (info.si_pid == 0) {
        // Process didn't exit, this shouldn't happen.
        return;
    }

    switch (info.si_code) {
        case CLD_EXITED:
            debug(D_CHILDS, "SIGNAL: Child %d exited: %d", pid, info.si_status);
            break;
        case CLD_KILLED:
            debug(D_CHILDS, "SIGNAL: Child %d killed by signal: %d", pid, info.si_status);
            break;
        case CLD_DUMPED:
            debug(D_CHILDS, "SIGNAL: Child %d dumped core by signal: %d", pid, info.si_status);
            break;
        case CLD_STOPPED:
            debug(D_CHILDS, "SIGNAL: Child %d stopped by signal: %d", pid, info.si_status);
            break;
        case CLD_TRAPPED:
            debug(D_CHILDS, "SIGNAL: Child %d trapped by signal: %d", pid, info.si_status);
            break;
        case CLD_CONTINUED:
            debug(D_CHILDS, "SIGNAL: Child %d continued by signal: %d", pid, info.si_status);
            break;
        default:
            debug(D_CHILDS, "SIGNAL: Child %d gave us a SIGCHLD with code %d and status %d.", pid, info.si_code, info.si_status);
            break;
    }
}
// reap_children reaps all pending children which are not managed by myp.
//
// It repeatedly peeks (WNOWAIT) at the next exited child without collecting
// it. The pid is offered to myp_reap(); when that returns 0 the child is
// treated as myp-managed and left for myp to collect, otherwise it is
// reaped here through reap_child(). The loop ends when no exited child is
// left or waitid() fails.
static void reap_children(void) {
    siginfo_t info;

    for (;;) {
        // Identify which process caused the signal so we can determine
        // if we need to reap a re-parented process.
        // si_pid is zeroed first so "no child exited" can be detected.
        info.si_pid = 0;

        if (waitid(P_ALL, (id_t)0, &info, WEXITED|WNOHANG|WNOWAIT) == -1) {
            if (errno != ECHILD) // This shouldn't happen with WNOHANG but does.
                error("SIGNAL: Failed to wait");
            return;
        }

        if (info.si_pid == 0) {
            // No child exited.
            return;
        }

        if (myp_reap(info.si_pid) == 0) {
            // myp managed, sleep for a short time to avoid busy wait while
            // this is handled by myp.
            usleep(10000);
        } else {
            // Unknown process, likely a re-parented child, reap it.
            reap_child(info.si_pid);
        }
    }
}
// Main signal loop: never returns normally.
//
// Sleeps in pause() until a signal handler has run, then walks
// signals_waiting[] and performs the action of every entry whose counter is
// non-zero, clearing the counter first. The walk is repeated for as long as
// the previous pass found something, because some actions take a while and
// new signals may have been recorded in the meantime.
void signals_handle(void) {
    for (;;) {
        // pause() causes the calling process (or thread) to sleep until a signal
        // is delivered that either terminates the process or causes the invocation
        // of a signal-catching function.
        if (!(pause() == -1 && errno == EINTR)) {
            error("SIGNAL: pause() returned but it was not interrupted by a signal.");
            continue;
        }

        // run at least one pass, and keep going while passes find pending signals
        int handled_any;
        do {
            handled_any = 0;

            int idx;
            for (idx = 0; signals_waiting[idx].action != NETDATA_SIGNAL_END_OF_LIST; idx++) {
                if (!signals_waiting[idx].count)
                    continue;

                handled_any = 1;
                signals_waiting[idx].count = 0;
                const char *signame = signals_waiting[idx].name;

                // execute the action of this signal
                switch (signals_waiting[idx].action) {
                    case NETDATA_SIGNAL_RELOAD_HEALTH:
                        error_log_limit_unlimited();
                        info("SIGNAL: Received %s. Reloading HEALTH configuration...", signame);
                        error_log_limit_reset();
                        execute_command(CMD_RELOAD_HEALTH, NULL, NULL);
                        break;

                    case NETDATA_SIGNAL_SAVE_DATABASE:
                        error_log_limit_unlimited();
                        info("SIGNAL: Received %s. Saving databases...", signame);
                        error_log_limit_reset();
                        execute_command(CMD_SAVE_DATABASE, NULL, NULL);
                        break;

                    case NETDATA_SIGNAL_REOPEN_LOGS:
                        error_log_limit_unlimited();
                        info("SIGNAL: Received %s. Reopening all log files...", signame);
                        error_log_limit_reset();
                        execute_command(CMD_REOPEN_LOGS, NULL, NULL);
                        break;

                    case NETDATA_SIGNAL_EXIT_CLEANLY:
                        error_log_limit_unlimited();
                        info("SIGNAL: Received %s. Cleaning up to exit...", signame);
                        commands_exit();
                        netdata_cleanup_and_exit(0);
                        exit(0);
                        break;

                    case NETDATA_SIGNAL_FATAL:
                        fatal("SIGNAL: Received %s. netdata now exits.", signame);
                        break;

                    case NETDATA_SIGNAL_CHILD:
                        debug(D_CHILDS, "SIGNAL: Received %s. Reaping...", signame);
                        reap_children();
                        break;

                    default:
                        info("SIGNAL: Received %s. No signal handler configured. Ignoring it.", signame);
                        break;
                }
            }
        } while (handled_any);
    }
}