mirror of
https://github.com/netdata/netdata.git
synced 2025-04-28 06:32:30 +00:00
do not recurse cleanup on shutdown (#19894)
* do not recurse cleanup on shutdown
* make exit_initiated sig_atomic_t and hide it behind a function
* fix freebsd and windows
This commit is contained in:
parent
ace55b6d66
commit
87dbbaadb6
20 changed files with 54 additions and 44 deletions
src
collectors
cups.plugin
freebsd.plugin
nfacct.plugin
perf.plugin
xenstat.plugin
daemon
commands.c
daemon-service.c
daemon-shutdown.c
daemon-shutdown.h
daemon-status-file.c
daemon-systemd-watcher.c
main.c
signal-handler.c
winsvc.cc
database/engine
health
libnetdata/exit
web/server
|
@ -243,7 +243,7 @@ int main(int argc, char **argv) {
|
|||
for (iteration = 0; 1; iteration++) {
|
||||
heartbeat_next(&hb);
|
||||
|
||||
if (unlikely(exit_initiated))
|
||||
if (unlikely(exit_initiated_get()))
|
||||
break;
|
||||
|
||||
reset_metrics();
|
||||
|
@ -315,7 +315,7 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
cupsFreeDests(num_dest_total, dests);
|
||||
|
||||
if (unlikely(exit_initiated))
|
||||
if (unlikely(exit_initiated_get()))
|
||||
break;
|
||||
|
||||
cups_job_t *jobs, *curr_job;
|
||||
|
@ -410,7 +410,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
fflush(stdout);
|
||||
|
||||
if (unlikely(exit_initiated))
|
||||
if (unlikely(exit_initiated_get()))
|
||||
break;
|
||||
|
||||
// restart check (14400 seconds)
|
||||
|
|
|
@ -91,7 +91,7 @@ void *freebsd_main(void *ptr)
|
|||
|
||||
// initialize FreeBSD plugin
|
||||
if (freebsd_plugin_init())
|
||||
netdata_cleanup_and_exit(EXIT_REASON_FATAL, NULL, NULL, NULL);
|
||||
netdata_cleanup_and_exit_fatal(EXIT_REASON_FATAL);
|
||||
|
||||
// check the enabled status for each module
|
||||
int i;
|
||||
|
|
|
@ -837,7 +837,7 @@ int main(int argc, char **argv) {
|
|||
for(iteration = 0; 1; iteration++) {
|
||||
usec_t dt = heartbeat_next(&hb);
|
||||
|
||||
if(unlikely(exit_initiated)) break;
|
||||
if(unlikely(exit_initiated_get())) break;
|
||||
|
||||
if(debug && iteration)
|
||||
fprintf(stderr, "nfacct.plugin: iteration %zu, dt %"PRIu64" usec\n"
|
||||
|
|
|
@ -1325,7 +1325,7 @@ int main(int argc, char **argv) {
|
|||
for(iteration = 0; 1; iteration++) {
|
||||
usec_t dt = heartbeat_next(&hb);
|
||||
|
||||
if (unlikely(exit_initiated))
|
||||
if (unlikely(exit_initiated_get()))
|
||||
break;
|
||||
|
||||
if (unlikely(debug && iteration))
|
||||
|
|
|
@ -1026,7 +1026,7 @@ int main(int argc, char **argv) {
|
|||
for(iteration = 0; 1; iteration++) {
|
||||
usec_t dt = heartbeat_next(&hb);
|
||||
|
||||
if(unlikely(exit_initiated)) break;
|
||||
if(unlikely(exit_initiated_get())) break;
|
||||
|
||||
if(unlikely(debug && iteration))
|
||||
fprintf(stderr, "xenstat.plugin: iteration %zu, dt %lu usec\n", iteration, dt);
|
||||
|
|
|
@ -164,7 +164,7 @@ static cmd_status_t cmd_exit_execute(char *args, char **message)
|
|||
|
||||
nd_log_limits_unlimited();
|
||||
netdata_log_info("COMMAND: Cleaning up to exit.");
|
||||
netdata_cleanup_and_exit(EXIT_REASON_CMD_EXIT, NULL, NULL, NULL);
|
||||
netdata_cleanup_and_exit_gracefully(EXIT_REASON_CMD_EXIT);
|
||||
exit(0);
|
||||
|
||||
return CMD_STATUS_SUCCESS;
|
||||
|
|
|
@ -90,7 +90,7 @@ bool service_running(SERVICE_TYPE service) {
|
|||
if (sth->type == SERVICE_THREAD_TYPE_NETDATA)
|
||||
cancelled = nd_thread_signaled_to_cancel();
|
||||
|
||||
return !sth->stop_immediately && !exit_initiated && !cancelled;
|
||||
return !sth->stop_immediately && !exit_initiated_get() && !cancelled;
|
||||
}
|
||||
|
||||
void service_signal_exit(SERVICE_TYPE service) {
|
||||
|
|
|
@ -38,7 +38,7 @@ extern struct netdata_static_thread *static_threads;
|
|||
|
||||
void netdata_log_exit_reason(void) {
|
||||
CLEAN_BUFFER *wb = buffer_create(0, NULL);
|
||||
EXIT_REASON_2buffer(wb, exit_initiated, ", ");
|
||||
EXIT_REASON_2buffer(wb, exit_initiated_get(), ", ");
|
||||
|
||||
ND_LOG_STACK lgs[] = {
|
||||
ND_LOG_FIELD_UUID(NDF_MESSAGE_ID, &netdata_exit_msgid),
|
||||
|
@ -46,7 +46,7 @@ void netdata_log_exit_reason(void) {
|
|||
};
|
||||
ND_LOG_STACK_PUSH(lgs);
|
||||
|
||||
nd_log(NDLS_DAEMON, is_exit_reason_normal(exit_initiated) ? NDLP_NOTICE : NDLP_CRIT,
|
||||
nd_log(NDLS_DAEMON, is_exit_reason_normal(exit_initiated_get()) ? NDLP_NOTICE : NDLP_CRIT,
|
||||
"NETDATA SHUTDOWN: initializing shutdown with code due to: %s",
|
||||
buffer_tostring(wb));
|
||||
}
|
||||
|
@ -109,8 +109,7 @@ static void *rrdeng_exit_background(void *ptr) {
|
|||
}
|
||||
|
||||
#ifdef ENABLE_DBENGINE
|
||||
static void rrdeng_flush_everything_and_wait(bool wait_flush, bool wait_collectors, bool dirty_only)
|
||||
{
|
||||
static void rrdeng_flush_everything_and_wait(bool wait_flush, bool wait_collectors, bool dirty_only) {
|
||||
static size_t starting_size_to_flush = 0;
|
||||
|
||||
if(!pgc_hot_and_dirty_entries(main_cache))
|
||||
|
@ -172,9 +171,12 @@ static void rrdeng_flush_everything_and_wait(bool wait_flush, bool wait_collecto
|
|||
}
|
||||
#endif
|
||||
|
||||
void netdata_cleanup_and_exit(EXIT_REASON reason, const char *action, const char *action_result, const char *action_data) {
|
||||
#if !defined(OS_WINDOWS)
|
||||
NORETURN
|
||||
#endif
|
||||
static void netdata_cleanup_and_exit(EXIT_REASON reason) {
|
||||
exit_initiated_set(reason);
|
||||
int ret = is_exit_reason_normal(exit_initiated) ? 0 : 1;
|
||||
int ret = is_exit_reason_normal(exit_initiated_get()) ? 0 : 1;
|
||||
|
||||
// don't recurse (due to a fatal, while exiting)
|
||||
static bool run = false;
|
||||
|
@ -198,13 +200,9 @@ void netdata_cleanup_and_exit(EXIT_REASON reason, const char *action, const char
|
|||
rrdeng_flush_everything_and_wait(false, false, true);
|
||||
#endif
|
||||
|
||||
// send the stat from our caller
|
||||
analytics_statistic_t statistic = { action, action_result, action_data };
|
||||
analytics_statistic_send(&statistic);
|
||||
|
||||
// notify we are exiting
|
||||
statistic = (analytics_statistic_t) {"EXIT", ret?"ERROR":"OK","-"};
|
||||
analytics_statistic_send(&statistic);
|
||||
//analytics_statistic_t statistic = (analytics_statistic_t) {"EXIT", ret?"ERROR":"OK","-"};
|
||||
//analytics_statistic_send(&statistic);
|
||||
|
||||
netdata_main_spawn_server_cleanup();
|
||||
watcher_step_complete(WATCHER_STEP_ID_DESTROY_MAIN_SPAWN_SERVER);
|
||||
|
@ -393,3 +391,13 @@ void netdata_cleanup_and_exit(EXIT_REASON reason, const char *action, const char
|
|||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void netdata_cleanup_and_exit_gracefully(EXIT_REASON reason) {
|
||||
exit_initiated_add(reason);
|
||||
FUNCTION_RUN_ONCE();
|
||||
netdata_cleanup_and_exit(reason);
|
||||
}
|
||||
|
||||
void netdata_cleanup_and_exit_fatal(EXIT_REASON reason) {
|
||||
netdata_cleanup_and_exit(reason);
|
||||
}
|
||||
|
|
|
@ -10,10 +10,7 @@ void cancel_main_threads(void);
|
|||
void abort_on_fatal_disable(void);
|
||||
void abort_on_fatal_enable(void);
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
void netdata_cleanup_and_exit(EXIT_REASON reason, const char *action, const char *action_result, const char *action_data);
|
||||
#else
|
||||
void netdata_cleanup_and_exit(EXIT_REASON reason, const char *action, const char *action_result, const char *action_data) NORETURN;
|
||||
#endif
|
||||
void netdata_cleanup_and_exit_gracefully(EXIT_REASON reason);
|
||||
void netdata_cleanup_and_exit_fatal(EXIT_REASON reason);
|
||||
|
||||
#endif //NETDATA_DAEMON_SHUTDOWN_H
|
||||
|
|
|
@ -614,7 +614,7 @@ static void daemon_status_file_refresh(DAEMON_STATUS status) {
|
|||
|
||||
get_daemon_status_fields_from_system_info(&session_status);
|
||||
|
||||
session_status.exit_reason = exit_initiated;
|
||||
session_status.exit_reason = exit_initiated_get();
|
||||
session_status.profile = nd_profile_detect_and_configure(false);
|
||||
|
||||
if(status != DAEMON_STATUS_NONE)
|
||||
|
|
|
@ -28,7 +28,7 @@ static int shutdown_event_handler(sd_bus_message *m, void *userdata __maybe_unus
|
|||
shutdown ? "true" : "false");
|
||||
|
||||
if(shutdown)
|
||||
netdata_cleanup_and_exit(EXIT_REASON_SYSTEM_SHUTDOWN, NULL, NULL, NULL);
|
||||
netdata_cleanup_and_exit_gracefully(EXIT_REASON_SYSTEM_SHUTDOWN);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -240,7 +240,7 @@ int unittest_prepare_rrd(const char **user) {
|
|||
}
|
||||
|
||||
static void fatal_cleanup_and_exit_cb(void) {
|
||||
netdata_cleanup_and_exit(EXIT_REASON_FATAL, "fatal error", "exiting", NULL);
|
||||
netdata_cleanup_and_exit_fatal(EXIT_REASON_FATAL);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
|
|
@ -237,7 +237,7 @@ static void process_triggered_signals(void) {
|
|||
nd_log_limits_unlimited();
|
||||
netdata_log_info("SIGNAL: Received %s. Cleaning up to exit...", name);
|
||||
commands_exit();
|
||||
netdata_cleanup_and_exit(signals_waiting[i].reason, NULL, NULL, NULL);
|
||||
netdata_cleanup_and_exit_gracefully(signals_waiting[i].reason);
|
||||
exit(0);
|
||||
break;
|
||||
|
||||
|
|
|
@ -109,7 +109,7 @@ static void *call_netdata_cleanup(void *arg)
|
|||
reason = EXIT_REASON_SERVICE_STOP;
|
||||
break;
|
||||
}
|
||||
netdata_cleanup_and_exit(reason, NULL, NULL, NULL);
|
||||
netdata_cleanup_and_exit_gracefully(reason);
|
||||
|
||||
// Close event handle
|
||||
netdata_service_log("Closing stop event handle...");
|
||||
|
|
|
@ -2312,7 +2312,7 @@ bool pgc_flush_pages(PGC *cache) {
|
|||
}
|
||||
|
||||
void pgc_page_hot_set_end_time_s(PGC *cache __maybe_unused, PGC_PAGE *page, time_t end_time_s, size_t additional_bytes) {
|
||||
internal_fatal(!is_page_hot(page) && !exit_initiated,
|
||||
internal_fatal(!is_page_hot(page) && !exit_initiated_get(),
|
||||
"DBENGINE CACHE: end_time_s update on non-hot page");
|
||||
|
||||
internal_fatal(end_time_s < __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED),
|
||||
|
|
|
@ -897,14 +897,14 @@ size_t pgd_append_point(
|
|||
uint32_t expected_slot)
|
||||
{
|
||||
if (pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING) {
|
||||
if(exit_initiated == EXIT_REASON_NONE)
|
||||
if(exit_initiated_get() == EXIT_REASON_NONE)
|
||||
pgd_fatal(pg, "Data collection on page already scheduled for flushing");
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!(pg->states & PGD_STATE_CREATED_FROM_COLLECTOR)) {
|
||||
if(exit_initiated == EXIT_REASON_NONE)
|
||||
if(exit_initiated_get() == EXIT_REASON_NONE)
|
||||
pgd_fatal(pg, "DBENGINE: collection on page not created from a collector");
|
||||
else
|
||||
return 0;
|
||||
|
|
|
@ -250,7 +250,7 @@ static void rrdcalc_link_to_rrdset(RRDCALC *rc) {
|
|||
static void rrdcalc_unlink_from_rrdset(RRDCALC *rc, bool having_ll_wrlock) {
|
||||
RRDSET *st = rc->rrdset;
|
||||
|
||||
if (!exit_initiated) {
|
||||
if (!exit_initiated_get()) {
|
||||
RRDHOST *host = st->rrdhost;
|
||||
|
||||
time_t now = now_realtime_sec();
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
#include "../libnetdata.h"
|
||||
|
||||
volatile EXIT_REASON exit_initiated = EXIT_REASON_NONE;
|
||||
static volatile sig_atomic_t exit_initiated = EXIT_REASON_NONE;
|
||||
|
||||
ENUM_STR_MAP_DEFINE(EXIT_REASON) = {
|
||||
{ EXIT_REASON_SIGBUS, "signal-bus-error"},
|
||||
|
@ -108,15 +108,22 @@ void exit_initiated_init(void) {
|
|||
self = os_get_file_metadata(self_path);
|
||||
}
|
||||
|
||||
ALWAYS_INLINE
|
||||
EXIT_REASON exit_initiated_get(void) {
|
||||
return (EXIT_REASON)exit_initiated;
|
||||
}
|
||||
|
||||
void exit_initiated_add(EXIT_REASON reason) {
|
||||
exit_initiated |= reason;
|
||||
exit_initiated |= (sig_atomic_t)reason;
|
||||
}
|
||||
|
||||
void exit_initiated_set(EXIT_REASON reason) {
|
||||
if(exit_initiated == EXIT_REASON_NONE && !(reason & EXIT_REASON_SYSTEM_SHUTDOWN) && is_system_shutdown())
|
||||
EXIT_REASON old = exit_initiated_get();
|
||||
|
||||
if(old == EXIT_REASON_NONE && !(reason & EXIT_REASON_SYSTEM_SHUTDOWN) && is_system_shutdown())
|
||||
reason |= EXIT_REASON_SYSTEM_SHUTDOWN;
|
||||
|
||||
if(exit_initiated == EXIT_REASON_NONE && self_path && OS_FILE_METADATA_OK(self)) {
|
||||
if(old == EXIT_REASON_NONE && self_path && OS_FILE_METADATA_OK(self)) {
|
||||
OS_FILE_METADATA self_now = os_get_file_metadata(self_path);
|
||||
if(OS_FILE_METADATA_OK(self_now) && (self_now.modified_time != self.modified_time || self_now.size_bytes != self.size_bytes))
|
||||
reason |= EXIT_REASON_UPDATE;
|
||||
|
@ -127,4 +134,3 @@ void exit_initiated_set(EXIT_REASON reason) {
|
|||
// we will have all of them
|
||||
exit_initiated_add(reason);
|
||||
}
|
||||
|
||||
|
|
|
@ -82,9 +82,8 @@ typedef enum {
|
|||
typedef struct web_buffer BUFFER;
|
||||
BITMAP_STR_DEFINE_FUNCTIONS_EXTERN(EXIT_REASON);
|
||||
|
||||
extern volatile EXIT_REASON exit_initiated;
|
||||
|
||||
void exit_initiated_init(void);
|
||||
EXIT_REASON exit_initiated_get(void);
|
||||
void exit_initiated_set(EXIT_REASON reason);
|
||||
void exit_initiated_add(EXIT_REASON reason);
|
||||
|
||||
|
|
|
@ -1185,13 +1185,13 @@ static inline int web_client_process_url(RRDHOST *host, struct web_client *w, ch
|
|||
w->response.data->content_type = CT_TEXT_PLAIN;
|
||||
buffer_flush(w->response.data);
|
||||
|
||||
if(!exit_initiated)
|
||||
if(!exit_initiated_get())
|
||||
buffer_strcat(w->response.data, "ok, will do...");
|
||||
else
|
||||
buffer_strcat(w->response.data, "I am doing it already");
|
||||
|
||||
netdata_log_error("web request to exit received.");
|
||||
netdata_cleanup_and_exit(EXIT_REASON_API_QUIT, NULL, NULL, NULL);
|
||||
netdata_cleanup_and_exit_gracefully(EXIT_REASON_API_QUIT);
|
||||
return HTTP_RESP_OK;
|
||||
}
|
||||
else if(unlikely(hash == hash_debug && strcmp(tok, "debug") == 0)) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue