diff --git a/CMakeLists.txt b/CMakeLists.txt index a3ac834448..5dee361775 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -668,8 +668,6 @@ set(LIBNETDATA_FILES src/libnetdata/os/byteorder.h src/libnetdata/onewayalloc/onewayalloc.c src/libnetdata/onewayalloc/onewayalloc.h - src/libnetdata/popen/popen.c - src/libnetdata/popen/popen.h src/libnetdata/procfile/procfile.c src/libnetdata/procfile/procfile.h src/libnetdata/query_progress/progress.c @@ -720,8 +718,6 @@ set(LIBNETDATA_FILES src/libnetdata/linked-lists.h src/libnetdata/storage-point.h src/libnetdata/bitmap64.h - src/libnetdata/os/waitid.c - src/libnetdata/os/waitid.h src/libnetdata/os/gettid.c src/libnetdata/os/gettid.h src/libnetdata/os/adjtimex.c @@ -750,6 +746,14 @@ set(LIBNETDATA_FILES src/libnetdata/os/setenv.h src/libnetdata/os/strndup.c src/libnetdata/os/strndup.h + src/libnetdata/spawn_server/spawn_server.c + src/libnetdata/spawn_server/spawn_server.h + src/libnetdata/spawn_server/spawn_popen.c + src/libnetdata/spawn_server/spawn_popen.h + src/libnetdata/os/close_range.c + src/libnetdata/os/close_range.h + src/libnetdata/os/setproctitle.c + src/libnetdata/os/setproctitle.h ) if(ENABLE_PLUGIN_EBPF) @@ -1216,13 +1220,6 @@ set(CLAIM_PLUGIN_FILES src/claim/claim.h ) -set(SPAWN_PLUGIN_FILES - src/spawn/spawn.c - src/spawn/spawn_server.c - src/spawn/spawn_client.c - src/spawn/spawn.h -) - set(ACLK_ALWAYS_BUILD src/aclk/aclk_rrdhost_state.h src/aclk/aclk_proxy.c @@ -1410,7 +1407,6 @@ set(NETDATA_FILES ${STREAMING_PLUGIN_FILES} ${WEB_PLUGIN_FILES} ${CLAIM_PLUGIN_FILES} - ${SPAWN_PLUGIN_FILES} ${ACLK_ALWAYS_BUILD} ${PROFILE_PLUGIN_FILES} ) diff --git a/packaging/utils/compile-on-windows.sh b/packaging/utils/compile-on-windows.sh index 103052be49..7e4e30eb34 100644 --- a/packaging/utils/compile-on-windows.sh +++ b/packaging/utils/compile-on-windows.sh @@ -26,6 +26,12 @@ then exit 0 fi +BUILD_FOR_PACKAGING="Off" +if [ "${1}" = "package" ] +then + BUILD_FOR_PACKAGING="On" +fi + export 
PATH="/usr/local/bin:${PATH}" WT_ROOT="$(pwd)" @@ -54,7 +60,7 @@ fi -DCMAKE_INSTALL_PREFIX="/opt/netdata" \ -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \ -DCMAKE_C_FLAGS="-fstack-protector-all -O0 -ggdb -Wall -Wextra -Wno-char-subscripts -Wa,-mbig-obj -pipe -DNETDATA_INTERNAL_CHECKS=1 -D_FILE_OFFSET_BITS=64 -D__USE_MINGW_ANSI_STDIO=1" \ - -DBUILD_FOR_PACKAGING=On \ + -DBUILD_FOR_PACKAGING=${BUILD_FOR_PACKAGING} \ -DUSE_MOLD=Off \ -DNETDATA_USER="${USER}" \ -DDEFAULT_FEATURE_STATE=Off \ diff --git a/src/aclk/aclk_rx_msgs.c b/src/aclk/aclk_rx_msgs.c index 60e421928e..432242f5ed 100644 --- a/src/aclk/aclk_rx_msgs.c +++ b/src/aclk/aclk_rx_msgs.c @@ -106,13 +106,13 @@ static inline int aclk_v2_payload_get_query(const char *payload, char **query_ur else if(strncmp(payload, "DELETE /", 8) == 0) start = payload + 7; else { - errno = 0; + errno_clear(); netdata_log_error("Only accepting requests that start with GET, POST, PUT, DELETE from CLOUD."); return 1; } if(!(end = strstr(payload, HTTP_1_1 HTTP_ENDL))) { - errno = 0; + errno_clear(); netdata_log_error("Doesn't look like HTTP GET request."); return 1; } @@ -127,7 +127,7 @@ static int aclk_handle_cloud_http_request_v2(struct aclk_request *cloud_to_agent { aclk_query_t query; - errno = 0; + errno_clear(); if (cloud_to_agent->version < ACLK_V_COMPRESSION) { netdata_log_error( "This handler cannot reply to request with version older than %d, received %d.", diff --git a/src/claim/claim.c b/src/claim/claim.c index 5f4ec9a433..16058b930b 100644 --- a/src/claim/claim.c +++ b/src/claim/claim.c @@ -53,11 +53,8 @@ CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, con } #ifndef DISABLE_CLOUD - int exit_code; - pid_t command_pid; char command_exec_buffer[CLAIMING_COMMAND_LENGTH + 1]; char command_line_buffer[CLAIMING_COMMAND_LENGTH + 1]; - FILE *fp_child_output, *fp_child_input; // This is guaranteed to be set early in main via post_conf_load() char *cloud_base_url = appconfig_get(&cloud_config, 
CONFIG_SECTION_GLOBAL, "cloud base url", NULL); @@ -92,17 +89,17 @@ CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, con claiming_arguments); netdata_log_info("Executing agent claiming command: %s", command_exec_buffer); - fp_child_output = netdata_popen(command_line_buffer, &command_pid, &fp_child_input); - if(!fp_child_output) { + POPEN_INSTANCE *instance = spawn_popen_run(command_exec_buffer); + if(!instance) { netdata_log_error("Cannot popen(\"%s\").", command_exec_buffer); return CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT; } netdata_log_info("Waiting for claiming command '%s' to finish.", command_exec_buffer); char read_buffer[100 + 1]; - while (fgets(read_buffer, 100, fp_child_output) != NULL) ; + while (fgets(read_buffer, 100, instance->child_stdout_fp) != NULL) ; - exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid); + int exit_code = spawn_popen_wait(instance); netdata_log_info("Agent claiming command '%s' returned with code %d", command_exec_buffer, exit_code); if (0 == exit_code) { @@ -113,7 +110,7 @@ CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, con netdata_log_error("Agent claiming command '%s' failed to complete its run", command_exec_buffer); return CLAIM_AGENT_CLAIM_SCRIPT_FAILED; } - errno = 0; + errno_clear(); unsigned maximum_known_exit_code = sizeof(claiming_errors) / sizeof(claiming_errors[0]) - 1; if ((unsigned)exit_code > maximum_known_exit_code) { @@ -214,7 +211,7 @@ void load_cloud_conf(int silent) netdata_cloud_enabled = CONFIG_BOOLEAN_NO; char *filename; - errno = 0; + errno_clear(); int ret = 0; diff --git a/src/collectors/apps.plugin/apps_plugin.c b/src/collectors/apps.plugin/apps_plugin.c index b660f8171c..8fe1ff0081 100644 --- a/src/collectors/apps.plugin/apps_plugin.c +++ b/src/collectors/apps.plugin/apps_plugin.c @@ -51,7 +51,6 @@ size_t inodes_changed_counter = 0, links_changed_counter = 0, targets_assignment_counter = 0, - all_pids_count = 0, // the 
number of processes running apps_groups_targets_count = 0; // # of apps_groups.conf targets int @@ -136,20 +135,6 @@ struct target size_t pagesize; -struct pid_stat - *root_of_pids = NULL, // global list of all processes running - **all_pids = NULL; // to avoid allocations, we pre-allocate - // a pointer for each pid in the entire pid space. - -#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) -// Another pre-allocated list of all possible pids. -// We need it to pids and assign them a unique sortlist id, so that we -// read parents before children. This is needed to prevent a situation where -// a child is found running, but until we read its parent, it has exited and -// its parent has accumulated its resources. -pid_t *all_pids_sortlist = NULL; -#endif - // ---------------------------------------------------------------------------- int managed_log(struct pid_stat *p, PID_LOG log, int status) { @@ -208,7 +193,7 @@ int managed_log(struct pid_stat *p, PID_LOG log, int status) { } } } - errno = 0; + errno_clear(); } else if(unlikely(p->log_thrown & log)) { // netdata_log_error("unsetting log %u on pid %d", log, p->pid); @@ -300,12 +285,14 @@ static void apply_apps_groups_targets_inheritance(void) { } // init goes always to default target - if(all_pids[INIT_PID] && !all_pids[INIT_PID]->matched_by_config) - all_pids[INIT_PID]->target = apps_groups_default_target; + struct pid_stat *pi = find_pid_entry(INIT_PID); + if(pi && !pi->matched_by_config) + pi->target = apps_groups_default_target; // pid 0 goes always to default target - if(all_pids[0] && !all_pids[INIT_PID]->matched_by_config) - all_pids[0]->target = apps_groups_default_target; + pi = find_pid_entry(0); + if(pi && !pi->matched_by_config) + pi->target = apps_groups_default_target; // give a default target on all top level processes if(unlikely(debug_enabled)) loops++; @@ -320,8 +307,9 @@ static void apply_apps_groups_targets_inheritance(void) { p->sortlist = sortlist++; } - if(all_pids[1]) - all_pids[1]->sortlist = 
sortlist++; + pi = find_pid_entry(1); + if(pi) + pi->sortlist = sortlist++; // give a target to all merged child processes found = 1; @@ -1052,12 +1040,7 @@ int main(int argc, char **argv) { netdata_log_info("started on pid %d", getpid()); users_and_groups_init(); - -#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) - all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max + 1); -#endif - - all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max + 1); + pids_init(); // ------------------------------------------------------------------------ // the event loop for functions diff --git a/src/collectors/apps.plugin/apps_plugin.h b/src/collectors/apps.plugin/apps_plugin.h index ce4d815adc..a085872d9f 100644 --- a/src/collectors/apps.plugin/apps_plugin.h +++ b/src/collectors/apps.plugin/apps_plugin.h @@ -17,9 +17,7 @@ #include <sys/proc_info.h> #include <sys/sysctl.h> #include <mach/mach_time.h> // For mach_timebase_info_data_t and mach_timebase_info -#endif -#if defined(__APPLE__) extern mach_timebase_info_data_t mach_info; #endif @@ -47,7 +45,6 @@ struct pid_info { struct proc_taskinfo taskinfo; struct proc_bsdinfo bsdinfo; struct rusage_info_v4 rusageinfo; - }; #endif @@ -467,9 +464,7 @@ extern struct target *users_root_target, *groups_root_target; -extern struct pid_stat - *root_of_pids, - **all_pids; +extern struct pid_stat *root_of_pids; extern int update_every; extern unsigned int time_factor; @@ -559,4 +554,7 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const void send_collected_data_to_netdata(struct target *root, const char *type, usec_t dt); void send_resource_usage_to_netdata(usec_t dt); +void pids_init(void); +struct pid_stat *find_pid_entry(pid_t pid); + #endif //NETDATA_APPS_PLUGIN_H diff --git a/src/collectors/apps.plugin/apps_proc_pid_limits.c b/src/collectors/apps.plugin/apps_proc_pid_limits.c index a1e15f63cd..7485086ba3 100644 --- a/src/collectors/apps.plugin/apps_proc_pid_limits.c +++ 
b/src/collectors/apps.plugin/apps_proc_pid_limits.c @@ -33,7 +33,7 @@ static inline bool read_proc_pid_limits_per_os(struct pid_stat *p, void *ptr __m bool ret = false; bool read_limits = false; - errno = 0; + errno_clear(); proc_pid_limits_buffer[0] = '\0'; kernel_uint_t all_fds = pid_openfds_sum(p); diff --git a/src/collectors/apps.plugin/apps_proc_pids.c b/src/collectors/apps.plugin/apps_proc_pids.c index fd7e776fa1..b53060d60b 100644 --- a/src/collectors/apps.plugin/apps_proc_pids.c +++ b/src/collectors/apps.plugin/apps_proc_pids.c @@ -2,18 +2,44 @@ #include "apps_plugin.h" -static inline struct pid_stat *get_pid_entry(pid_t pid) { - if(likely(all_pids[pid])) - return all_pids[pid]; +static struct pid_stat **all_pids = NULL; +size_t all_pids_count = 0; // the number of processes running - struct pid_stat *p = callocz(sizeof(struct pid_stat), 1); +struct pid_stat *root_of_pids = NULL; // global linked list of all processes running + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) +// Another pre-allocated list of all possible pids. +// We need it to assign them a unique sortlist id, so that we +// read parents before children. This is needed to prevent a situation where +// a child is found running, but until we read its parent, it has exited and +// its parent has accumulated its resources. 
+pid_t *all_pids_sortlist = NULL; +#endif + +void pids_init(void) { +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max + 1); +#endif + + all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max + 1); +} + +inline struct pid_stat *find_pid_entry(pid_t pid) { + return all_pids[pid]; +} + +static inline struct pid_stat *get_or_allocate_pid_entry(pid_t pid) { + struct pid_stat *p = find_pid_entry(pid); + if(likely(p)) + return p; + + p = callocz(sizeof(struct pid_stat), 1); p->fds = mallocz(sizeof(struct pid_fd) * MAX_SPARE_FDS); p->fds_size = MAX_SPARE_FDS; init_pid_fds(p, 0, p->fds_size); p->pid = pid; DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(root_of_pids, p, prev, next); - all_pids[pid] = p; all_pids_count++; @@ -21,7 +47,7 @@ static inline struct pid_stat *get_pid_entry(pid_t pid) { } static inline void del_pid_entry(pid_t pid) { - struct pid_stat *p = all_pids[pid]; + struct pid_stat *p = find_pid_entry(pid); if(unlikely(!p)) { netdata_log_error("attempted to free pid %d that is not allocated.", pid); @@ -62,7 +88,7 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { return 0; } - struct pid_stat *p = get_pid_entry(pid); + struct pid_stat *p = get_or_allocate_pid_entry(pid); if(unlikely(!p || p->read)) return 0; p->read = true; diff --git a/src/collectors/cgroups.plugin/cgroup-discovery.c b/src/collectors/cgroups.plugin/cgroup-discovery.c index 61d5c08ffb..d880f8a711 100644 --- a/src/collectors/cgroups.plugin/cgroup-discovery.c +++ b/src/collectors/cgroups.plugin/cgroup-discovery.c @@ -178,11 +178,9 @@ static inline void discovery_rename_cgroup(struct cgroup *cg) { netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); - pid_t cgroup_pid; - FILE *fp_child_input, *fp_child_output; - 
(void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id); - if (!fp_child_output) { + POPEN_INSTANCE *instance = spawn_popen_run_variadic(cgroups_rename_script, cg->id, cg->intermediate_id, NULL); + if (!instance) { collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id); cg->pending_renames = 0; cg->processed = 1; @@ -190,8 +188,8 @@ static inline void discovery_rename_cgroup(struct cgroup *cg) { } char buffer[CGROUP_CHARTID_LINE_MAX + 1]; - char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output); - int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, instance->child_stdout_fp); + int exit_code = spawn_popen_wait(instance); switch (exit_code) { case 0: @@ -1085,7 +1083,6 @@ static void cgroup_cleanup_ebpf_integration() static inline void read_cgroup_network_interfaces(struct cgroup *cg) { netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); - pid_t cgroup_pid; char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { @@ -1096,16 +1093,15 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { } netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); - FILE *fp_child_input, *fp_child_output; - (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier); - if(!fp_child_output) { + POPEN_INSTANCE *instance = spawn_popen_run_variadic(cgroups_network_interface_script, "--cgroup", cgroup_identifier, NULL); + if(!instance) { collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", 
cgroups_network_interface_script, cgroup_identifier); return; } char *s; char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; - while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, instance->child_stdout_fp))) { trim(s); if(*s && *s != '\n') { @@ -1145,7 +1141,7 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { } } - netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + spawn_popen_wait(instance); } static inline void discovery_process_cgroup(struct cgroup *cg) { diff --git a/src/collectors/cgroups.plugin/cgroup-network.c b/src/collectors/cgroups.plugin/cgroup-network.c index 685282e890..4cb5cbabe9 100644 --- a/src/collectors/cgroups.plugin/cgroup-network.c +++ b/src/collectors/cgroups.plugin/cgroup-network.c @@ -421,19 +421,19 @@ void detect_veth_interfaces(pid_t pid) { host = read_proc_net_dev("host", netdata_configured_host_prefix); if(!host) { - errno = 0; + errno_clear(); collector_error("cannot read host interface list."); goto cleanup; } if(!eligible_ifaces(host)) { - errno = 0; + errno_clear(); collector_info("there are no double-linked host interfaces available."); goto cleanup; } if(switch_namespace(netdata_configured_host_prefix, pid)) { - errno = 0; + errno_clear(); collector_error("cannot switch to the namespace of pid %u", (unsigned int) pid); goto cleanup; } @@ -444,13 +444,13 @@ void detect_veth_interfaces(pid_t pid) { cgroup = read_proc_net_dev("cgroup", NULL); if(!cgroup) { - errno = 0; + errno_clear(); collector_error("cannot read cgroup interface list."); goto cleanup; } if(!eligible_ifaces(cgroup)) { - errno = 0; + errno_clear(); collector_error("there are not double-linked cgroup interfaces available."); goto cleanup; } @@ -505,22 +505,20 @@ void call_the_helper(pid_t pid, const char *cgroup) { collector_info("running: %s", command); - pid_t cgroup_pid; - FILE *fp_child_input, *fp_child_output; + POPEN_INSTANCE *pi; - 
if(cgroup) { - (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--cgroup", cgroup); - } + if(cgroup) + pi = spawn_popen_run_variadic(PLUGINS_DIR "/cgroup-network-helper.sh", "--cgroup", cgroup, NULL); else { char buffer[100]; snprintfz(buffer, sizeof(buffer) - 1, "%d", pid); - (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer); + pi = spawn_popen_run_variadic(PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer, NULL); } - if(fp_child_output) { + if(pi) { char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; char *s; - while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, pi->child_stdout_fp))) { trim(s); if(*s && *s != '\n') { @@ -536,7 +534,7 @@ void call_the_helper(pid_t pid, const char *cgroup) { } } - netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + spawn_popen_kill(pi); } else collector_error("cannot execute cgroup-network helper script: %s", command); @@ -701,7 +699,7 @@ int main(int argc, char **argv) { pid = atoi(argv[arg+1]); if(pid <= 0) { - errno = 0; + errno_clear(); collector_error("Invalid pid %d given", (int) pid); return 2; } @@ -719,7 +717,7 @@ int main(int argc, char **argv) { if(helper) call_the_helper(pid, cgroup); if(pid <= 0 && !detected_devices) { - errno = 0; + errno_clear(); collector_error("Cannot find a cgroup PID from cgroup '%s'", cgroup); } } diff --git a/src/collectors/cgroups.plugin/sys_fs_cgroup.c b/src/collectors/cgroups.plugin/sys_fs_cgroup.c index b515d00426..5fdefa863c 100644 --- a/src/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/src/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -73,30 +73,19 @@ struct discovery_thread discovery_thread; #define MAXSIZE_PROC_CMDLINE 4096 static enum cgroups_systemd_setting 
cgroups_detect_systemd(const char *exec) { - pid_t command_pid; enum cgroups_systemd_setting retval = SYSTEMD_CGROUP_ERR; char buf[MAXSIZE_PROC_CMDLINE]; char *begin, *end; - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen(exec, &command_pid, &fp_child_input); - - if (!fp_child_output) + POPEN_INSTANCE *pi = spawn_popen_run(exec); + if(!pi) return retval; - int fd = fileno(fp_child_output); - if (fd == -1 ) { - collector_error("Cannot get the output of \"%s\": failed to get file descriptor", exec); - netdata_pclose(fp_child_input, fp_child_output, command_pid); - return retval; - } - struct pollfd pfd; - pfd.fd = fd; + pfd.fd = spawn_server_instance_read_fd(pi->si); pfd.events = POLLIN; int timeout = 3000; // milliseconds - int ret = poll(&pfd, 1, timeout); if (ret == -1) { @@ -104,7 +93,7 @@ static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) } else if (ret == 0) { collector_info("Cannot get the output of \"%s\" within timeout (%d ms)", exec, timeout); } else { - while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { + while (fgets(buf, MAXSIZE_PROC_CMDLINE, pi->child_stdout_fp) != NULL) { if ((begin = strstr(buf, SYSTEMD_HIERARCHY_STRING))) { end = begin = begin + strlen(SYSTEMD_HIERARCHY_STRING); if (!*begin) @@ -123,7 +112,7 @@ static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) } } - if (netdata_pclose(fp_child_input, fp_child_output, command_pid)) + if(spawn_popen_wait(pi) != 0) return SYSTEMD_CGROUP_ERR; return retval; @@ -159,25 +148,23 @@ static enum cgroups_type cgroups_try_detect_version() collector_info("cgroups version: can't detect using statfs (fs type), falling back to heuristics."); - pid_t command_pid; char buf[MAXSIZE_PROC_CMDLINE]; enum cgroups_systemd_setting systemd_setting; int cgroups2_available = 0; // 1. 
check if cgroups2 available on system at all - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen("grep cgroup /proc/filesystems", &command_pid, &fp_child_input); - if (!fp_child_output) { - collector_error("popen failed"); + POPEN_INSTANCE *instance = spawn_popen_run("grep cgroup /proc/filesystems"); + if(!instance) { + collector_error("cannot run 'grep cgroup /proc/filesystems'"); return CGROUPS_AUTODETECT_FAIL; } - while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { + while (fgets(buf, MAXSIZE_PROC_CMDLINE, instance->child_stdout_fp) != NULL) { if (strstr(buf, "cgroup2")) { cgroups2_available = 1; break; } } - if(netdata_pclose(fp_child_input, fp_child_output, command_pid)) + if(spawn_popen_wait(instance) != 0) return CGROUPS_AUTODETECT_FAIL; if(!cgroups2_available) diff --git a/src/collectors/cups.plugin/cups_plugin.c b/src/collectors/cups.plugin/cups_plugin.c index 4e452f0964..20b155e14c 100644 --- a/src/collectors/cups.plugin/cups_plugin.c +++ b/src/collectors/cups.plugin/cups_plugin.c @@ -231,7 +231,7 @@ int main(int argc, char **argv) { parse_command_line(argc, argv); - errno = 0; + errno_clear(); dict_dest_job_metrics = dictionary_create(DICT_OPTION_SINGLE_THREADED); diff --git a/src/collectors/ebpf.plugin/ebpf_apps.c b/src/collectors/ebpf.plugin/ebpf_apps.c index a17cdb33da..5249eaf551 100644 --- a/src/collectors/ebpf.plugin/ebpf_apps.c +++ b/src/collectors/ebpf.plugin/ebpf_apps.c @@ -441,7 +441,7 @@ static inline int managed_log(struct ebpf_pid_stat *p, uint32_t log, int status) } } } - errno = 0; + errno_clear(); } else if (unlikely(p->log_thrown & log)) { // netdata_log_error("unsetting log %u on pid %d", log, p->pid); p->log_thrown &= ~log; diff --git a/src/collectors/freeipmi.plugin/freeipmi_plugin.c b/src/collectors/freeipmi.plugin/freeipmi_plugin.c index 9168e99f64..38fb1d19b9 100644 --- a/src/collectors/freeipmi.plugin/freeipmi_plugin.c +++ b/src/collectors/freeipmi.plugin/freeipmi_plugin.c @@ -1120,7 +1120,7 @@ static 
void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *stt, } int netdata_ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *stt) { - errno = 0; + errno_clear(); if(type & IPMI_COLLECT_TYPE_SENSORS) { stt->sensors.collected = 0; @@ -1930,7 +1930,7 @@ int main (int argc, char **argv) { collector_error("%s(): ignoring parameter '%s'", __FUNCTION__, argv[i]); } - errno = 0; + errno_clear(); if(freq_s && freq_s < update_every) collector_info("%s(): update frequency %d seconds is too small for IPMI. Using %d.", diff --git a/src/collectors/network-viewer.plugin/network-viewer.c b/src/collectors/network-viewer.plugin/network-viewer.c index 764151f5cf..204aa2cd10 100644 --- a/src/collectors/network-viewer.plugin/network-viewer.c +++ b/src/collectors/network-viewer.plugin/network-viewer.c @@ -4,12 +4,22 @@ #include "libnetdata/libnetdata.h" #include "libnetdata/required_dummies.h" +static SPAWN_SERVER *spawn_srv = NULL; + #define ENABLE_DETAILED_VIEW #define LOCAL_SOCKETS_EXTENDED_MEMBERS struct { \ size_t count; \ - const char *local_address_space; \ - const char *remote_address_space; \ + struct { \ + pid_t pid; \ + uid_t uid; \ + SOCKET_DIRECTION direction; \ + int state; \ + uint64_t net_ns_inode; \ + struct socket_endpoint server; \ + const char *local_address_space; \ + const char *remote_address_space; \ + } aggregated_key; \ } network_viewer; #include "libnetdata/maps/local-sockets.h" @@ -57,19 +67,49 @@ ENUM_STR_MAP_DEFINE(TCP_STATE) = { }; ENUM_STR_DEFINE_FUNCTIONS(TCP_STATE, 0, "unknown"); -static void local_socket_to_json_array(BUFFER *wb, LOCAL_SOCKET *n, uint64_t proc_self_net_ns_inode, bool aggregated) { +struct sockets_stats { + BUFFER *wb; + + struct { + uint32_t tcpi_rtt; + uint32_t tcpi_rcv_rtt; + uint32_t tcpi_total_retrans; + } max; +}; + +static void local_socket_to_json_array(struct sockets_stats *st, LOCAL_SOCKET *n, uint64_t proc_self_net_ns_inode, bool aggregated) { 
+ if(n->direction == SOCKET_DIRECTION_NONE) + return; + + BUFFER *wb = st->wb; + char local_address[INET6_ADDRSTRLEN]; char remote_address[INET6_ADDRSTRLEN]; char *protocol; if(n->local.family == AF_INET) { ipv4_address_to_txt(n->local.ip.ipv4, local_address); - ipv4_address_to_txt(n->remote.ip.ipv4, remote_address); + + if(local_sockets_is_zero_address(&n->remote)) + remote_address[0] = '\0'; + else + ipv4_address_to_txt(n->remote.ip.ipv4, remote_address); + protocol = n->local.protocol == IPPROTO_TCP ? "tcp4" : "udp4"; } + else if(is_local_socket_ipv46(n)) { + strncpyz(local_address, "*", sizeof(local_address) - 1); + remote_address[0] = '\0'; + protocol = n->local.protocol == IPPROTO_TCP ? "tcp46" : "udp46"; + } else if(n->local.family == AF_INET6) { ipv6_address_to_txt(&n->local.ip.ipv6, local_address); - ipv6_address_to_txt(&n->remote.ip.ipv6, remote_address); + + if(local_sockets_is_zero_address(&n->remote)) + remote_address[0] = '\0'; + else + ipv6_address_to_txt(&n->remote.ip.ipv6, remote_address); + protocol = n->local.protocol == IPPROTO_TCP ? 
"tcp6" : "udp6"; } else @@ -117,43 +157,45 @@ static void local_socket_to_json_array(BUFFER *wb, LOCAL_SOCKET *n, uint64_t pro buffer_json_add_array_item_string(wb, local_address); buffer_json_add_array_item_uint64(wb, n->local.port); } - buffer_json_add_array_item_string(wb, n->network_viewer.local_address_space); + buffer_json_add_array_item_string(wb, n->network_viewer.aggregated_key.local_address_space); if(!aggregated) { buffer_json_add_array_item_string(wb, remote_address); buffer_json_add_array_item_uint64(wb, n->remote.port); } - buffer_json_add_array_item_string(wb, n->network_viewer.remote_address_space); + buffer_json_add_array_item_string(wb, n->network_viewer.aggregated_key.remote_address_space); - uint16_t server_port = 0; - const char *server_address = NULL; - const char *client_address_space = NULL; - const char *server_address_space = NULL; + const char *server_address; + const char *client_address_space; + const char *server_address_space; switch (n->direction) { case SOCKET_DIRECTION_LISTEN: case SOCKET_DIRECTION_INBOUND: case SOCKET_DIRECTION_LOCAL_INBOUND: - server_port = n->local.port; server_address = local_address; - server_address_space = n->network_viewer.local_address_space; - client_address_space = n->network_viewer.remote_address_space; + server_address_space = n->network_viewer.aggregated_key.local_address_space; + client_address_space = n->network_viewer.aggregated_key.remote_address_space; break; case SOCKET_DIRECTION_OUTBOUND: case SOCKET_DIRECTION_LOCAL_OUTBOUND: - server_port = n->remote.port; server_address = remote_address; - server_address_space = n->network_viewer.remote_address_space; - client_address_space = n->network_viewer.local_address_space; + server_address_space = n->network_viewer.aggregated_key.remote_address_space; + client_address_space = n->network_viewer.aggregated_key.local_address_space; break; case SOCKET_DIRECTION_NONE: + server_address = NULL; + client_address_space = NULL; + server_address_space = NULL; 
break; } - if(aggregated) - buffer_json_add_array_item_string(wb, server_address); - buffer_json_add_array_item_uint64(wb, server_port); + if(aggregated) { + buffer_json_add_array_item_string(wb, server_address); + } + + buffer_json_add_array_item_uint64(wb, n->network_viewer.aggregated_key.server.port); if(aggregated) { buffer_json_add_array_item_string(wb, client_address_space); @@ -162,58 +204,176 @@ static void local_socket_to_json_array(BUFFER *wb, LOCAL_SOCKET *n, uint64_t pro // buffer_json_add_array_item_uint64(wb, n->inode); // buffer_json_add_array_item_uint64(wb, n->net_ns_inode); + + // RTT + buffer_json_add_array_item_double(wb, (double)n->info.tcp.tcpi_rtt / (double)USEC_PER_MS); + if(st->max.tcpi_rtt < n->info.tcp.tcpi_rtt) + st->max.tcpi_rtt = n->info.tcp.tcpi_rtt; + + // Receiver RTT + buffer_json_add_array_item_double(wb, (double)n->info.tcp.tcpi_rcv_rtt / (double)USEC_PER_MS); + if(st->max.tcpi_rcv_rtt < n->info.tcp.tcpi_rcv_rtt) + st->max.tcpi_rcv_rtt = n->info.tcp.tcpi_rcv_rtt; + + // Retransmissions + buffer_json_add_array_item_uint64(wb, n->info.tcp.tcpi_total_retrans); + if(st->max.tcpi_total_retrans < n->info.tcp.tcpi_total_retrans) + st->max.tcpi_total_retrans = n->info.tcp.tcpi_total_retrans; + + // count buffer_json_add_array_item_uint64(wb, n->network_viewer.count); } buffer_json_array_close(wb); } -static void local_sockets_cb_to_json(LS_STATE *ls, LOCAL_SOCKET *n, void *data) { +static void populate_aggregated_key(LOCAL_SOCKET *n) { n->network_viewer.count = 1; - n->network_viewer.local_address_space = local_sockets_address_space(&n->local); - n->network_viewer.remote_address_space = local_sockets_address_space(&n->remote); - local_socket_to_json_array(data, n, ls->proc_self_net_ns_inode, false); -} -static void local_sockets_cb_to_aggregation(LS_STATE *ls __maybe_unused, LOCAL_SOCKET *n, void *data) { - SIMPLE_HASHTABLE_AGGREGATED_SOCKETS *ht = data; - n->network_viewer.count = 1; - n->network_viewer.local_address_space = 
local_sockets_address_space(&n->local); - n->network_viewer.remote_address_space = local_sockets_address_space(&n->remote); + n->network_viewer.aggregated_key.pid = n->pid; + n->network_viewer.aggregated_key.uid = n->uid; + n->network_viewer.aggregated_key.direction = n->direction; + n->network_viewer.aggregated_key.net_ns_inode = n->net_ns_inode; + n->network_viewer.aggregated_key.state = n->state; switch(n->direction) { case SOCKET_DIRECTION_INBOUND: case SOCKET_DIRECTION_LOCAL_INBOUND: case SOCKET_DIRECTION_LISTEN: - memset(&n->remote.ip, 0, sizeof(n->remote.ip)); - n->remote.port = 0; + n->network_viewer.aggregated_key.server = n->local; break; case SOCKET_DIRECTION_OUTBOUND: case SOCKET_DIRECTION_LOCAL_OUTBOUND: - memset(&n->local.ip, 0, sizeof(n->local.ip)); - n->local.port = 0; + n->network_viewer.aggregated_key.server = n->remote; break; case SOCKET_DIRECTION_NONE: - return; + break; } - n->inode = 0; - n->local_ip_hash = 0; - n->remote_ip_hash = 0; - n->local_port_hash = 0; - n->timer = 0; - n->retransmits = 0; - n->expires = 0; - n->rqueue = 0; - n->wqueue = 0; - memset(&n->local_port_key, 0, sizeof(n->local_port_key)); + n->network_viewer.aggregated_key.local_address_space = local_sockets_address_space(&n->local); + n->network_viewer.aggregated_key.remote_address_space = local_sockets_address_space(&n->remote); +} - XXH64_hash_t hash = XXH3_64bits(n, sizeof(*n)); +static void local_sockets_cb_to_json(LS_STATE *ls, LOCAL_SOCKET *n, void *data) { + struct sockets_stats *st = data; + populate_aggregated_key(n); + local_socket_to_json_array(st, n, ls->proc_self_net_ns_inode, false); +} + +#define KEEP_THE_BIGGER(a, b) (a) = ((a) < (b)) ? (b) : (a) +#define KEEP_THE_SMALLER(a, b) (a) = ((a) > (b)) ? 
(b) : (a) +#define SUM_THEM_ALL(a, b) (a) += (b) +#define OR_THEM_ALL(a, b) (a) |= (b) + +static void local_sockets_cb_to_aggregation(LS_STATE *ls __maybe_unused, LOCAL_SOCKET *n, void *data) { + SIMPLE_HASHTABLE_AGGREGATED_SOCKETS *ht = data; + + populate_aggregated_key(n); + XXH64_hash_t hash = XXH3_64bits(&n->network_viewer.aggregated_key, sizeof(n->network_viewer.aggregated_key)); SIMPLE_HASHTABLE_SLOT_AGGREGATED_SOCKETS *sl = simple_hashtable_get_slot_AGGREGATED_SOCKETS(ht, hash, n, true); LOCAL_SOCKET *t = SIMPLE_HASHTABLE_SLOT_DATA(sl); if(t) { t->network_viewer.count++; + + KEEP_THE_BIGGER(t->timer, n->timer); + KEEP_THE_BIGGER(t->retransmits, n->retransmits); + KEEP_THE_SMALLER(t->expires, n->expires); + KEEP_THE_BIGGER(t->rqueue, n->rqueue); + KEEP_THE_BIGGER(t->wqueue, n->wqueue); + + // The current number of consecutive retransmissions that have occurred for the most recently transmitted segment. + SUM_THEM_ALL(t->info.tcp.tcpi_retransmits, n->info.tcp.tcpi_retransmits); + + // The total number of retransmissions that have occurred for the entire connection since it was established. + SUM_THEM_ALL(t->info.tcp.tcpi_total_retrans, n->info.tcp.tcpi_total_retrans); + + // The total number of segments that have been retransmitted since the connection was established. + SUM_THEM_ALL(t->info.tcp.tcpi_retrans, n->info.tcp.tcpi_retrans); + + // The number of keepalive probes sent + SUM_THEM_ALL(t->info.tcp.tcpi_probes, n->info.tcp.tcpi_probes); + + // The number of times the retransmission timeout has been backed off. + SUM_THEM_ALL(t->info.tcp.tcpi_backoff, n->info.tcp.tcpi_backoff); + + // A bitmask representing the TCP options currently enabled for the connection, such as SACK and Timestamps. 
+ OR_THEM_ALL(t->info.tcp.tcpi_options, n->info.tcp.tcpi_options); + + // The send window scale value used for this connection + KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_wscale, n->info.tcp.tcpi_snd_wscale); + + // The receive window scale value used for this connection + KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_wscale, n->info.tcp.tcpi_rcv_wscale); + + // Retransmission timeout in milliseconds + KEEP_THE_SMALLER(t->info.tcp.tcpi_rto, n->info.tcp.tcpi_rto); + + // The delayed acknowledgement timeout in milliseconds. + KEEP_THE_SMALLER(t->info.tcp.tcpi_ato, n->info.tcp.tcpi_ato); + + // The maximum segment size for sending. + KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_mss, n->info.tcp.tcpi_snd_mss); + + // The maximum segment size for receiving. + KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_mss, n->info.tcp.tcpi_rcv_mss); + + // The number of unacknowledged segments + SUM_THEM_ALL(t->info.tcp.tcpi_unacked, n->info.tcp.tcpi_unacked); + + // The number of segments that have been selectively acknowledged + SUM_THEM_ALL(t->info.tcp.tcpi_sacked, n->info.tcp.tcpi_sacked); + + // The number of segments that have been selectively acknowledged + SUM_THEM_ALL(t->info.tcp.tcpi_sacked, n->info.tcp.tcpi_sacked); + + // The number of lost segments. + SUM_THEM_ALL(t->info.tcp.tcpi_lost, n->info.tcp.tcpi_lost); + + // The number of forward acknowledgment segments. + SUM_THEM_ALL(t->info.tcp.tcpi_fackets, n->info.tcp.tcpi_fackets); + + // The time in milliseconds since the last data was sent. + KEEP_THE_SMALLER(t->info.tcp.tcpi_last_data_sent, n->info.tcp.tcpi_last_data_sent); + + // The time in milliseconds since the last acknowledgment was sent (not tracked in Linux, hence often zero). + KEEP_THE_SMALLER(t->info.tcp.tcpi_last_ack_sent, n->info.tcp.tcpi_last_ack_sent); + + // The time in milliseconds since the last data was received. + KEEP_THE_SMALLER(t->info.tcp.tcpi_last_data_recv, n->info.tcp.tcpi_last_data_recv); + + // The time in milliseconds since the last acknowledgment was received. 
+ KEEP_THE_SMALLER(t->info.tcp.tcpi_last_ack_recv, n->info.tcp.tcpi_last_ack_recv); + + // The path MTU for this connection + KEEP_THE_SMALLER(t->info.tcp.tcpi_pmtu, n->info.tcp.tcpi_pmtu); + + // The slow start threshold for receiving + KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_ssthresh, n->info.tcp.tcpi_rcv_ssthresh); + + // The slow start threshold for sending + KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_ssthresh, n->info.tcp.tcpi_snd_ssthresh); + + // The round trip time in microseconds + KEEP_THE_BIGGER(t->info.tcp.tcpi_rtt, n->info.tcp.tcpi_rtt); + + // The round trip time variance in microseconds. + KEEP_THE_BIGGER(t->info.tcp.tcpi_rttvar, n->info.tcp.tcpi_rttvar); + + // The size of the sending congestion window. + KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_cwnd, n->info.tcp.tcpi_snd_cwnd); + + // The maximum segment size that could be advertised. + KEEP_THE_BIGGER(t->info.tcp.tcpi_advmss, n->info.tcp.tcpi_advmss); + + // The reordering metric + KEEP_THE_SMALLER(t->info.tcp.tcpi_reordering, n->info.tcp.tcpi_reordering); + + // The receive round trip time in microseconds. + KEEP_THE_BIGGER(t->info.tcp.tcpi_rcv_rtt, n->info.tcp.tcpi_rcv_rtt); + + // The available space in the receive buffer.
+ KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_space, n->info.tcp.tcpi_rcv_space); } else { t = mallocz(sizeof(*t)); @@ -240,6 +400,10 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu wb->content_type = CT_APPLICATION_JSON; buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + struct sockets_stats st = { + .wb = wb, + }; + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); buffer_json_member_add_string(wb, "type", "table"); buffer_json_member_add_time_t(wb, "update_every", 5); @@ -328,9 +492,12 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu .cmdline = true, .comm = true, .namespaces = true, + .tcp_info = true, .max_errors = 10, + .max_concurrent_namespaces = 5, }, + .spawn_server = spawn_srv, .stats = { 0 }, .sockets_hashtable = { 0 }, .local_ips_hashtable = { 0 }, @@ -345,7 +512,7 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu } else { ls.config.cb = local_sockets_cb_to_json; - ls.config.data = wb; + ls.config.data = &st; } local_sockets_process(&ls); @@ -366,7 +533,7 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu qsort(array, added, sizeof(LOCAL_SOCKET *), local_sockets_compar); for(size_t i = 0; i < added ;i++) { - local_socket_to_json_array(wb, array[i], proc_self_net_ns_inode, true); + local_socket_to_json_array(&st, array[i], proc_self_net_ns_inode, true); string_freez(array[i]->cmdline); freez(array[i]); } @@ -555,14 +722,40 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu // RRDF_FIELD_OPTS_NONE, // NULL); + + // RTT + buffer_rrdf_table_add_field(wb, field_id++, "RTT", aggregated ? 
"Max Smoothed Round Trip Time" : "Smoothed Round Trip Time", + RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "ms", st.max.tcpi_rtt / USEC_PER_MS, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Asymmetry RTT + buffer_rrdf_table_add_field(wb, field_id++, "RecvRTT", aggregated ? "Max Receiver ACKs RTT" : "Receiver ACKs RTT", + RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "ms", st.max.tcpi_rcv_rtt / USEC_PER_MS, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Retransmissions + buffer_rrdf_table_add_field(wb, field_id++, "Retrans", "Total Retransmissions", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, "packets", st.max.tcpi_total_retrans, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + // Count buffer_rrdf_table_add_field(wb, field_id++, "Count", "Number of sockets like this", RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, - 0, NULL, NAN, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + 0, "sockets", NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, aggregated ? (RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY) : RRDF_FIELD_OPTS_NONE, NULL); } + buffer_json_object_close(wb); // columns buffer_json_member_add_string(wb, "default_sort_column", aggregated ?
"Count" : "Direction"); @@ -747,18 +940,28 @@ int main(int argc __maybe_unused, char **argv __maybe_unused) { uc = system_usernames_cache_init(); + spawn_srv = spawn_server_create("setns", local_sockets_spawn_server_callback, argc, (const char **)argv); + if(spawn_srv == NULL) { + fprintf(stderr, "Cannot create spawn server.\n"); + exit(1); + } + // ---------------------------------------------------------------------------------------------------------------- if(argc == 2 && strcmp(argv[1], "debug") == 0) { - bool cancelled = false; - usec_t stop_monotonic_ut = now_monotonic_usec() + 600 * USEC_PER_SEC; - char buf[] = "network-connections sockets:aggregated"; - network_viewer_function("123", buf, &stop_monotonic_ut, &cancelled, - NULL, HTTP_ACCESS_ALL, NULL, NULL); +// for(int i = 0; i < 100; i++) { + bool cancelled = false; + usec_t stop_monotonic_ut = now_monotonic_usec() + 600 * USEC_PER_SEC; + char buf[] = "network-connections sockets:aggregated"; + network_viewer_function("123", buf, &stop_monotonic_ut, &cancelled, + NULL, HTTP_ACCESS_ALL, NULL, NULL); - char buf2[] = "network-connections sockets:detailed"; - network_viewer_function("123", buf2, &stop_monotonic_ut, &cancelled, - NULL, HTTP_ACCESS_ALL, NULL, NULL); + char buf2[] = "network-connections sockets:detailed"; + network_viewer_function("123", buf2, &stop_monotonic_ut, &cancelled, + NULL, HTTP_ACCESS_ALL, NULL, NULL); +// } + + spawn_server_destroy(spawn_srv); exit(1); } @@ -799,5 +1002,8 @@ int main(int argc __maybe_unused, char **argv __maybe_unused) { } } + spawn_server_destroy(spawn_srv); + spawn_srv = NULL; + return 0; } diff --git a/src/collectors/nfacct.plugin/plugin_nfacct.c b/src/collectors/nfacct.plugin/plugin_nfacct.c index d3d18a3639..92c82351a2 100644 --- a/src/collectors/nfacct.plugin/plugin_nfacct.c +++ b/src/collectors/nfacct.plugin/plugin_nfacct.c @@ -809,7 +809,7 @@ int main(int argc, char **argv) { nfacct_signals(); - errno = 0; + errno_clear(); if(freq >= netdata_update_every) 
netdata_update_every = freq; diff --git a/src/collectors/perf.plugin/perf_plugin.c b/src/collectors/perf.plugin/perf_plugin.c index eb24b55e1a..8fb4014e4b 100644 --- a/src/collectors/perf.plugin/perf_plugin.c +++ b/src/collectors/perf.plugin/perf_plugin.c @@ -1288,7 +1288,7 @@ int main(int argc, char **argv) { parse_command_line(argc, argv); - errno = 0; + errno_clear(); if(freq >= update_every) update_every = freq; diff --git a/src/collectors/plugins.d/local_listeners.c b/src/collectors/plugins.d/local_listeners.c index 2829b3e37c..577e0e4670 100644 --- a/src/collectors/plugins.d/local_listeners.c +++ b/src/collectors/plugins.d/local_listeners.c @@ -15,6 +15,14 @@ static const char *protocol_name(LOCAL_SOCKET *n) { else return "UNKNOWN_IPV4"; } + else if(is_local_socket_ipv46(n)) { + if (n->local.protocol == IPPROTO_TCP) + return "TCP46"; + else if(n->local.protocol == IPPROTO_UDP) + return "UDP46"; + else + return "UNKNOWN_IPV46"; + } else if(n->local.family == AF_INET6) { if (n->local.protocol == IPPROTO_TCP) return "TCP6"; @@ -35,6 +43,10 @@ static void print_local_listeners(LS_STATE *ls __maybe_unused, LOCAL_SOCKET *n, ipv4_address_to_txt(n->local.ip.ipv4, local_address); ipv4_address_to_txt(n->remote.ip.ipv4, remote_address); } + else if(is_local_socket_ipv46(n)) { + strncpyz(local_address, "*", sizeof(local_address) - 1); + remote_address[0] = '\0'; + } else if(n->local.family == AF_INET6) { ipv6_address_to_txt(&n->local.ip.ipv6, local_address); ipv6_address_to_txt(&n->remote.ip.ipv6, remote_address); @@ -93,8 +105,10 @@ int main(int argc, char **argv) { .cmdline = true, .comm = false, .namespaces = true, + .tcp_info = false, .max_errors = 10, + .max_concurrent_namespaces = 10, .cb = print_local_listeners, .data = NULL, @@ -212,6 +226,7 @@ int main(int argc, char **argv) { ls.config.comm = true; ls.config.cmdline = true; ls.config.namespaces = true; + ls.config.tcp_info = true; ls.config.uid = true; ls.config.max_errors = SIZE_MAX; ls.config.cb = 
print_local_listeners_debug; @@ -276,8 +291,17 @@ int main(int argc, char **argv) { } } + SPAWN_SERVER *spawn_server = spawn_server_create(NULL, local_sockets_spawn_server_callback, argc, (const char **)argv); + if(spawn_server == NULL) { + fprintf(stderr, "Cannot create spawn server.\n"); + exit(1); + } + ls.spawn_server = spawn_server; + local_sockets_process(&ls); + spawn_server_destroy(spawn_server); + getrusage(RUSAGE_SELF, &ended); if(debug) { @@ -285,7 +309,7 @@ int main(int argc, char **argv) { unsigned long long system = ended.ru_stime.tv_sec * 1000000ULL + ended.ru_stime.tv_usec - started.ru_stime.tv_sec * 1000000ULL + started.ru_stime.tv_usec; unsigned long long total = user + system; - fprintf(stderr, "CPU Usage %llu user, %llu system, %llu total\n", user, system, total); + fprintf(stderr, "CPU Usage %llu user, %llu system, %llu total, %zu namespaces, %zu nl requests (without namespaces)\n", user, system, total, ls.stats.namespaces_found, ls.stats.mnl_sends); } return 0; diff --git a/src/collectors/plugins.d/plugins_d.c b/src/collectors/plugins.d/plugins_d.c index f5f55b7702..d7ffe7fada 100644 --- a/src/collectors/plugins.d/plugins_d.c +++ b/src/collectors/plugins.d/plugins_d.c @@ -68,23 +68,15 @@ static void pluginsd_worker_thread_cleanup(void *pptr) { cd->unsafe.running = false; cd->unsafe.thread = 0; - pid_t pid = cd->unsafe.pid; cd->unsafe.pid = 0; + POPEN_INSTANCE *pi = cd->unsafe.pi; + cd->unsafe.pi = NULL; + spinlock_unlock(&cd->unsafe.spinlock); - if (pid) { - siginfo_t info; - netdata_log_info("PLUGINSD: 'host:%s', killing data collection child process with pid %d", - rrdhost_hostname(cd->host), pid); - - if (killpid(pid) != -1) { - netdata_log_info("PLUGINSD: 'host:%s', waiting for data collection child process pid %d to exit...", - rrdhost_hostname(cd->host), pid); - - netdata_waitid(P_PID, (id_t)pid, &info, WEXITED); - } - } + if (pi) + spawn_popen_kill(pi); } #define SERIAL_FAILURES_THRESHOLD 10 @@ -160,14 +152,13 @@ static void 
*pluginsd_worker_thread(void *arg) { size_t count = 0; while(service_running(SERVICE_COLLECTORS)) { - FILE *fp_child_input = NULL; - FILE *fp_child_output = netdata_popen(cd->cmd, &cd->unsafe.pid, &fp_child_input); - - if(unlikely(!fp_child_input || !fp_child_output)) { + cd->unsafe.pi = spawn_popen_run(cd->cmd); + if(!cd->unsafe.pi) { netdata_log_error("PLUGINSD: 'host:%s', cannot popen(\"%s\", \"r\").", rrdhost_hostname(cd->host), cd->cmd); break; } + cd->unsafe.pid = spawn_server_instance_pid(cd->unsafe.pi->si); nd_log(NDLS_DAEMON, NDLP_DEBUG, "PLUGINSD: 'host:%s' connected to '%s' running on pid %d", @@ -190,15 +181,14 @@ static void *pluginsd_worker_thread(void *arg) { }; ND_LOG_STACK_PUSH(lgs); - count = pluginsd_process(cd->host, cd, fp_child_input, fp_child_output, 0); + count = pluginsd_process(cd->host, cd, cd->unsafe.pi->child_stdin_fp, cd->unsafe.pi->child_stdout_fp, 0); nd_log(NDLS_DAEMON, NDLP_DEBUG, "PLUGINSD: 'host:%s', '%s' (pid %d) disconnected after %zu successful data collections (ENDs).", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, count); - killpid(cd->unsafe.pid); - - int worker_ret_code = netdata_pclose(fp_child_input, fp_child_output, cd->unsafe.pid); + int worker_ret_code = spawn_popen_kill(cd->unsafe.pi); + cd->unsafe.pi = NULL; if(likely(worker_ret_code == 0)) pluginsd_worker_thread_handle_success(cd); @@ -273,7 +263,7 @@ void *pluginsd_main(void *ptr) { if (unlikely(!service_running(SERVICE_COLLECTORS))) break; - errno = 0; + errno_clear(); DIR *dir = opendir(directory_name); if (unlikely(!dir)) { if (directory_errors[idx] != errno) { diff --git a/src/collectors/plugins.d/plugins_d.h b/src/collectors/plugins.d/plugins_d.h index ec17c3145a..51efa5a72e 100644 --- a/src/collectors/plugins.d/plugins_d.h +++ b/src/collectors/plugins.d/plugins_d.h @@ -34,6 +34,7 @@ struct plugind { bool running; // do not touch this structure after setting this to 1 bool enabled; // if this is enabled or not ND_THREAD *thread; + 
POPEN_INSTANCE *pi; pid_t pid; } unsafe; diff --git a/src/collectors/plugins.d/pluginsd_internals.c b/src/collectors/plugins.d/pluginsd_internals.c index d03daf745d..31f0f75393 100644 --- a/src/collectors/plugins.d/pluginsd_internals.c +++ b/src/collectors/plugins.d/pluginsd_internals.c @@ -13,7 +13,7 @@ ssize_t send_to_plugin(const char *txt, void *data) { return h2o_stream_write(parser->h2o_ctx, txt, strlen(txt)); #endif - errno = 0; + errno_clear(); spinlock_lock(&parser->writer.spinlock); ssize_t bytes = -1; diff --git a/src/collectors/proc.plugin/proc_meminfo.c b/src/collectors/proc.plugin/proc_meminfo.c index c11b4f6422..db458b2394 100644 --- a/src/collectors/proc.plugin/proc_meminfo.c +++ b/src/collectors/proc.plugin/proc_meminfo.c @@ -29,7 +29,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { static ARL_BASE *arl_base = NULL; static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL, *arl_hugepages_total = NULL, - *arl_zswapped = NULL, *arl_high_low = NULL, *arl_cma_total = NULL, + *arl_zswapped = NULL, *arl_high_low = NULL, *arl_directmap4k = NULL, *arl_directmap2m = NULL, *arl_directmap4m = NULL, *arl_directmap1g = NULL; static unsigned long long @@ -189,7 +189,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { arl_expect(arl_base, "FilePmdMapped", &FilePmdMapped); // CONFIG_CMA - arl_cma_total = arl_expect(arl_base, "CmaTotal", &CmaTotal); + arl_expect(arl_base, "CmaTotal", &CmaTotal); arl_expect(arl_base, "CmaFree", &CmaFree); // CONFIG_UNACCEPTED_MEMORY diff --git a/src/collectors/systemd-journal.plugin/systemd-journal.c b/src/collectors/systemd-journal.plugin/systemd-journal.c index 57d7ecbc41..6da9c687e0 100644 --- a/src/collectors/systemd-journal.plugin/systemd-journal.c +++ b/src/collectors/systemd-journal.plugin/systemd-journal.c @@ -1037,7 +1037,7 @@ static ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_one_file( struct journal_file *jf, FUNCTION_QUERY_STATUS *fqs) { sd_journal *j = NULL; - errno = 0; + errno_clear(); 
fstat_cache_enable_on_thread(); diff --git a/src/collectors/tc.plugin/plugin_tc.c b/src/collectors/tc.plugin/plugin_tc.c index d2599f7286..da2a39194d 100644 --- a/src/collectors/tc.plugin/plugin_tc.c +++ b/src/collectors/tc.plugin/plugin_tc.c @@ -834,7 +834,7 @@ static inline void tc_split_words(char *str, char **words, int max_words) { while(i < max_words) words[i++] = NULL; } -static pid_t tc_child_pid = 0; +static POPEN_INSTANCE *tc_child_instance = NULL; static void tc_main_cleanup(void *pptr) { struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); @@ -847,16 +847,10 @@ static void tc_main_cleanup(void *pptr) { collector_info("cleaning up..."); - if(tc_child_pid) { - collector_info("TC: killing with SIGTERM tc-qos-helper process %d", tc_child_pid); - if(killpid(tc_child_pid) != -1) { - siginfo_t info; - - collector_info("TC: waiting for tc plugin child process pid %d to exit...", tc_child_pid); - netdata_waitid(P_PID, (id_t) tc_child_pid, &info, WEXITED); - } - - tc_child_pid = 0; + if(tc_child_instance) { + collector_info("TC: stopping the running tc-qos-helper script"); + int code = spawn_popen_wait(tc_child_instance); (void)code; + tc_child_instance = NULL; } static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; @@ -921,21 +915,20 @@ void *tc_main(void *ptr) { char *tc_script = config_get("plugin:tc", "script to run to get tc values", command); while(service_running(SERVICE_COLLECTORS)) { - FILE *fp_child_input, *fp_child_output; struct tc_device *device = NULL; struct tc_class *class = NULL; snprintfz(command, TC_LINE_MAX, "exec %s %d", tc_script, localhost->rrd_update_every); netdata_log_debug(D_TC_LOOP, "executing '%s'", command); - fp_child_output = netdata_popen(command, (pid_t *)&tc_child_pid, &fp_child_input); - if(unlikely(!fp_child_output)) { + tc_child_instance = spawn_popen_run(command); + if(!tc_child_instance) { collector_error("TC: Cannot popen(\"%s\", \"r\").", command); goto cleanup; } char buffer[TC_LINE_MAX+1] = ""; - 
while(fgets(buffer, TC_LINE_MAX, fp_child_output) != NULL) { + while(fgets(buffer, TC_LINE_MAX, tc_child_instance->child_stdout_fp) != NULL) { if(unlikely(!service_running(SERVICE_COLLECTORS))) break; buffer[TC_LINE_MAX] = '\0'; @@ -1142,8 +1135,8 @@ void *tc_main(void *ptr) { } // fgets() failed or loop broke - int code = netdata_pclose(fp_child_input, fp_child_output, (pid_t)tc_child_pid); - tc_child_pid = 0; + int code = spawn_popen_kill(tc_child_instance); + tc_child_instance = NULL; if(unlikely(device)) { // tc_device_free(device); diff --git a/src/collectors/xenstat.plugin/xenstat_plugin.c b/src/collectors/xenstat.plugin/xenstat_plugin.c index b17b746f5b..e4b8a2bd09 100644 --- a/src/collectors/xenstat.plugin/xenstat_plugin.c +++ b/src/collectors/xenstat.plugin/xenstat_plugin.c @@ -986,7 +986,7 @@ int main(int argc, char **argv) { netdata_log_error("xenstat.plugin: ignoring parameter '%s'", argv[i]); } - errno = 0; + errno_clear(); if(freq >= netdata_update_every) netdata_update_every = freq; diff --git a/src/daemon/analytics.c b/src/daemon/analytics.c index e9abf11c88..0e5c221c41 100644 --- a/src/daemon/analytics.c +++ b/src/daemon/analytics.c @@ -326,18 +326,15 @@ void analytics_alarms_notifications(void) strcat(script, " dump_methods"); - pid_t command_pid; - netdata_log_debug(D_ANALYTICS, "Executing %s", script); BUFFER *b = buffer_create(1000, NULL); int cnt = 0; - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); - if (fp_child_output) { + POPEN_INSTANCE *instance = spawn_popen_run(script); + if (instance) { char line[200 + 1]; - while (fgets(line, 200, fp_child_output) != NULL) { + while (fgets(line, 200, instance->child_stdout_fp) != NULL) { char *end = line; while (*end && *end != '\n') end++; @@ -350,7 +347,7 @@ void analytics_alarms_notifications(void) cnt++; } - netdata_pclose(fp_child_input, fp_child_output, command_pid); + spawn_popen_wait(instance); } freez(script); @@ -1001,8 +998,6 @@ void 
analytics_statistic_send(const analytics_statistic_t *statistic) { char *command_to_run = mallocz( sizeof(char) * (strlen(statistic->action) + strlen(action_result) + strlen(action_data) + strlen(as_script) + analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15)); - pid_t command_pid; - sprintf( command_to_run, "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ", @@ -1055,12 +1050,11 @@ void analytics_statistic_send(const analytics_statistic_t *statistic) { "%s '%s' '%s' '%s'", as_script, statistic->action, action_result, action_data); - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input); - if (fp_child_output) { + POPEN_INSTANCE *instance = spawn_popen_run(command_to_run); + if (instance) { char buffer[4 + 1]; - char *s = fgets(buffer, 4, fp_child_output); - int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid); + char *s = fgets(buffer, 4, instance->child_stdout_fp); + int exit_code = spawn_popen_wait(instance); if (exit_code) nd_log(NDLS_DAEMON, NDLP_NOTICE, diff --git a/src/daemon/buildinfo.c b/src/daemon/buildinfo.c index 4ee5b43de0..a4a3152306 100644 --- a/src/daemon/buildinfo.c +++ b/src/daemon/buildinfo.c @@ -75,6 +75,7 @@ typedef enum __attribute__((packed)) { BIB_LIB_LIBCAP, BIB_LIB_LIBCRYPTO, BIB_LIB_LIBYAML, + BIB_LIB_LIBMNL, BIB_PLUGIN_APPS, BIB_PLUGIN_LINUX_CGROUPS, BIB_PLUGIN_LINUX_CGROUP_NETWORK, @@ -698,6 +699,14 @@ static struct { .json = "libyaml", .value = NULL, }, + [BIB_LIB_LIBMNL] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = "libmnl", + .print = "libmnl (library for working with netfilter)", + .json = "libmnl", + .value = NULL, + }, [BIB_PLUGIN_APPS] = { .category = BIC_PLUGINS, .type = BIT_BOOLEAN, @@ -1177,6 +1186,9 @@ __attribute__((constructor)) void 
initialize_build_info(void) { #ifdef HAVE_LIBYAML build_info_set_status(BIB_LIB_LIBYAML, true); #endif +#ifdef HAVE_LIBMNL + build_info_set_status(BIB_LIB_LIBMNL, true); +#endif #ifdef ENABLE_PLUGIN_APPS build_info_set_status(BIB_PLUGIN_APPS, true); @@ -1278,9 +1290,18 @@ static void populate_system_info(void) { system_info = localhost->system_info; } else { + bool started_spawn_server = false; + if(!netdata_main_spawn_server) { + started_spawn_server = true; + netdata_main_spawn_server_init(NULL, 0, NULL); + } + system_info = callocz(1, sizeof(struct rrdhost_system_info)); get_system_info(system_info); free_system_info = true; + + if(started_spawn_server) + netdata_main_spawn_server_cleanup(); } build_info_set_value_strdupz(BIB_OS_KERNEL_NAME, system_info->kernel_name); diff --git a/src/daemon/common.c b/src/daemon/common.c index a64d53585e..6c824eec68 100644 --- a/src/daemon/common.c +++ b/src/daemon/common.c @@ -44,7 +44,7 @@ long get_netdata_cpus(void) { long cores_user_configured = config_get_number(CONFIG_SECTION_GLOBAL, "cpu cores", processors); - errno = 0; + errno_clear(); internal_error(true, "System CPUs: %ld, (" "system: %ld, cgroups cpuset v1: %ld, cgroups cpuset v2: %ld, netdata.conf: %ld" diff --git a/src/daemon/common.h b/src/daemon/common.h index 102ec81e2f..1dea19c5b8 100644 --- a/src/daemon/common.h +++ b/src/daemon/common.h @@ -84,9 +84,6 @@ // global GUID map functions -// netdata agent spawn server -#include "spawn/spawn.h" - // the netdata daemon #include "daemon.h" #include "main.h" diff --git a/src/daemon/daemon.c b/src/daemon/daemon.c index f77b748a84..2392d4cc1d 100644 --- a/src/daemon/daemon.c +++ b/src/daemon/daemon.c @@ -381,14 +381,14 @@ static void sched_setscheduler_set(void) { priority = (int)config_get_number(CONFIG_SECTION_GLOBAL, "process scheduling priority", priority); #ifdef HAVE_SCHED_GET_PRIORITY_MIN - errno = 0; + errno_clear(); if(priority < sched_get_priority_min(policy)) { netdata_log_error("scheduler %s (%d) priority 
%d is below the minimum %d. Using the minimum.", name, policy, priority, sched_get_priority_min(policy)); priority = sched_get_priority_min(policy); } #endif #ifdef HAVE_SCHED_GET_PRIORITY_MAX - errno = 0; + errno_clear(); if(priority > sched_get_priority_max(policy)) { netdata_log_error("scheduler %s (%d) priority %d is above the maximum %d. Using the maximum.", name, policy, priority, sched_get_priority_max(policy)); priority = sched_get_priority_max(policy); @@ -407,7 +407,7 @@ static void sched_setscheduler_set(void) { .sched_priority = priority }; - errno = 0; + errno_clear(); i = sched_setscheduler(0, policy, ¶m); if(i != 0) { netdata_log_error("Cannot adjust netdata scheduling policy to %s (%d), with priority %d. Falling back to nice.", diff --git a/src/daemon/main.c b/src/daemon/main.c index 75fa13356e..c7a3bc740a 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -26,7 +26,6 @@ int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; bool ieee754_doubles = false; time_t netdata_start_time = 0; struct netdata_static_thread *static_threads; -bool i_am_the_spawn_server = false; struct config netdata_config = { .first_section = NULL, @@ -325,9 +324,6 @@ static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) { void web_client_cache_destroy(void); void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) { - if (i_am_the_spawn_server) - exit(ret); - watcher_shutdown_begin(); nd_log_limits_unlimited(); @@ -490,9 +486,12 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re #endif watcher_step_complete(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES); + netdata_main_spawn_server_cleanup(); + watcher_step_complete(WATCHER_STEP_ID_DESTROY_MAIN_SPAWN_SERVER); + (void) unlink(agent_incomplete_shutdown_file); watcher_step_complete(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE); - + watcher_shutdown_end(); watcher_thread_stop(); @@ -621,39 +620,6 @@ void 
web_server_config_options(void) } } - -// killpid kills pid with SIGTERM. -int killpid(pid_t pid) { - int ret; - netdata_log_debug(D_EXIT, "Request to kill pid %d", pid); - - int signal = SIGTERM; -//#ifdef NETDATA_INTERNAL_CHECKS -// if(service_running(SERVICE_COLLECTORS)) -// signal = SIGABRT; -//#endif - - errno = 0; - ret = kill(pid, signal); - if (ret == -1) { - switch(errno) { - case ESRCH: - // We wanted the process to exit so just let the caller handle. - return ret; - - case EPERM: - netdata_log_error("Cannot kill pid %d, but I do not have enough permissions.", pid); - break; - - default: - netdata_log_error("Cannot kill pid %d, but I received an error.", pid); - break; - } - } - - return ret; -} - static void set_nofile_limit(struct rlimit *rl) { // get the num files allowed if(getrlimit(RLIMIT_NOFILE, rl) != 0) { @@ -1333,7 +1299,7 @@ static void post_conf_load(char **user) } static bool load_netdata_conf(char *filename, char overwrite_used, char **user) { - errno = 0; + errno_clear(); int ret = 0; @@ -1380,15 +1346,12 @@ int get_system_info(struct rrdhost_system_info *system_info) { return 1; } - pid_t command_pid; - - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); - if(fp_child_output) { + POPEN_INSTANCE *instance = spawn_popen_run(script); + if(instance) { char line[200 + 1]; // Removed the double strlens, if the Coverity tainted string warning reappears I'll revert. // One time init code, but I'm curious about the warning... 
- while (fgets(line, 200, fp_child_output) != NULL) { + while (fgets(line, 200, instance->child_stdout_fp) != NULL) { char *value=line; while (*value && *value != '=') value++; if (*value=='=') { @@ -1407,7 +1370,7 @@ int get_system_info(struct rrdhost_system_info *system_info) { } } } - netdata_pclose(fp_child_input, fp_child_output, command_pid); + spawn_popen_wait(instance); } freez(script); #else @@ -1464,15 +1427,12 @@ int unittest_prepare_rrd(char **user) { return 0; } -int netdata_main(int argc, char **argv) -{ - analytics_init(); - string_init(); - - // initialize the system clocks +int netdata_main(int argc, char **argv) { clocks_init(); - netdata_start_time = now_realtime_sec(); + string_init(); + analytics_init(); + netdata_start_time = now_realtime_sec(); usec_t started_ut = now_monotonic_usec(); usec_t last_ut = started_ut; const char *prev_msg = NULL; @@ -1495,13 +1455,6 @@ int netdata_main(int argc, char **argv) // set the name for logging program_name = "netdata"; - if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) { - // don't run netdata, this is the spawn server - i_am_the_spawn_server = true; - spawn_server(); - exit(0); - } - // parse options { int num_opts = sizeof(option_definitions) / sizeof(struct option_def); @@ -1966,7 +1919,7 @@ int netdata_main(int argc, char **argv) if (close_open_fds == true) { // close all open file descriptors, except the standard ones // the caller may have left open files (lxc-attach has this issue) - for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR); + os_close_all_non_std_open_fds_except(NULL, 0); } if(!config_loaded) { @@ -2196,6 +2149,7 @@ int netdata_main(int argc, char **argv) (void)dont_fork; #endif + netdata_main_spawn_server_init("plugins", argc, (const char **)argv); watcher_thread_start(); // init sentry @@ -2228,10 +2182,6 @@ int netdata_main(int argc, char **argv) // fork the spawn server delta_startup_time("fork the 
spawn server"); -#ifndef OS_WINDOWS - spawn_init(); -#endif - /* * Libuv uv_spawn() uses SIGCHLD internally: * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485 @@ -2288,6 +2238,7 @@ int netdata_main(int argc, char **argv) if (claiming_pending_arguments) claim_agent(claiming_pending_arguments, false, NULL); + load_claiming_state(); // ------------------------------------------------------------------------ diff --git a/src/daemon/main.h b/src/daemon/main.h index faf7d5b69d..3188623b6a 100644 --- a/src/daemon/main.h +++ b/src/daemon/main.h @@ -8,7 +8,6 @@ extern struct config netdata_config; void cancel_main_threads(void); -int killpid(pid_t pid); typedef enum { ABILITY_DATA_QUERIES = (1 << 0), diff --git a/src/daemon/signals.c b/src/daemon/signals.c index c014452b7d..c014a75939 100644 --- a/src/daemon/signals.c +++ b/src/daemon/signals.c @@ -118,61 +118,45 @@ void signals_reset(void) { } } -// reap_child reaps the child identified by pid. -static void reap_child(pid_t pid) { - siginfo_t i; +static void sigchild_handle() { + int status; + pid_t pid; - errno = 0; - netdata_log_debug(D_CHILDS, "SIGNAL: reap_child(%d)...", pid); - if (netdata_waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) { - if (errno != ECHILD) - netdata_log_error("SIGNAL: waitid(%d): failed to wait for child", pid); - else - netdata_log_info("SIGNAL: waitid(%d): failed - it seems the child is already reaped", pid); - return; - } - else if (i.si_pid == 0) { - // Process didn't exit, this shouldn't happen. 
- netdata_log_error("SIGNAL: waitid(%d): reports pid 0 - child has not exited", pid); - return; - } + // Loop to check for exited child processes + while ((pid = waitpid((pid_t)(-1), &status, WNOHANG)) != 0) { + if(pid == -1) + break; - switch (i.si_code) { - case CLD_EXITED: - netdata_log_info("SIGNAL: reap_child(%d) exited with code: %d", pid, i.si_status); - break; - case CLD_KILLED: - netdata_log_info("SIGNAL: reap_child(%d) killed by signal: %d", pid, i.si_status); - break; - case CLD_DUMPED: - netdata_log_info("SIGNAL: reap_child(%d) dumped core by signal: %d", pid, i.si_status); - break; - case CLD_STOPPED: - netdata_log_info("SIGNAL: reap_child(%d) stopped by signal: %d", pid, i.si_status); - break; - case CLD_TRAPPED: - netdata_log_info("SIGNAL: reap_child(%d) trapped by signal: %d", pid, i.si_status); - break; - case CLD_CONTINUED: - netdata_log_info("SIGNAL: reap_child(%d) continued by signal: %d", pid, i.si_status); - break; - default: - netdata_log_info("SIGNAL: reap_child(%d) gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status); - break; - } -} - -// reap_children reaps all pending children which are not managed by myp. 
-static void reap_children() { - siginfo_t i; - - while(1) { - i.si_pid = 0; - if (netdata_waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1 || i.si_pid == 0) - // nothing to do - return; - - reap_child(i.si_pid); + if(WIFEXITED(status)) { + nd_log(NDLS_DAEMON, NDLP_INFO, + "DAEMON: child with pid %d exited normally with exit code %d", + pid, WEXITSTATUS(status)); + } + else if(WIFSIGNALED(status)) { + if(WCOREDUMP(status)) + nd_log(NDLS_DAEMON, NDLP_INFO, + "DAEMON: child with pid %d coredump'd due to signal %d", + pid, WTERMSIG(status)); + else + nd_log(NDLS_DAEMON, NDLP_INFO, + "DAEMON: child with pid %d killed by signal %d", + pid, WTERMSIG(status)); + } + else if(WIFSTOPPED(status)) { + nd_log(NDLS_DAEMON, NDLP_INFO, + "DAEMON: child with pid %d stopped due to signal %d", + pid, WSTOPSIG(status)); + } + else if(WIFCONTINUED(status)) { + nd_log(NDLS_DAEMON, NDLP_INFO, + "DAEMON: child with pid %d continued due to signal %d", + pid, SIGCONT); + } + else { + nd_log(NDLS_DAEMON, NDLP_INFO, + "DAEMON: child with pid %d reports unhandled status", + pid); + } } } @@ -183,6 +167,7 @@ void signals_handle(void) { // is delivered that either terminates the process or causes the invocation // of a signal-catching function.
if(pause() == -1 && errno == EINTR) { + errno_clear(); // loop once, but keep looping while signals are coming in // this is needed because a few operations may take some time @@ -227,7 +212,7 @@ void signals_handle(void) { break; case NETDATA_SIGNAL_CHILD: - reap_children(); + sigchild_handle(); break; default: diff --git a/src/daemon/static_threads.c b/src/daemon/static_threads.c index b67dab6f69..c6ec799560 100644 --- a/src/daemon/static_threads.c +++ b/src/daemon/static_threads.c @@ -30,11 +30,7 @@ const struct netdata_static_thread static_threads_common[] = { .name = "HEALTH", .config_section = NULL, .config_name = NULL, -#ifdef OS_WINDOWS - .enabled = 0, -#else .enabled = 1, -#endif .thread = NULL, .init_routine = NULL, .start_routine = health_main @@ -74,11 +70,7 @@ const struct netdata_static_thread static_threads_common[] = { .name = "PLUGINSD", .config_section = NULL, .config_name = NULL, -#ifdef OS_WINDOWS - .enabled = 0, -#else .enabled = 1, -#endif .thread = NULL, .init_routine = NULL, .start_routine = pluginsd_main @@ -101,8 +93,6 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = statsd_main }, -#ifndef OS_WINDOWS - // this crashes the debugger under windows { .name = "EXPORTING", .config_section = NULL, @@ -112,7 +102,6 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = exporting_main }, -#endif { .name = "SNDR[localhost]", .config_section = NULL, diff --git a/src/daemon/watcher.c b/src/daemon/watcher.c index 1e0090e241..bf01968f55 100644 --- a/src/daemon/watcher.c +++ b/src/daemon/watcher.c @@ -151,6 +151,8 @@ void watcher_thread_start() { "remove pid file"; watcher_steps[WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES].msg = "free openssl structures"; + watcher_steps[WATCHER_STEP_ID_DESTROY_MAIN_SPAWN_SERVER].msg = + "destroy main spawn server"; watcher_steps[WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE].msg = "remove incomplete shutdown file"; 
diff --git a/src/daemon/watcher.h b/src/daemon/watcher.h index b785ca4362..4af09480c4 100644 --- a/src/daemon/watcher.h +++ b/src/daemon/watcher.h @@ -30,6 +30,7 @@ typedef enum { WATCHER_STEP_ID_CLOSE_SQL_DATABASES, WATCHER_STEP_ID_REMOVE_PID_FILE, WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES, + WATCHER_STEP_ID_DESTROY_MAIN_SPAWN_SERVER, WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE, // Always keep this as the last enum value diff --git a/src/daemon/win_system-info.c b/src/daemon/win_system-info.c index 7bd6b8f5f5..801e1e4e72 100644 --- a/src/daemon/win_system-info.c +++ b/src/daemon/win_system-info.c @@ -172,7 +172,7 @@ static DWORD netdata_windows_get_current_build() cBuild, 63, HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion", "CurrentBuild")) return 0; - errno = 0; + errno_clear(); DWORD version = strtol(cBuild, NULL, 10); if (errno == ERANGE) diff --git a/src/daemon/winsvc.cc b/src/daemon/winsvc.cc index cdf503256c..9c5eb49ff9 100644 --- a/src/daemon/winsvc.cc +++ b/src/daemon/winsvc.cc @@ -219,7 +219,7 @@ static bool update_path() { int main(int argc, char *argv[]) { - bool tty = isatty(fileno(stdout)) == 1; + bool tty = isatty(fileno(stdin)) == 1; if (!update_path()) { return 1; diff --git a/src/database/engine/rrdengine.c b/src/database/engine/rrdengine.c index 2d6583ead5..a989877fcf 100644 --- a/src/database/engine/rrdengine.c +++ b/src/database/engine/rrdengine.c @@ -1517,7 +1517,7 @@ static void *journal_v2_indexing_tp_worker(struct rrdengine_instance *ctx __mayb break; } - errno = 0; + errno_clear(); if(count) nd_log(NDLS_DAEMON, NDLP_DEBUG, "DBENGINE: journal indexing done; %u files processed", diff --git a/src/database/rrd.h b/src/database/rrd.h index 097e250250..bd31e21e13 100644 --- a/src/database/rrd.h +++ b/src/database/rrd.h @@ -1043,7 +1043,6 @@ struct alarm_entry { STRING *recipient; time_t exec_run_timestamp; int exec_code; - uint64_t exec_spawn_serial; STRING *source; STRING *units; @@ -1069,6 +1068,8 @@ struct alarm_entry 
{ time_t last_repeat; + POPEN_INSTANCE *popen_instance; + struct alarm_entry *next; struct alarm_entry *next_in_progress; struct alarm_entry *prev_in_progress; diff --git a/src/database/rrdhost.c b/src/database/rrdhost.c index dd5f2a43e2..b3d786cff3 100644 --- a/src/database/rrdhost.c +++ b/src/database/rrdhost.c @@ -1494,18 +1494,16 @@ static void rrdhost_load_kubernetes_labels(void) { return; } - pid_t pid; - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen(label_script, &pid, &fp_child_input); - if(!fp_child_output) return; + POPEN_INSTANCE *instance = spawn_popen_run(label_script); + if(!instance) return; char buffer[1000 + 1]; - while (fgets(buffer, 1000, fp_child_output) != NULL) + while (fgets(buffer, 1000, instance->child_stdout_fp) != NULL) rrdlabels_add_pair(localhost->rrdlabels, buffer, RRDLABEL_SRC_AUTO|RRDLABEL_SRC_K8S); // Non-zero exit code means that all the script output is error messages. We've shown already any message that didn't include a ':' // Here we'll inform with an ERROR that the script failed, show whatever (if anything) was added to the list of labels, free the memory and set the return to null - int rc = netdata_pclose(fp_child_input, fp_child_output, pid); + int rc = spawn_popen_wait(instance); if(rc) nd_log(NDLS_DAEMON, NDLP_ERR, "%s exited abnormally. 
Failed to get kubernetes labels.", diff --git a/src/database/sqlite/sqlite_context.c b/src/database/sqlite/sqlite_context.c index 1e49dd2bf8..1d0c768e5b 100644 --- a/src/database/sqlite/sqlite_context.c +++ b/src/database/sqlite/sqlite_context.c @@ -43,7 +43,7 @@ int sql_init_context_database(int memory) return 1; } - errno = 0; + errno_clear(); netdata_log_info("SQLite database %s initialization", sqlite_database); char buf[1024 + 1] = ""; diff --git a/src/database/sqlite/sqlite_db_migration.c b/src/database/sqlite/sqlite_db_migration.c index 88abd84924..44a5e97c29 100644 --- a/src/database/sqlite/sqlite_db_migration.c +++ b/src/database/sqlite/sqlite_db_migration.c @@ -518,7 +518,7 @@ static int migrate_database(sqlite3 *database, int target_version, char *db_name } if (likely(user_version == target_version)) { - errno = 0; + errno_clear(); netdata_log_info("%s database version is %d (no migration needed)", db_name, target_version); return target_version; } diff --git a/src/database/sqlite/sqlite_health.c b/src/database/sqlite/sqlite_health.c index 51e38d05aa..ddb847080c 100644 --- a/src/database/sqlite/sqlite_health.c +++ b/src/database/sqlite/sqlite_health.c @@ -461,7 +461,7 @@ void sql_alert_cleanup(bool cli) { UNUSED(cli); - errno = 0; + errno_clear(); if (sql_init_meta_database(DB_CHECK_NONE, 0)) { netdata_log_error("Failed to open database"); return; diff --git a/src/database/sqlite/sqlite_metadata.c b/src/database/sqlite/sqlite_metadata.c index e0302ff37e..11b84a6043 100644 --- a/src/database/sqlite/sqlite_metadata.c +++ b/src/database/sqlite/sqlite_metadata.c @@ -711,7 +711,7 @@ int sql_init_meta_database(db_check_action_type_t rebuild, int memory) } if (rebuild & DB_CHECK_ANALYZE) { - errno = 0; + errno_clear(); netdata_log_info("Running ANALYZE on %s", sqlite_database); rc = sqlite3_exec_monitored(db_meta, "ANALYZE", 0, 0, &err_msg); if (rc != SQLITE_OK) { @@ -725,7 +725,7 @@ int sql_init_meta_database(db_check_action_type_t rebuild, int memory) return 
1; } - errno = 0; + errno_clear(); netdata_log_info("SQLite database %s initialization", sqlite_database); rc = sqlite3_create_function(db_meta, "u2h", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, 0, sqlite_uuid_parse, 0, 0); diff --git a/src/exporting/send_data.c b/src/exporting/send_data.c index b79f0a3e30..097b7fd4b3 100644 --- a/src/exporting/send_data.c +++ b/src/exporting/send_data.c @@ -77,7 +77,7 @@ void simple_connector_receive_response(int *sock, struct instance *instance) ERR_clear_error(); #endif - errno = 0; + errno_clear(); // loop through to collect all data while (*sock != -1 && errno != EWOULDBLOCK) { diff --git a/src/health/health_log.c b/src/health/health_log.c index b04f8f248a..209e8d3292 100644 --- a/src/health/health_log.c +++ b/src/health/health_log.c @@ -43,7 +43,7 @@ void health_log_alert_transition_with_trace(RRDHOST *host, ALARM_ENTRY *ae, int }; ND_LOG_STACK_PUSH(lgs); - errno = 0; + errno_clear(); ND_LOG_FIELD_PRIORITY priority = NDLP_INFO; diff --git a/src/health/health_notifications.c b/src/health/health_notifications.c index 79426f48c1..85dd2d0d8b 100644 --- a/src/health/health_notifications.c +++ b/src/health/health_notifications.c @@ -23,7 +23,13 @@ void health_alarm_wait_for_execution(ALARM_ENTRY *ae) { if (!(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS)) return; - spawn_wait_cmd(ae->exec_spawn_serial, &ae->exec_code, &ae->exec_run_timestamp); + if(!ae->popen_instance) { + // nd_log(NDLS_DAEMON, NDLP_ERR, "attempted to wait for the execution of alert that has not spawn a notification"); + return; + } + + ae->exec_code = spawn_popen_wait(ae->popen_instance); + netdata_log_debug(D_HEALTH, "done executing command - returned with code %d", ae->exec_code); ae->flags &= ~HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; @@ -75,7 +81,6 @@ static inline void enqueue_alarm_notify_in_progress(ALARM_ENTRY *ae) alarm_notifications_in_progress.head = ae; } alarm_notifications_in_progress.tail = ae; - } static bool prepare_command(BUFFER *wb, @@ -462,7 +467,7 @@ 
void health_send_notification(RRDHOST *host, ALARM_ENTRY *ae, struct health_rais netdata_log_debug(D_HEALTH, "executing command '%s'", command_to_run); ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; - ae->exec_spawn_serial = spawn_enq_cmd(command_to_run); + ae->popen_instance = spawn_popen_run(command_to_run); enqueue_alarm_notify_in_progress(ae); health_alarm_log_save(host, ae); } else { diff --git a/src/libnetdata/clocks/clocks.c b/src/libnetdata/clocks/clocks.c index a681689623..5da450a2dc 100644 --- a/src/libnetdata/clocks/clocks.c +++ b/src/libnetdata/clocks/clocks.c @@ -343,7 +343,7 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) { } if(unlikely(now < next)) { - errno = 0; + errno_clear(); nd_log_limit_static_global_var(erl, 10, 0); nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, "heartbeat clock: woke up %"PRIu64" microseconds earlier than expected " @@ -351,7 +351,7 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) { next - now); } else if(unlikely(now - next > tick / 2)) { - errno = 0; + errno_clear(); nd_log_limit_static_global_var(erl, 10, 0); nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, "heartbeat clock: woke up %"PRIu64" microseconds later than expected " diff --git a/src/libnetdata/libnetdata.c b/src/libnetdata/libnetdata.c index 909bb71d0e..c0339aa0a4 100644 --- a/src/libnetdata/libnetdata.c +++ b/src/libnetdata/libnetdata.c @@ -1248,7 +1248,7 @@ cleanup: close(fd); } if(mem == MAP_FAILED) return NULL; - errno = 0; + errno_clear(); return mem; } @@ -1364,7 +1364,7 @@ int verify_netdata_host_prefix(bool log_msg) { char buffer[FILENAME_MAX + 1]; char *path = netdata_configured_host_prefix; char *reason = "unknown reason"; - errno = 0; + errno_clear(); struct stat sb; if (stat(path, &sb) == -1) { @@ -1679,19 +1679,17 @@ char *find_and_replace(const char *src, const char *find, const char *replace, c return value; } - BUFFER *run_command_and_get_output_to_buffer(const char *command, int max_line_length) { BUFFER *wb = buffer_create(0, NULL); - 
pid_t pid; - FILE *fp = netdata_popen(command, &pid, NULL); - - if(fp) { + POPEN_INSTANCE *pi = spawn_popen_run(command); + if(pi) { char buffer[max_line_length + 1]; - while (fgets(buffer, max_line_length, fp)) { + while (fgets(buffer, max_line_length, pi->child_stdout_fp)) { buffer[max_line_length] = '\0'; buffer_strcat(wb, buffer); } + spawn_popen_kill(pi); } else { buffer_free(wb); @@ -1699,103 +1697,27 @@ BUFFER *run_command_and_get_output_to_buffer(const char *command, int max_line_l return NULL; } - netdata_pclose(NULL, fp, pid); return wb; } bool run_command_and_copy_output_to_stdout(const char *command, int max_line_length) { - pid_t pid; - FILE *fp = netdata_popen(command, &pid, NULL); - - if(fp) { + POPEN_INSTANCE *pi = spawn_popen_run(command); + if(pi) { char buffer[max_line_length + 1]; - while (fgets(buffer, max_line_length, fp)) + + while (fgets(buffer, max_line_length, pi->child_stdout_fp)) fprintf(stdout, "%s", buffer); + + spawn_popen_kill(pi); } else { netdata_log_error("Failed to execute command '%s'.", command); return false; } - netdata_pclose(NULL, fp, pid); return true; } - -static int fd_is_valid(int fd) { - return fcntl(fd, F_GETFD) != -1 || errno != EBADF; -} - -void for_each_open_fd(OPEN_FD_ACTION action, OPEN_FD_EXCLUDE excluded_fds){ - int fd; - - switch(action){ - case OPEN_FD_ACTION_CLOSE: - if(!(excluded_fds & OPEN_FD_EXCLUDE_STDIN)) (void)close(STDIN_FILENO); - if(!(excluded_fds & OPEN_FD_EXCLUDE_STDOUT)) (void)close(STDOUT_FILENO); - if(!(excluded_fds & OPEN_FD_EXCLUDE_STDERR)) (void)close(STDERR_FILENO); -#if defined(HAVE_CLOSE_RANGE) - if(close_range(STDERR_FILENO + 1, ~0U, 0) == 0) return; - nd_log(NDLS_DAEMON, NDLP_DEBUG, "close_range() failed, will try to close fds one by one"); -#endif - break; - case OPEN_FD_ACTION_FD_CLOEXEC: - if(!(excluded_fds & OPEN_FD_EXCLUDE_STDIN)) (void)fcntl(STDIN_FILENO, F_SETFD, FD_CLOEXEC); - if(!(excluded_fds & OPEN_FD_EXCLUDE_STDOUT)) (void)fcntl(STDOUT_FILENO, F_SETFD, FD_CLOEXEC); - 
if(!(excluded_fds & OPEN_FD_EXCLUDE_STDERR)) (void)fcntl(STDERR_FILENO, F_SETFD, FD_CLOEXEC); -#if defined(HAVE_CLOSE_RANGE) && defined(CLOSE_RANGE_CLOEXEC) // Linux >= 5.11, FreeBSD >= 13.1 - if(close_range(STDERR_FILENO + 1, ~0U, CLOSE_RANGE_CLOEXEC) == 0) return; - nd_log(NDLS_DAEMON, NDLP_DEBUG, "close_range() failed, will try to mark fds for closing one by one"); -#endif - break; - default: - break; // do nothing - } - - DIR *dir = opendir("/proc/self/fd"); - if (dir == NULL) { - struct rlimit rl; - int open_max = -1; - - if(getrlimit(RLIMIT_NOFILE, &rl) == 0 && rl.rlim_max != RLIM_INFINITY) open_max = rl.rlim_max; -#ifdef _SC_OPEN_MAX - else open_max = sysconf(_SC_OPEN_MAX); -#endif - - if (open_max == -1) open_max = 65535; // 65535 arbitrary default if everything else fails - - for (fd = STDERR_FILENO + 1; fd < open_max; fd++) { - switch(action){ - case OPEN_FD_ACTION_CLOSE: - if(fd_is_valid(fd)) (void)close(fd); - break; - case OPEN_FD_ACTION_FD_CLOEXEC: - (void)fcntl(fd, F_SETFD, FD_CLOEXEC); - break; - default: - break; // do nothing - } - } - } else { - struct dirent *entry; - while ((entry = readdir(dir)) != NULL) { - fd = str2i(entry->d_name); - if(unlikely((fd == STDIN_FILENO ) || (fd == STDOUT_FILENO) || (fd == STDERR_FILENO) )) continue; - switch(action){ - case OPEN_FD_ACTION_CLOSE: - if(fd_is_valid(fd)) (void)close(fd); - break; - case OPEN_FD_ACTION_FD_CLOEXEC: - (void)fcntl(fd, F_SETFD, FD_CLOEXEC); - break; - default: - break; // do nothing - } - } - closedir(dir); - } -} - struct timing_steps { const char *name; usec_t time; diff --git a/src/libnetdata/libnetdata.h b/src/libnetdata/libnetdata.h index 1c72e54106..b4bddb70a1 100644 --- a/src/libnetdata/libnetdata.h +++ b/src/libnetdata/libnetdata.h @@ -326,6 +326,9 @@ size_t judy_aral_structures(void); #define GUID_LEN 36 +#define PIPE_READ 0 +#define PIPE_WRITE 1 + #include "linked-lists.h" #include "storage-point.h" @@ -425,7 +428,7 @@ char *find_and_replace(const char *src, const char *find, 
const char *replace, c #define UNUSED_FUNCTION(x) UNUSED_##x #endif -#define error_report(x, args...) do { errno = 0; netdata_log_error(x, ##args); } while(0) +#define error_report(x, args...) do { errno_clear(); netdata_log_error(x, ##args); } while(0) // Taken from linux kernel #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) @@ -440,17 +443,6 @@ char *find_and_replace(const char *src, const char *find, const char *replace, c bool run_command_and_copy_output_to_stdout(const char *command, int max_line_length); struct web_buffer *run_command_and_get_output_to_buffer(const char *command, int max_line_length); -typedef enum { - OPEN_FD_ACTION_CLOSE, - OPEN_FD_ACTION_FD_CLOEXEC -} OPEN_FD_ACTION; -typedef enum { - OPEN_FD_EXCLUDE_STDIN = 0x01, - OPEN_FD_EXCLUDE_STDOUT = 0x02, - OPEN_FD_EXCLUDE_STDERR = 0x04 -} OPEN_FD_EXCLUDE; -void for_each_open_fd(OPEN_FD_ACTION action, OPEN_FD_EXCLUDE excluded_fds); - #ifdef OS_WINDOWS void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data); #else @@ -483,7 +475,9 @@ extern char *netdata_configured_host_prefix; #include "datetime/rfc3339.h" #include "datetime/rfc7231.h" #include "completion/completion.h" -#include "popen/popen.h" +#include "log/log.h" +#include "spawn_server/spawn_server.h" +#include "spawn_server/spawn_popen.h" #include "simple_pattern/simple_pattern.h" #ifdef ENABLE_HTTPS # include "socket/security.h" @@ -491,7 +485,6 @@ extern char *netdata_configured_host_prefix; #include "socket/socket.h" #include "config/appconfig.h" #include "log/journal.h" -#include "log/log.h" #include "buffered_reader/buffered_reader.h" #include "procfile/procfile.h" #include "string/string.h" diff --git a/src/libnetdata/log/log.c b/src/libnetdata/log/log.c index 501b663245..135d20f6f3 100644 --- a/src/libnetdata/log/log.c +++ b/src/libnetdata/log/log.c @@ -6,6 +6,10 @@ #include "../libnetdata.h" +#if defined(OS_WINDOWS) +#include <windows.h> +#endif + #ifdef 
__FreeBSD__ #include <sys/endian.h> #endif @@ -35,6 +39,16 @@ int aclklog_enabled = 0; struct nd_log_source; static bool nd_log_limit_reached(struct nd_log_source *source); +// ---------------------------------------------------------------------------- + +void errno_clear(void) { + errno = 0; + +#if defined(OS_WINDOWS) + SetLastError(ERROR_SUCCESS); +#endif +} + // ---------------------------------------------------------------------------- // logging method @@ -514,6 +528,13 @@ int nd_log_health_fd(void) { return STDERR_FILENO; } +int nd_log_collectors_fd(void) { + if(nd_log.sources[NDLS_COLLECTORS].method == NDLM_FILE && nd_log.sources[NDLS_COLLECTORS].fd != -1) + return nd_log.sources[NDLS_COLLECTORS].fd; + + return STDERR_FILENO; +} + void nd_log_set_user_settings(ND_LOG_SOURCES source, const char *setting) { char buf[FILENAME_MAX + 100]; if(setting && *setting) @@ -1011,6 +1032,10 @@ static void errno_annotator(BUFFER *wb, const char *key, struct log_field *lf); static void priority_annotator(BUFFER *wb, const char *key, struct log_field *lf); static void timestamp_usec_annotator(BUFFER *wb, const char *key, struct log_field *lf); +#if defined(OS_WINDOWS) +static void winerror_annotator(BUFFER *wb, const char *key, struct log_field *lf); +#endif + // ---------------------------------------------------------------------------- typedef void (*annotator_t)(BUFFER *wb, const char *key, struct log_field *lf); @@ -1058,6 +1083,13 @@ static __thread struct log_field thread_log_fields[_NDF_MAX] = { .logfmt = "errno", .logfmt_annotator = errno_annotator, }, +#if defined(OS_WINDOWS) + [NDF_WINERROR] = { + .journal = "WINERROR", + .logfmt = "winerror", + .logfmt_annotator = winerror_annotator, + }, +#endif [NDF_INVOCATION_ID] = { .journal = "INVOCATION_ID", // standard journald field .logfmt = NULL, @@ -1563,6 +1595,45 @@ static void errno_annotator(BUFFER *wb, const char *key, struct log_field *lf) { buffer_fast_strcat(wb, "\"", 1); } +#if defined(OS_WINDOWS) +static 
void winerror_annotator(BUFFER *wb, const char *key, struct log_field *lf) { + DWORD errnum = log_field_to_uint64(lf); + + if(errnum == 0) + return; + + char buf[1024]; + DWORD size = FormatMessageA( + FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + errnum, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + buf, + (DWORD)(sizeof(buf) - 1), + NULL + ); + if(size > 0) { + // remove \r\n at the end + while(size > 0 && (buf[size - 1] == '\r' || buf[size - 1] == '\n')) + buf[--size] = '\0'; + } + else + size = snprintf(buf, sizeof(buf) - 1, "unknown error code"); + + buf[size] = '\0'; + + if(buffer_strlen(wb)) + buffer_fast_strcat(wb, " ", 1); + + buffer_strcat(wb, key); + buffer_fast_strcat(wb, "=\"", 2); + buffer_print_int64(wb, errnum); + buffer_fast_strcat(wb, ", ", 2); + buffer_json_strcat(wb, buf); + buffer_fast_strcat(wb, "\"", 1); +} +#endif + static void priority_annotator(BUFFER *wb, const char *key, struct log_field *lf) { uint64_t pri = log_field_to_uint64(lf); @@ -2099,8 +2170,8 @@ static void nd_logger_merge_log_stack_to_thread_fields(void) { } static void nd_logger(const char *file, const char *function, const unsigned long line, - ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, bool limit, int saved_errno, - const char *fmt, va_list ap) { + ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, bool limit, + int saved_errno, size_t saved_winerror __maybe_unused, const char *fmt, va_list ap) { SPINLOCK *spinlock; FILE *fp; @@ -2168,6 +2239,11 @@ static void nd_logger(const char *file, const char *function, const unsigned lon if(saved_errno != 0 && !thread_log_fields[NDF_ERRNO].entry.set) thread_log_fields[NDF_ERRNO].entry = ND_LOG_FIELD_I64(NDF_ERRNO, saved_errno); +#if defined(OS_WINDOWS) + if(saved_winerror != 0 && !thread_log_fields[NDF_WINERROR].entry.set) + thread_log_fields[NDF_WINERROR].entry = ND_LOG_FIELD_U64(NDF_WINERROR, saved_winerror); +#endif + CLEAN_BUFFER *wb = NULL; if(fmt && 
!thread_log_fields[NDF_MESSAGE].entry.set) { wb = buffer_create(1024, NULL); @@ -2215,7 +2291,7 @@ static void nd_logger(const char *file, const char *function, const unsigned lon nd_log.sources[source].pending_msg = NULL; } - errno = 0; + errno_clear(); } static ND_LOG_SOURCES nd_log_validate_source(ND_LOG_SOURCES source) { @@ -2234,6 +2310,12 @@ static ND_LOG_SOURCES nd_log_validate_source(ND_LOG_SOURCES source) { void netdata_logger(ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const char *file, const char *function, unsigned long line, const char *fmt, ... ) { int saved_errno = errno; + + size_t saved_winerror = 0; +#if defined(OS_WINDOWS) + saved_winerror = GetLastError(); +#endif + source = nd_log_validate_source(source); if (source != NDLS_DEBUG && priority > nd_log.sources[source].min_priority) @@ -2243,12 +2325,18 @@ void netdata_logger(ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const va_start(args, fmt); nd_logger(file, function, line, source, priority, source == NDLS_DAEMON || source == NDLS_COLLECTORS, - saved_errno, fmt, args); + saved_errno, saved_winerror, fmt, args); va_end(args); } void netdata_logger_with_limit(ERROR_LIMIT *erl, ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... 
) { int saved_errno = errno; + + size_t saved_winerror = 0; +#if defined(OS_WINDOWS) + saved_winerror = GetLastError(); +#endif + source = nd_log_validate_source(source); if (source != NDLS_DEBUG && priority > nd_log.sources[source].min_priority) @@ -2272,7 +2360,7 @@ void netdata_logger_with_limit(ERROR_LIMIT *erl, ND_LOG_SOURCES source, ND_LOG_F va_start(args, fmt); nd_logger(file, function, line, source, priority, source == NDLS_DAEMON || source == NDLS_COLLECTORS, - saved_errno, fmt, args); + saved_errno, saved_winerror, fmt, args); va_end(args); erl->last_logged = now; erl->count = 0; @@ -2280,12 +2368,18 @@ void netdata_logger_with_limit(ERROR_LIMIT *erl, ND_LOG_SOURCES source, ND_LOG_F void netdata_logger_fatal( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) { int saved_errno = errno; + + size_t saved_winerror = 0; +#if defined(OS_WINDOWS) + saved_winerror = GetLastError(); +#endif + ND_LOG_SOURCES source = NDLS_DAEMON; source = nd_log_validate_source(source); va_list args; va_start(args, fmt); - nd_logger(file, function, line, source, NDLP_ALERT, true, saved_errno, fmt, args); + nd_logger(file, function, line, source, NDLP_ALERT, true, saved_errno, saved_winerror, fmt, args); va_end(args); char date[LOG_DATE_LENGTH]; diff --git a/src/libnetdata/log/log.h b/src/libnetdata/log/log.h index 338a5d53b7..7517d9d667 100644 --- a/src/libnetdata/log/log.h +++ b/src/libnetdata/log/log.h @@ -46,6 +46,9 @@ typedef enum __attribute__((__packed__)) { NDF_LOG_SOURCE, // DAEMON, COLLECTORS, HEALTH, ACCESS, ACLK - set at the log call NDF_PRIORITY, // the syslog priority (severity) - set at the log call NDF_ERRNO, // the ERRNO at the time of the log call - added automatically +#if defined(OS_WINDOWS) + NDF_WINERROR, // Windows GetLastError() +#endif NDF_INVOCATION_ID, // the INVOCATION_ID of Netdata - added automatically NDF_LINE, // the source code file line number - added automatically NDF_FILE, // the source code filename - added 
automatically @@ -141,6 +144,7 @@ typedef enum __attribute__((__packed__)) { NDFT_CALLBACK, } ND_LOG_STACK_FIELD_TYPE; +void errno_clear(void); void nd_log_set_user_settings(ND_LOG_SOURCES source, const char *setting); void nd_log_set_facility(const char *facility); void nd_log_set_priority_level(const char *setting); @@ -157,6 +161,7 @@ const char *nd_log_id2priority(ND_LOG_FIELD_PRIORITY priority); const char *nd_log_method_for_external_plugins(const char *s); int nd_log_health_fd(void); +int nd_log_collectors_fd(void); typedef bool (*log_formatter_callback_t)(BUFFER *wb, void *data); struct log_stack_entry { diff --git a/src/libnetdata/maps/local-sockets.h b/src/libnetdata/maps/local-sockets.h index d407e6be6b..c1d1199439 100644 --- a/src/libnetdata/maps/local-sockets.h +++ b/src/libnetdata/maps/local-sockets.h @@ -5,10 +5,8 @@ #include "libnetdata/libnetdata.h" -// disable libmnl for the moment -#undef HAVE_LIBMNL - #ifdef HAVE_LIBMNL +#include <linux/rtnetlink.h> #include <linux/inet_diag.h> #include <linux/sock_diag.h> #include <linux/unix_diag.h> @@ -67,30 +65,41 @@ struct local_port; struct local_socket_state; typedef void (*local_sockets_cb_t)(struct local_socket_state *state, struct local_socket *n, void *data); +struct local_sockets_config { + bool listening; + bool inbound; + bool outbound; + bool local; + bool tcp4; + bool tcp6; + bool udp4; + bool udp6; + bool pid; + bool cmdline; + bool comm; + bool uid; + bool namespaces; + bool tcp_info; + + size_t max_errors; + size_t max_concurrent_namespaces; + + local_sockets_cb_t cb; + void *data; + + const char *host_prefix; + + // internal use + uint64_t net_ns_inode; +}; + typedef struct local_socket_state { - struct { - bool listening; - bool inbound; - bool outbound; - bool local; - bool tcp4; - bool tcp6; - bool udp4; - bool udp6; - bool pid; - bool cmdline; - bool comm; - bool uid; - bool namespaces; - size_t max_errors; - - local_sockets_cb_t cb; - void *data; - - const char *host_prefix; - } config; + 
struct local_sockets_config config; struct { + size_t mnl_sends; + size_t namespaces_found; + size_t tcp_info_received; size_t pid_fds_processed; size_t pid_fds_opendir_failed; size_t pid_fds_readlink_failed; @@ -98,6 +107,9 @@ typedef struct local_socket_state { size_t errors_encountered; } stats; + bool spawn_server_is_mine; + SPAWN_SERVER *spawn_server; + #ifdef HAVE_LIBMNL bool use_nl; struct mnl_socket *nl; @@ -106,6 +118,7 @@ typedef struct local_socket_state { ARAL *local_socket_aral; ARAL *pid_socket_aral; + SPINLOCK spinlock; // for namespaces uint64_t proc_self_net_ns_inode; @@ -181,12 +194,21 @@ typedef struct local_socket { SOCKET_DIRECTION direction; uint8_t timer; - uint8_t retransmits; + uint8_t retransmits; // the # of packets currently queued for retransmission (not yet acknowledged) uint32_t expires; uint32_t rqueue; uint32_t wqueue; uid_t uid; + struct { + bool checked; + bool ipv46; + } ipv6ony; + + union { + struct tcp_info tcp; + } info; + char comm[TASK_COMM_LEN]; STRING *cmdline; @@ -201,16 +223,18 @@ typedef struct local_socket { #endif } LOCAL_SOCKET; +static inline void local_sockets_spawn_server_callback(SPAWN_REQUEST *request); + // -------------------------------------------------------------------------------------------------------------------- static inline void local_sockets_log(LS_STATE *ls, const char *format, ...) PRINTFLIKE(2, 3); static inline void local_sockets_log(LS_STATE *ls, const char *format, ...) { - if(++ls->stats.errors_encountered == ls->config.max_errors) { + if(ls && ++ls->stats.errors_encountered == ls->config.max_errors) { nd_log(NDLS_COLLECTORS, NDLP_ERR, "LOCAL-SOCKETS: max number of logs reached. Not logging anymore"); return; } - if(ls->stats.errors_encountered > ls->config.max_errors) + if(ls && ls->stats.errors_encountered > ls->config.max_errors) return; char buf[16384]; @@ -224,6 +248,133 @@ static inline void local_sockets_log(LS_STATE *ls, const char *format, ...) 
{ // -------------------------------------------------------------------------------------------------------------------- +static bool local_sockets_is_ipv4_mapped_ipv6_address(const struct in6_addr *addr) { + // An IPv4-mapped IPv6 address starts with 80 bits of zeros followed by 16 bits of ones + static const unsigned char ipv4_mapped_prefix[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF }; + return memcmp(addr->s6_addr, ipv4_mapped_prefix, 12) == 0; +} + +static bool local_sockets_is_loopback_address(struct socket_endpoint *se) { + if (se->family == AF_INET) { + // For IPv4, loopback addresses are in the 127.0.0.0/8 range + return (ntohl(se->ip.ipv4) >> 24) == 127; // Check if the first byte is 127 + } else if (se->family == AF_INET6) { + // Check if the address is an IPv4-mapped IPv6 address + if (local_sockets_is_ipv4_mapped_ipv6_address(&se->ip.ipv6)) { + // Extract the last 32 bits (IPv4 address) and check if it's in the 127.0.0.0/8 range + uint8_t *ip6 = (uint8_t *)&se->ip.ipv6; + const uint32_t ipv4_addr = *((const uint32_t *)(ip6 + 12)); + return (ntohl(ipv4_addr) >> 24) == 127; + } + + // For IPv6, loopback address is ::1 + return memcmp(&se->ip.ipv6, &in6addr_loopback, sizeof(se->ip.ipv6)) == 0; + } + + return false; +} + +static inline bool local_sockets_is_ipv4_reserved_address(uint32_t ip) { + // Check for the reserved address ranges + ip = ntohl(ip); + return ( + (ip >> 24 == 10) || // Private IP range (A class) + (ip >> 20 == (172 << 4) + 1) || // Private IP range (B class) + (ip >> 16 == (192 << 8) + 168) || // Private IP range (C class) + (ip >> 24 == 127) || // Loopback address (127.0.0.0) + (ip >> 24 == 0) || // Reserved (0.0.0.0) + (ip >> 24 == 169 && (ip >> 16) == 254) || // Link-local address (169.254.0.0) + (ip >> 16 == (192 << 8) + 0) // Test-Net (192.0.0.0) + ); +} + +static inline bool local_sockets_is_private_address(struct socket_endpoint *se) { + if (se->family == AF_INET) { + return 
local_sockets_is_ipv4_reserved_address(se->ip.ipv4); + } + else if (se->family == AF_INET6) { + uint8_t *ip6 = (uint8_t *)&se->ip.ipv6; + + // Check if the address is an IPv4-mapped IPv6 address + if (local_sockets_is_ipv4_mapped_ipv6_address(&se->ip.ipv6)) { + // Extract the last 32 bits (IPv4 address) and check if it's in the 127.0.0.0/8 range + const uint32_t ipv4_addr = *((const uint32_t *)(ip6 + 12)); + return local_sockets_is_ipv4_reserved_address(ipv4_addr); + } + + // Check for link-local addresses (fe80::/10) + if ((ip6[0] == 0xFE) && ((ip6[1] & 0xC0) == 0x80)) + return true; + + // Check for Unique Local Addresses (ULA) (fc00::/7) + if ((ip6[0] & 0xFE) == 0xFC) + return true; + + // Check for multicast addresses (ff00::/8) + if (ip6[0] == 0xFF) + return true; + + // For IPv6, loopback address is :: or ::1 + return memcmp(&se->ip.ipv6, &in6addr_any, sizeof(se->ip.ipv6)) == 0 || + memcmp(&se->ip.ipv6, &in6addr_loopback, sizeof(se->ip.ipv6)) == 0; + } + + return false; +} + +static bool local_sockets_is_multicast_address(struct socket_endpoint *se) { + if (se->family == AF_INET) { + // For IPv4, check if the address is 0.0.0.0 + uint32_t ip = htonl(se->ip.ipv4); + return (ip >= 0xE0000000 && ip <= 0xEFFFFFFF); // Multicast address range (224.0.0.0/4) + } + else if (se->family == AF_INET6) { + // For IPv6, check if the address is ff00::/8 + uint8_t *ip6 = (uint8_t *)&se->ip.ipv6; + return ip6[0] == 0xff; + } + + return false; +} + +static bool local_sockets_is_zero_address(struct socket_endpoint *se) { + if (se->family == AF_INET) { + // For IPv4, check if the address is 0.0.0.0 + return se->ip.ipv4 == 0; + } + else if (se->family == AF_INET6) { + // For IPv6, check if the address is :: + return memcmp(&se->ip.ipv6, &in6addr_any, sizeof(se->ip.ipv6)) == 0; + } + + return false; +} + +static inline const char *local_sockets_address_space(struct socket_endpoint *se) { + if(local_sockets_is_zero_address(se)) + return "zero"; + else 
if(local_sockets_is_loopback_address(se)) + return "loopback"; + else if(local_sockets_is_multicast_address(se)) + return "multicast"; + else if(local_sockets_is_private_address(se)) + return "private"; + else + return "public"; +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline bool is_local_socket_ipv46(LOCAL_SOCKET *n) { + return n->local.family == AF_INET6 && + n->direction == SOCKET_DIRECTION_LISTEN && + local_sockets_is_zero_address(&n->local) && + n->ipv6ony.checked && + n->ipv6ony.ipv46; +} + +// -------------------------------------------------------------------------------------------------------------------- + static void local_sockets_foreach_local_socket_call_cb(LS_STATE *ls) { for(SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_first_read_only_LOCAL_SOCKET(&ls->sockets_hashtable); sl; @@ -425,123 +576,6 @@ static inline bool local_sockets_find_all_sockets_in_proc(LS_STATE *ls, const ch // -------------------------------------------------------------------------------------------------------------------- -static bool local_sockets_is_ipv4_mapped_ipv6_address(const struct in6_addr *addr) { - // An IPv4-mapped IPv6 address starts with 80 bits of zeros followed by 16 bits of ones - static const unsigned char ipv4_mapped_prefix[12] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF }; - return memcmp(addr->s6_addr, ipv4_mapped_prefix, 12) == 0; -} - -static bool local_sockets_is_loopback_address(struct socket_endpoint *se) { - if (se->family == AF_INET) { - // For IPv4, loopback addresses are in the 127.0.0.0/8 range - return (ntohl(se->ip.ipv4) >> 24) == 127; // Check if the first byte is 127 - } else if (se->family == AF_INET6) { - // Check if the address is an IPv4-mapped IPv6 address - if (local_sockets_is_ipv4_mapped_ipv6_address(&se->ip.ipv6)) { - // Extract the last 32 bits (IPv4 address) and check if it's in the 127.0.0.0/8 range - uint8_t *ip6 = 
(uint8_t *)&se->ip.ipv6; - const uint32_t ipv4_addr = *((const uint32_t *)(ip6 + 12)); - return (ntohl(ipv4_addr) >> 24) == 127; - } - - // For IPv6, loopback address is ::1 - return memcmp(&se->ip.ipv6, &in6addr_loopback, sizeof(se->ip.ipv6)) == 0; - } - - return false; -} - -static inline bool local_sockets_is_ipv4_reserved_address(uint32_t ip) { - // Check for the reserved address ranges - ip = ntohl(ip); - return ( - (ip >> 24 == 10) || // Private IP range (A class) - (ip >> 20 == (172 << 4) + 1) || // Private IP range (B class) - (ip >> 16 == (192 << 8) + 168) || // Private IP range (C class) - (ip >> 24 == 127) || // Loopback address (127.0.0.0) - (ip >> 24 == 0) || // Reserved (0.0.0.0) - (ip >> 24 == 169 && (ip >> 16) == 254) || // Link-local address (169.254.0.0) - (ip >> 16 == (192 << 8) + 0) // Test-Net (192.0.0.0) - ); -} - -static inline bool local_sockets_is_private_address(struct socket_endpoint *se) { - if (se->family == AF_INET) { - return local_sockets_is_ipv4_reserved_address(se->ip.ipv4); - } - else if (se->family == AF_INET6) { - uint8_t *ip6 = (uint8_t *)&se->ip.ipv6; - - // Check if the address is an IPv4-mapped IPv6 address - if (local_sockets_is_ipv4_mapped_ipv6_address(&se->ip.ipv6)) { - // Extract the last 32 bits (IPv4 address) and check if it's in the 127.0.0.0/8 range - const uint32_t ipv4_addr = *((const uint32_t *)(ip6 + 12)); - return local_sockets_is_ipv4_reserved_address(ipv4_addr); - } - - // Check for link-local addresses (fe80::/10) - if ((ip6[0] == 0xFE) && ((ip6[1] & 0xC0) == 0x80)) - return true; - - // Check for Unique Local Addresses (ULA) (fc00::/7) - if ((ip6[0] & 0xFE) == 0xFC) - return true; - - // Check for multicast addresses (ff00::/8) - if (ip6[0] == 0xFF) - return true; - - // For IPv6, loopback address is :: or ::1 - return memcmp(&se->ip.ipv6, &in6addr_any, sizeof(se->ip.ipv6)) == 0 || - memcmp(&se->ip.ipv6, &in6addr_loopback, sizeof(se->ip.ipv6)) == 0; - } - - return false; -} - -static bool 
local_sockets_is_multicast_address(struct socket_endpoint *se) { - if (se->family == AF_INET) { - // For IPv4, check if the address is 0.0.0.0 - uint32_t ip = htonl(se->ip.ipv4); - return (ip >= 0xE0000000 && ip <= 0xEFFFFFFF); // Multicast address range (224.0.0.0/4) - } - else if (se->family == AF_INET6) { - // For IPv6, check if the address is ff00::/8 - uint8_t *ip6 = (uint8_t *)&se->ip.ipv6; - return ip6[0] == 0xff; - } - - return false; -} - -static bool local_sockets_is_zero_address(struct socket_endpoint *se) { - if (se->family == AF_INET) { - // For IPv4, check if the address is 0.0.0.0 - return se->ip.ipv4 == 0; - } - else if (se->family == AF_INET6) { - // For IPv6, check if the address is :: - return memcmp(&se->ip.ipv6, &in6addr_any, sizeof(se->ip.ipv6)) == 0; - } - - return false; -} - -static inline const char *local_sockets_address_space(struct socket_endpoint *se) { - if(local_sockets_is_zero_address(se)) - return "zero"; - else if(local_sockets_is_loopback_address(se)) - return "loopback"; - else if(local_sockets_is_multicast_address(se)) - return "multicast"; - else if(local_sockets_is_private_address(se)) - return "private"; - else - return "public"; -} - -// -------------------------------------------------------------------------------------------------------------------- - static inline void local_sockets_index_listening_port(LS_STATE *ls, LOCAL_SOCKET *n) { if(n->direction & SOCKET_DIRECTION_LISTEN) { // for the listening sockets, keep a hashtable with all the local ports @@ -636,28 +670,31 @@ static inline bool local_sockets_add_socket(LS_STATE *ls, LOCAL_SOCKET *tmp) { #ifdef HAVE_LIBMNL -static inline void local_sockets_netlink_init(LS_STATE *ls) { - ls->use_nl = true; +static inline void local_sockets_libmnl_init(LS_STATE *ls) { ls->nl = mnl_socket_open(NETLINK_INET_DIAG); - if (!ls->nl) { - local_sockets_log(ls, "cannot open netlink socket"); + if (ls->nl == NULL) { + local_sockets_log(ls, "cannot open libmnl netlink socket"); 
ls->use_nl = false; } - - if (mnl_socket_bind(ls->nl, 0, MNL_SOCKET_AUTOPID) < 0) { - local_sockets_log(ls, "cannot bind netlink socket"); + else if (mnl_socket_bind(ls->nl, 0, MNL_SOCKET_AUTOPID) < 0) { + local_sockets_log(ls, "cannot bind libmnl netlink socket"); + mnl_socket_close(ls->nl); + ls->nl = NULL; ls->use_nl = false; } + else + ls->use_nl = true; } -static inline void local_sockets_netlink_cleanup(LS_STATE *ls) { +static inline void local_sockets_libmnl_cleanup(LS_STATE *ls) { if(ls->nl) { mnl_socket_close(ls->nl); ls->nl = NULL; + ls->use_nl = false; } } -static inline int local_sockets_netlink_cb_data(const struct nlmsghdr *nlh, void *data) { +static inline int local_sockets_libmnl_cb_data(const struct nlmsghdr *nlh, void *data) { LS_STATE *ls = data; struct inet_diag_msg *diag_msg = mnl_nlmsg_get_payload(nlh); @@ -666,15 +703,19 @@ static inline int local_sockets_netlink_cb_data(const struct nlmsghdr *nlh, void .inode = diag_msg->idiag_inode, .direction = SOCKET_DIRECTION_NONE, .state = diag_msg->idiag_state, + .ipv6ony = { + .checked = false, + .ipv46 = false, + }, .local = { .protocol = ls->tmp_protocol, .family = diag_msg->idiag_family, - .port = diag_msg->id.idiag_sport, + .port = ntohs(diag_msg->id.idiag_sport), }, .remote = { .protocol = ls->tmp_protocol, .family = diag_msg->idiag_family, - .port = diag_msg->id.idiag_dport, + .port = ntohs(diag_msg->id.idiag_dport), }, .timer = diag_msg->idiag_timer, .retransmits = diag_msg->idiag_retrans, @@ -693,12 +734,37 @@ static inline int local_sockets_netlink_cb_data(const struct nlmsghdr *nlh, void memcpy(&n.remote.ip.ipv6, diag_msg->id.idiag_dst, sizeof(n.remote.ip.ipv6)); } + struct rtattr *attr = (struct rtattr *)(diag_msg + 1); + int rtattrlen = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*diag_msg)); + for (; !n.ipv6ony.checked && RTA_OK(attr, rtattrlen); attr = RTA_NEXT(attr, rtattrlen)) { + switch (attr->rta_type) { + case INET_DIAG_INFO: { + if(ls->tmp_protocol == IPPROTO_TCP) { + struct tcp_info 
*info = (struct tcp_info *)RTA_DATA(attr); + n.info.tcp = *info; + ls->stats.tcp_info_received++; + } + } + break; + + case INET_DIAG_SKV6ONLY: { + n.ipv6ony.checked = true; + int ipv6only = *(int *)RTA_DATA(attr); + n.ipv6ony.ipv46 = !ipv6only; + } + break; + + default: + break; + } + } + local_sockets_add_socket(ls, &n); return MNL_CB_OK; } -static inline bool local_sockets_netlink_get_sockets(LS_STATE *ls, uint16_t family, uint16_t protocol) { +static inline bool local_sockets_libmnl_get_sockets(LS_STATE *ls, uint16_t family, uint16_t protocol) { ls->tmp_protocol = protocol; char buf[MNL_SOCKET_BUFFER_SIZE]; @@ -710,14 +776,22 @@ static inline bool local_sockets_netlink_get_sockets(LS_STATE *ls, uint16_t fami req.sdiag_family = family; req.sdiag_protocol = protocol; req.idiag_states = -1; + req.idiag_ext = 0; + + if(family == AF_INET6) + req.idiag_ext |= 1 << (INET_DIAG_SKV6ONLY - 1); + + if(protocol == IPPROTO_TCP && ls->config.tcp_info) + req.idiag_ext |= 1 << (INET_DIAG_INFO - 1); nlh = mnl_nlmsg_put_header(buf); nlh->nlmsg_type = SOCK_DIAG_BY_FAMILY; - nlh->nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST; + nlh->nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; nlh->nlmsg_seq = seq = time(NULL); mnl_nlmsg_put_extra_header(nlh, sizeof(req)); memcpy(mnl_nlmsg_get_payload(nlh), &req, sizeof(req)); + ls->stats.mnl_sends++; if (mnl_socket_sendto(ls->nl, nlh, nlh->nlmsg_len) < 0) { local_sockets_log(ls, "mnl_socket_send failed"); return false; @@ -725,7 +799,7 @@ static inline bool local_sockets_netlink_get_sockets(LS_STATE *ls, uint16_t fami ssize_t ret; while ((ret = mnl_socket_recvfrom(ls->nl, buf, sizeof(buf))) > 0) { - ret = mnl_cb_run(buf, ret, seq, portid, local_sockets_netlink_cb_data, ls); + ret = mnl_cb_run(buf, ret, seq, portid, local_sockets_libmnl_cb_data, ls); if (ret <= MNL_CB_STOP) break; } @@ -774,6 +848,10 @@ static inline bool local_sockets_read_proc_net_x(LS_STATE *ls, const char *filen LOCAL_SOCKET n = { .direction = SOCKET_DIRECTION_NONE, + 
.ipv6ony = { + .checked = false, + .ipv46 = false, + }, .local = { .family = family, .protocol = protocol, @@ -904,6 +982,10 @@ static inline void local_sockets_detect_directions(LS_STATE *ls) { // -------------------------------------------------------------------------------------------------------------------- static inline void local_sockets_init(LS_STATE *ls) { + ls->config.host_prefix = netdata_configured_host_prefix; + + spinlock_init(&ls->spinlock); + simple_hashtable_init_NET_NS(&ls->ns_hashtable, 1024); simple_hashtable_init_PID_SOCKET(&ls->pid_sockets_hashtable, 65535); simple_hashtable_init_LOCAL_SOCKET(&ls->sockets_hashtable, 65535); @@ -923,9 +1005,36 @@ static inline void local_sockets_init(LS_STATE *ls) { 65536, 65536, NULL, NULL, NULL, false, true); + + memset(&ls->stats, 0, sizeof(ls->stats)); + +#ifdef HAVE_LIBMNL + ls->use_nl = false; + ls->nl = NULL; + ls->tmp_protocol = 0; + local_sockets_libmnl_init(ls); +#endif + + if(ls->config.namespaces && ls->spawn_server == NULL) { + ls->spawn_server = spawn_server_create(NULL, local_sockets_spawn_server_callback, 0, NULL); + ls->spawn_server_is_mine = true; + } + else + ls->spawn_server_is_mine = false; } static inline void local_sockets_cleanup(LS_STATE *ls) { + + if(ls->spawn_server_is_mine) { + spawn_server_destroy(ls->spawn_server); + ls->spawn_server = NULL; + ls->spawn_server_is_mine = false; + } + +#ifdef HAVE_LIBMNL + local_sockets_libmnl_cleanup(ls); +#endif + // free the sockets hashtable data for(SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_first_read_only_LOCAL_SOCKET(&ls->sockets_hashtable); sl; @@ -963,8 +1072,8 @@ static inline void local_sockets_cleanup(LS_STATE *ls) { static inline void local_sockets_do_family_protocol(LS_STATE *ls, const char *filename, uint16_t family, uint16_t protocol) { #ifdef HAVE_LIBMNL - if(ls->use_nl) { - ls->use_nl = local_sockets_netlink_get_sockets(ls, family, protocol); + if(ls->nl && ls->use_nl) { + ls->use_nl = 
local_sockets_libmnl_get_sockets(ls, family, protocol); if(ls->use_nl) return; @@ -974,7 +1083,7 @@ static inline void local_sockets_do_family_protocol(LS_STATE *ls, const char *fi local_sockets_read_proc_net_x(ls, filename, family, protocol); } -static inline void local_sockets_read_sockets_from_proc(LS_STATE *ls) { +static inline void local_sockets_read_all_system_sockets(LS_STATE *ls) { char path[FILENAME_MAX + 1]; if(ls->config.namespaces) { @@ -1036,7 +1145,52 @@ static inline void local_sockets_send_to_parent(struct local_socket_state *ls __ local_sockets_log(ls, "failed to write cmdline to pipe"); } -static inline bool local_sockets_get_namespace_sockets(LS_STATE *ls, struct pid_socket *ps, pid_t *pid) { +static inline void local_sockets_spawn_server_callback(SPAWN_REQUEST *request) { + LS_STATE ls = { 0 }; + ls.config = *((struct local_sockets_config *)request->data); + + // we don't need these inside namespaces + ls.config.cmdline = false; + ls.config.comm = false; + ls.config.pid = false; + ls.config.namespaces = false; + + // initialize local sockets + local_sockets_init(&ls); + + ls.config.host_prefix = ""; // we need the /proc of the container + + struct local_sockets_child_work cw = { + .net_ns_inode = ls.proc_self_net_ns_inode, + .fd = request->fds[1], // stdout + }; + + ls.config.cb = local_sockets_send_to_parent; + ls.config.data = &cw; + ls.proc_self_net_ns_inode = ls.config.net_ns_inode; + + // switch namespace using the custom fd passed via the spawn server + if (setns(request->fds[3], CLONE_NEWNET) == -1) { + local_sockets_log(&ls, "failed to switch network namespace at child process using fd %d", request->fds[3]); + exit(EXIT_FAILURE); + } + + // read all sockets from /proc + local_sockets_read_all_system_sockets(&ls); + + // send all sockets to parent + local_sockets_foreach_local_socket_call_cb(&ls); + + // send the terminating socket + struct local_socket zero = { + .net_ns_inode = ls.config.net_ns_inode, + }; + 
local_sockets_send_to_parent(&ls, &zero, &cw); + + exit(EXIT_SUCCESS); +} + +static inline bool local_sockets_get_namespace_sockets_with_pid(LS_STATE *ls, struct pid_socket *ps) { char filename[1024]; snprintfz(filename, sizeof(filename), "%s/proc/%d/ns/net", ls->config.host_prefix, ps->pid); @@ -1060,80 +1214,32 @@ static inline bool local_sockets_get_namespace_sockets(LS_STATE *ls, struct pid_ return false; } - int pipefd[2]; - if (pipe(pipefd) != 0) { - local_sockets_log(ls, "cannot create pipe"); + if(ls->spawn_server == NULL) { close(fd); + local_sockets_log(ls, "spawn server is not available"); return false; } - *pid = fork(); - if (*pid == 0) { - // Child process - close(pipefd[0]); + struct local_sockets_config config = ls->config; + config.net_ns_inode = ps->net_ns_inode; + SPAWN_INSTANCE *si = spawn_server_exec(ls->spawn_server, STDERR_FILENO, fd, NULL, &config, sizeof(config), SPAWN_INSTANCE_TYPE_CALLBACK); + close(fd); fd = -1; - // local_sockets_log(ls, "child is here for inode %"PRIu64" and namespace %"PRIu64, ps->inode, ps->net_ns_inode); - - struct local_sockets_child_work cw = { - .net_ns_inode = ps->net_ns_inode, - .fd = pipefd[1], - }; - - ls->config.host_prefix = ""; // we need the /proc of the container - ls->config.cb = local_sockets_send_to_parent; - ls->config.data = &cw; - ls->config.cmdline = false; // we have these already - ls->config.comm = false; // we have these already - ls->config.pid = false; // we have these already - ls->config.namespaces = false; - ls->proc_self_net_ns_inode = ps->net_ns_inode; - - - // switch namespace - if (setns(fd, CLONE_NEWNET) == -1) { - local_sockets_log(ls, "failed to switch network namespace at child process"); - exit(EXIT_FAILURE); - } - -#ifdef HAVE_LIBMNL - local_sockets_netlink_cleanup(ls); - local_sockets_netlink_init(ls); -#endif - - // read all sockets from /proc - local_sockets_read_sockets_from_proc(ls); - - // send all sockets to parent - local_sockets_foreach_local_socket_call_cb(ls); - - // 
send the terminating socket - struct local_socket zero = { - .net_ns_inode = ps->net_ns_inode, - }; - local_sockets_send_to_parent(ls, &zero, &cw); - -#ifdef HAVE_LIBMNL - local_sockets_netlink_cleanup(ls); -#endif - - close(pipefd[1]); // Close write end of pipe - exit(EXIT_SUCCESS); + if(si == NULL) { + local_sockets_log(ls, "cannot create spawn instance"); + return false; } - // parent - - close(fd); - close(pipefd[1]); size_t received = 0; struct local_socket buf; - while(read(pipefd[0], &buf, sizeof(buf)) == sizeof(buf)) { + while(read(spawn_server_instance_read_fd(si), &buf, sizeof(buf)) == sizeof(buf)) { size_t len = 0; - if(read(pipefd[0], &len, sizeof(len)) != sizeof(len)) + if(read(spawn_server_instance_read_fd(si), &len, sizeof(len)) != sizeof(len)) local_sockets_log(ls, "failed to read cmdline length from pipe"); if(len) { char cmdline[len + 1]; - if(read(pipefd[0], cmdline, len) != (ssize_t)len) + if(read(spawn_server_instance_read_fd(si), cmdline, len) != (ssize_t)len) local_sockets_log(ls, "failed to read cmdline from pipe"); else { cmdline[len] = '\0'; @@ -1153,15 +1259,15 @@ static inline bool local_sockets_get_namespace_sockets(LS_STATE *ls, struct pid_ break; } + spinlock_lock(&ls->spinlock); + SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_get_slot_LOCAL_SOCKET(&ls->sockets_hashtable, buf.inode, &buf, true); LOCAL_SOCKET *n = SIMPLE_HASHTABLE_SLOT_DATA(sl); if(n) { string_freez(buf.cmdline); - // local_sockets_log(ls, // "ns inode %" PRIu64" (comm: '%s', pid: %u, ns: %"PRIu64") already exists in hashtable (comm: '%s', pid: %u, ns: %"PRIu64") - ignoring duplicate", // buf.inode, buf.comm, buf.pid, buf.net_ns_inode, n->comm, n->pid, n->net_ns_inode); - continue; } else { n = aral_mallocz(ls->local_socket_aral); @@ -1170,75 +1276,109 @@ static inline bool local_sockets_get_namespace_sockets(LS_STATE *ls, struct pid_ local_sockets_index_listening_port(ls, n); } + + spinlock_unlock(&ls->spinlock); } - close(pipefd[0]); - + 
spawn_server_exec_kill(ls->spawn_server, si); return received > 0; } -static inline void local_socket_waitpid(LS_STATE *ls, pid_t pid) { - if(!pid) return; +struct local_sockets_namespace_worker { + LS_STATE *ls; + uint64_t inode; +}; - int status; - waitpid(pid, &status, 0); +static inline void *local_sockets_get_namespace_sockets(void *arg) { + struct local_sockets_namespace_worker *data = arg; + LS_STATE *ls = data->ls; + const uint64_t inode = data->inode; - if (WIFEXITED(status) && WEXITSTATUS(status) != 0) - local_sockets_log(ls, "Child exited with status %d", WEXITSTATUS(status)); - else if (WIFSIGNALED(status)) - local_sockets_log(ls, "Child terminated by signal %d", WTERMSIG(status)); + spinlock_lock(&ls->spinlock); + + // find a pid_socket that has this namespace + for(SIMPLE_HASHTABLE_SLOT_PID_SOCKET *sl_pid = simple_hashtable_first_read_only_PID_SOCKET(&ls->pid_sockets_hashtable) ; + sl_pid ; + sl_pid = simple_hashtable_next_read_only_PID_SOCKET(&ls->pid_sockets_hashtable, sl_pid)) { + struct pid_socket *ps = SIMPLE_HASHTABLE_SLOT_DATA(sl_pid); + if(!ps || ps->net_ns_inode != inode) continue; + + // now we have a pid that has the same namespace inode + + spinlock_unlock(&ls->spinlock); + const bool worked = local_sockets_get_namespace_sockets_with_pid(ls, ps); + spinlock_lock(&ls->spinlock); + + if(worked) + break; + } + + spinlock_unlock(&ls->spinlock); + + return NULL; } static inline void local_sockets_namespaces(LS_STATE *ls) { - pid_t children[5] = { 0 }; - size_t last_child = 0; + size_t threads = ls->config.max_concurrent_namespaces; + if(threads == 0) threads = 5; + if(threads > 100) threads = 100; + + size_t last_thread = 0; + ND_THREAD *workers[threads]; + struct local_sockets_namespace_worker workers_data[threads]; + memset(workers, 0, sizeof(workers)); + memset(workers_data, 0, sizeof(workers_data)); + + spinlock_lock(&ls->spinlock); for(SIMPLE_HASHTABLE_SLOT_NET_NS *sl = simple_hashtable_first_read_only_NET_NS(&ls->ns_hashtable); sl; sl = 
simple_hashtable_next_read_only_NET_NS(&ls->ns_hashtable, sl)) { - uint64_t inode = (uint64_t)SIMPLE_HASHTABLE_SLOT_DATA(sl); + const uint64_t inode = (uint64_t)SIMPLE_HASHTABLE_SLOT_DATA(sl); if(inode == ls->proc_self_net_ns_inode) continue; - // find a pid_socket that has this namespace - for(SIMPLE_HASHTABLE_SLOT_PID_SOCKET *sl_pid = simple_hashtable_first_read_only_PID_SOCKET(&ls->pid_sockets_hashtable) ; - sl_pid ; - sl_pid = simple_hashtable_next_read_only_PID_SOCKET(&ls->pid_sockets_hashtable, sl_pid)) { - struct pid_socket *ps = SIMPLE_HASHTABLE_SLOT_DATA(sl_pid); - if(!ps || ps->net_ns_inode != inode) continue; + spinlock_unlock(&ls->spinlock); - if(++last_child >= 5) - last_child = 0; + ls->stats.namespaces_found++; - local_socket_waitpid(ls, children[last_child]); - children[last_child] = 0; + if(workers[last_thread] != NULL) { + if(++last_thread >= threads) + last_thread = 0; - // now we have a pid that has the same namespace inode - if(local_sockets_get_namespace_sockets(ls, ps, &children[last_child])) - break; + if(workers[last_thread]) { + nd_thread_join(workers[last_thread]); + workers[last_thread] = NULL; + } } + + workers_data[last_thread].ls = ls; + workers_data[last_thread].inode = inode; + workers[last_thread] = nd_thread_create( + "local-sockets-worker", NETDATA_THREAD_OPTION_JOINABLE, + local_sockets_get_namespace_sockets, &workers_data[last_thread]); + + spinlock_lock(&ls->spinlock); } - for(size_t i = 0; i < 5 ;i++) - local_socket_waitpid(ls, children[i]); + spinlock_unlock(&ls->spinlock); + + // wait all the threads running + for(size_t i = 0; i < threads ;i++) { + if(workers[i]) + nd_thread_join(workers[i]); + } } // -------------------------------------------------------------------------------------------------------------------- static inline void local_sockets_process(LS_STATE *ls) { - -#ifdef HAVE_LIBMNL - local_sockets_netlink_init(ls); -#endif - - ls->config.host_prefix = netdata_configured_host_prefix; - // initialize our 
hashtables local_sockets_init(ls); // read all sockets from /proc - local_sockets_read_sockets_from_proc(ls); + local_sockets_read_all_system_sockets(ls); // check all socket namespaces if(ls->config.namespaces) @@ -1253,10 +1393,6 @@ static inline void local_sockets_process(LS_STATE *ls) { // free all memory local_sockets_cleanup(ls); - -#ifdef HAVE_LIBMNL - local_sockets_netlink_cleanup(ls); -#endif } static inline void ipv6_address_to_txt(struct in6_addr *in6_addr, char *dst) { diff --git a/src/libnetdata/os/close_range.c b/src/libnetdata/os/close_range.c new file mode 100644 index 0000000000..10869adae4 --- /dev/null +++ b/src/libnetdata/os/close_range.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +static int fd_is_valid(int fd) { + errno_clear(); + return fcntl(fd, F_GETFD) != -1 || errno != EBADF; +} + +int os_get_fd_open_max(void) { + static int fd_open_max = CLOSE_RANGE_FD_MAX; + + if(fd_open_max != CLOSE_RANGE_FD_MAX) + return fd_open_max; + + if(fd_open_max == CLOSE_RANGE_FD_MAX || fd_open_max == -1) { + struct rlimit rl; + if (getrlimit(RLIMIT_NOFILE, &rl) == 0 && rl.rlim_max != RLIM_INFINITY) + fd_open_max = rl.rlim_max; + } + +#ifdef _SC_OPEN_MAX + if(fd_open_max == CLOSE_RANGE_FD_MAX || fd_open_max == -1) { + fd_open_max = sysconf(_SC_OPEN_MAX); + } +#endif + + if(fd_open_max == CLOSE_RANGE_FD_MAX || fd_open_max == -1) { + // Arbitrary default if everything else fails + fd_open_max = 65535; + } + + return fd_open_max; +} + +void os_close_range(int first, int last) { +#if defined(HAVE_CLOSE_RANGE) + if(close_range(first, last, 0) == 0) return; +#endif + +#if defined(OS_LINUX) + DIR *dir = opendir("/proc/self/fd"); + if (dir != NULL) { + struct dirent *entry; + while ((entry = readdir(dir)) != NULL) { + int fd = str2i(entry->d_name); + if (fd >= first && (last == CLOSE_RANGE_FD_MAX || fd <= last) && fd_is_valid(fd)) + (void)close(fd); + } + closedir(dir); + return; + } +#endif + + // Fallback to looping 
through all file descriptors if necessary + if (last == CLOSE_RANGE_FD_MAX) + last = os_get_fd_open_max(); + + for (int fd = first; fd <= last; fd++) { + if (fd_is_valid(fd)) (void)close(fd); + } +} + +static int compare_ints(const void *a, const void *b) { + int int_a = *((int*)a); + int int_b = *((int*)b); + return (int_a > int_b) - (int_a < int_b); +} + +void os_close_all_non_std_open_fds_except(int fds[], size_t fds_num) { + if (fds_num == 0 || fds == NULL) { + os_close_range(STDERR_FILENO + 1, CLOSE_RANGE_FD_MAX); + return; + } + + qsort(fds, fds_num, sizeof(int), compare_ints); + + int start = STDERR_FILENO + 1; + for (size_t i = 0; i < fds_num; i++) { + if (fds[i] > start) + os_close_range(start, fds[i] - 1); + + start = fds[i] + 1; + } + + os_close_range(start, CLOSE_RANGE_FD_MAX); +} diff --git a/src/libnetdata/os/close_range.h b/src/libnetdata/os/close_range.h new file mode 100644 index 0000000000..239b6cd46f --- /dev/null +++ b/src/libnetdata/os/close_range.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef CLOSE_RANGE_H +#define CLOSE_RANGE_H + +#define CLOSE_RANGE_FD_MAX (int)(~0U) + +int os_get_fd_open_max(void); +void os_close_range(int first, int last); +void os_close_all_non_std_open_fds_except(int fds[], size_t fds_num); + +#endif //CLOSE_RANGE_H diff --git a/src/libnetdata/os/get_pid_max.c b/src/libnetdata/os/get_pid_max.c index 45027961ae..70372a7438 100644 --- a/src/libnetdata/os/get_pid_max.c +++ b/src/libnetdata/os/get_pid_max.c @@ -2,13 +2,27 @@ #include "../libnetdata.h" -pid_t pid_max = 32768; -pid_t os_get_system_pid_max(void) { -#if defined(OS_MACOS) +pid_t pid_max = 4194304; + +pid_t os_get_system_pid_max(void) { + static bool read = false; + if(read) return pid_max; + read = true; + +#if defined(OS_MACOS) + int mib[2]; + int maxproc; + size_t len = sizeof(maxproc); + + mib[0] = CTL_KERN; + mib[1] = KERN_MAXPROC; + + if (sysctl(mib, 2, &maxproc, &len, NULL, 0) == -1) { + pid_max = 99999; // Fallback value + 
nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot find system max pid. Assuming %d.", pid_max); + } + else pid_max = (pid_t)maxproc; - // As we currently do not know a solution to query pid_max from the os - // we use the number defined in bsd/sys/proc_internal.h in XNU sources - pid_max = 99999; return pid_max; #elif defined(OS_FREEBSD) @@ -17,41 +31,40 @@ pid_t os_get_system_pid_max(void) { if (unlikely(GETSYSCTL_BY_NAME("kern.pid_max", tmp_pid_max))) { pid_max = 99999; - netdata_log_error("Assuming system's maximum pid is %d.", pid_max); - } else { - pid_max = tmp_pid_max; + nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot get system max pid. Assuming %d.", pid_max); } + else + pid_max = tmp_pid_max; return pid_max; #elif defined(OS_LINUX) - static char read = 0; - if(unlikely(read)) return pid_max; - read = 1; - char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", netdata_configured_host_prefix?netdata_configured_host_prefix:""); unsigned long long max = 0; if(read_single_number_file(filename, &max) != 0) { - netdata_log_error("Cannot open file '%s'. Assuming system supports %d pids.", filename, pid_max); + nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot open file '%s'. Assuming system supports %d pids.", filename, pid_max); return pid_max; } if(!max) { - netdata_log_error("Cannot parse file '%s'. Assuming system supports %d pids.", filename, pid_max); + nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot parse file '%s'. 
Assuming system supports %d pids.", filename, pid_max); return pid_max; } pid_max = (pid_t) max; return pid_max; +#elif defined(OS_WINDOWS) + + pid_max = (pid_t)0x7FFFFFFF; + return pid_max; + #else - // just a big default - - pid_max = 4194304; + // return the default return pid_max; #endif diff --git a/src/libnetdata/os/os.h b/src/libnetdata/os/os.h index e259f2db24..15e74faa76 100644 --- a/src/libnetdata/os/os.h +++ b/src/libnetdata/os/os.h @@ -7,12 +7,13 @@ #include <sys/syscall.h> #endif +#include "setproctitle.h" +#include "close_range.h" #include "setresuid.h" #include "setresgid.h" #include "getgrouplist.h" #include "adjtimex.h" #include "gettid.h" -#include "waitid.h" #include "get_pid_max.h" #include "get_system_cpus.h" #include "tinysleep.h" diff --git a/src/libnetdata/os/setproctitle.c b/src/libnetdata/os/setproctitle.c new file mode 100644 index 0000000000..d931582029 --- /dev/null +++ b/src/libnetdata/os/setproctitle.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" +#include "setproctitle.h" + +void os_setproctitle(const char *new_name, const int argc, const char **argv) { +#ifdef HAVE_SYS_PRCTL_H + // Set the process name (comm) + prctl(PR_SET_NAME, new_name, 0, 0, 0); +#endif + +#ifdef __FreeBSD__ + // Set the process name on FreeBSD + setproctitle("%s", new_name); +#endif + + if(argc && argv) { + // replace with spaces all parameters found (except argv[0]) + for(int i = 1; i < argc ;i++) { + char *s = (char *)&argv[i][0]; + while(*s != '\0') *s++ = ' '; + } + + // overwrite argv[0] + size_t len = strlen(new_name); + const size_t argv0_len = strlen(argv[0]); + strncpyz((char *)argv[0], new_name, MIN(len, argv0_len)); + while(len < argv0_len) + ((char *)argv[0])[len++] = ' '; + } +} diff --git a/src/libnetdata/os/setproctitle.h b/src/libnetdata/os/setproctitle.h new file mode 100644 index 0000000000..0e7211b269 --- /dev/null +++ b/src/libnetdata/os/setproctitle.h @@ -0,0 +1,8 @@ +// 
SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef SETPROCTITLE_H +#define SETPROCTITLE_H + +void os_setproctitle(const char *new_name, int argc, const char **argv); + +#endif //SETPROCTITLE_H diff --git a/src/libnetdata/os/waitid.c b/src/libnetdata/os/waitid.c deleted file mode 100644 index b78d704ed0..0000000000 --- a/src/libnetdata/os/waitid.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "../libnetdata.h" - -int os_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options) { -#if defined(HAVE_WAITID) - return waitid(idtype, id, infop, options); -#else - // emulate waitid() using waitpid() - - // a cache for WNOWAIT - static const struct pid_status empty = { 0, 0 }; - static __thread struct pid_status last = { 0, 0 }; // the cache - struct pid_status current = { 0, 0 }; - - // zero the infop structure - memset(infop, 0, sizeof(*infop)); - - // from the infop structure we use only 3 fields: - // - si_pid - // - si_code - // - si_status - // so, we update only these 3 - - switch(idtype) { - case P_ALL: - current.pid = waitpid((pid_t)-1, ¤t.status, options); - if(options & WNOWAIT) - last = current; - else - last = empty; - break; - - case P_PID: - if(last.pid == (pid_t)id) { - current = last; - last = empty; - } - else - current.pid = waitpid((pid_t)id, ¤t.status, options); - - break; - - default: - errno = ENOSYS; - return -1; - } - - if (current.pid > 0) { - if (WIFEXITED(current.status)) { - infop->si_code = CLD_EXITED; - infop->si_status = WEXITSTATUS(current.status); - } else if (WIFSIGNALED(current.status)) { - infop->si_code = WTERMSIG(current.status) == SIGABRT ? 
CLD_DUMPED : CLD_KILLED; - infop->si_status = WTERMSIG(current.status); - } else if (WIFSTOPPED(current.status)) { - infop->si_code = CLD_STOPPED; - infop->si_status = WSTOPSIG(current.status); - } else if (WIFCONTINUED(current.status)) { - infop->si_code = CLD_CONTINUED; - infop->si_status = SIGCONT; - } - infop->si_pid = current.pid; - return 0; - } else if (current.pid == 0) { - // No change in state, depends on WNOHANG - return 0; - } - - return -1; -#endif -} diff --git a/src/libnetdata/os/waitid.h b/src/libnetdata/os/waitid.h deleted file mode 100644 index 9e1fd6be7d..0000000000 --- a/src/libnetdata/os/waitid.h +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_WAITID_H -#define NETDATA_WAITID_H - -#include "config.h" -#include <sys/types.h> -#include <signal.h> - -#ifdef HAVE_SYS_WAIT_H -#include <sys/wait.h> -#endif - -#ifndef WNOWAIT -#define WNOWAIT 0x01000000 -#endif - -#ifndef WEXITED -#define WEXITED 4 -#endif - -#if !defined(HAVE_WAITID) -typedef enum -{ - P_ALL, /* Wait for any child. */ - P_PID, /* Wait for specified process. */ - P_PGID, /* Wait for members of process group. */ - P_PIDFD, /* Wait for the child referred by the PID file descriptor. */ -} idtype_t; - -struct pid_status { - pid_t pid; - int status; -}; - -#if defined(OS_WINDOWS) && !defined(__CYGWIN__) -typedef uint32_t id_t; -typedef struct { - int si_code; /* Signal code. */ - int si_status; /* Exit value or signal. */ - pid_t si_pid; /* Sending process ID. 
*/ -} siginfo_t; -#endif -#endif - -int os_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); - -#endif //NETDATA_WAITID_H diff --git a/src/libnetdata/popen/README.md b/src/libnetdata/popen/README.md deleted file mode 100644 index ca4877c1a0..0000000000 --- a/src/libnetdata/popen/README.md +++ /dev/null @@ -1,15 +0,0 @@ -<!-- -title: "popen" -custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/popen/README.md -sidebar_label: "popen" -learn_status: "Published" -learn_topic_type: "Tasks" -learn_rel_path: "Developers/libnetdata" ---> - -# popen - -Process management library - - - diff --git a/src/libnetdata/popen/popen.c b/src/libnetdata/popen/popen.c deleted file mode 100644 index c1721e9b43..0000000000 --- a/src/libnetdata/popen/popen.c +++ /dev/null @@ -1,446 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "../libnetdata.h" - -// ---------------------------------------------------------------------------- -// popen with tracking - -static pthread_mutex_t netdata_popen_tracking_mutex = NETDATA_MUTEX_INITIALIZER; - -struct netdata_popen { - pid_t pid; - bool reaped; - siginfo_t infop; - int waitid_ret; - struct netdata_popen *next; - struct netdata_popen *prev; -}; - -static struct netdata_popen *netdata_popen_root = NULL; - -// myp_add_lock takes the lock if we're tracking. -static void netdata_popen_tracking_lock(void) { - netdata_mutex_lock(&netdata_popen_tracking_mutex); -} - -// myp_add_unlock release the lock if we're tracking. -static void netdata_popen_tracking_unlock(void) { - netdata_mutex_unlock(&netdata_popen_tracking_mutex); -} - -// myp_add_locked adds pid if we're tracking. -// myp_add_lock must have been called previously. 
-static void netdata_popen_tracking_add_pid_unsafe(pid_t pid) { - struct netdata_popen *mp; - - mp = callocz(1, sizeof(struct netdata_popen)); - mp->pid = pid; - - DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(netdata_popen_root, mp, prev, next); -} - -// myp_del deletes pid if we're tracking. -static void netdata_popen_tracking_del_pid(pid_t pid) { - struct netdata_popen *mp; - - netdata_popen_tracking_lock(); - - DOUBLE_LINKED_LIST_FOREACH_FORWARD(netdata_popen_root, mp, prev, next) { - if(unlikely(mp->pid == pid)) - break; - } - - if(mp) { - DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(netdata_popen_root, mp, prev, next); - freez(mp); - } - else - netdata_log_error("POPEN: Cannot find pid %d.", pid); - - netdata_popen_tracking_unlock(); -} - -// myp_free cleans up any resources allocated for process -// tracking. -void netdata_popen_tracking_cleanup(void) { - netdata_popen_tracking_lock(); - - while(netdata_popen_root) { - struct netdata_popen *mp = netdata_popen_root; - DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(netdata_popen_root, mp, prev, next); - freez(mp); - } - - netdata_popen_tracking_unlock(); -} - -int netdata_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options) { - struct netdata_popen *mp = NULL; - - if(idtype == P_PID && id != 0) { - // the caller is asking to waitid() for a specific child pid - - netdata_popen_tracking_lock(); - DOUBLE_LINKED_LIST_FOREACH_FORWARD(netdata_popen_root, mp, prev, next) { - if(unlikely(mp->pid == (pid_t)id)) - break; - } - - if(!mp) - netdata_popen_tracking_unlock(); - } - - int ret; - if(mp && mp->reaped) { - // we have already reaped this child - ret = mp->waitid_ret; - *infop = mp->infop; - } - else { - // we haven't reaped this child yet - ret = os_waitid(idtype, id, infop, options); - - if(mp && !mp->reaped) { - mp->reaped = true; - mp->infop = *infop; - mp->waitid_ret = ret; - } - } - - if(mp) - netdata_popen_tracking_unlock(); - - return ret; -} - -// 
---------------------------------------------------------------------------- -// helpers - -static inline void convert_argv_to_string(char *dst, size_t size, const char *spawn_argv[]) { - int i; - for(i = 0; spawn_argv[i] ;i++) { - if(i == 0) snprintfz(dst, size, "%s", spawn_argv[i]); - else { - size_t len = strlen(dst); - snprintfz(&dst[len], size - len, " '%s'", spawn_argv[i]); - } - } -} - -// ---------------------------------------------------------------------------- -// the core of netdata popen - -/* - * Returns -1 on failure, 0 on success. When POPEN_FLAG_CREATE_PIPE is set, on success set the FILE *fp pointer. - */ -#define PIPE_READ 0 -#define PIPE_WRITE 1 - -static int popene_internal(volatile pid_t *pidptr, char **env, uint8_t flags, FILE **fpp_child_stdin, FILE **fpp_child_stdout, const char *command, const char *spawn_argv[]) { - // create a string to be logged about the command we are running - char command_to_be_logged[2048]; - convert_argv_to_string(command_to_be_logged, sizeof(command_to_be_logged), spawn_argv); - // netdata_log_info("custom_popene() running command: %s", command_to_be_logged); - - int ret = 0; // success by default - int attr_rc = 1; // failure by default - - FILE *fp_child_stdin = NULL, *fp_child_stdout = NULL; - int pipefd_stdin[2] = { -1, -1 }; - int pipefd_stdout[2] = { -1, -1 }; - - pid_t pid; - posix_spawnattr_t attr; - posix_spawn_file_actions_t fa; - - unsigned int fds_to_exclude_from_closing = OPEN_FD_EXCLUDE_STDERR; - - if(posix_spawn_file_actions_init(&fa)) { - netdata_log_error("POPEN: posix_spawn_file_actions_init() failed."); - ret = -1; - goto set_return_values_and_return; - } - - if(fpp_child_stdin) { - if (pipe(pipefd_stdin) == -1) { - netdata_log_error("POPEN: stdin pipe() failed"); - ret = -1; - goto cleanup_and_return; - } - - if ((fp_child_stdin = fdopen(pipefd_stdin[PIPE_WRITE], "w")) == NULL) { - netdata_log_error("POPEN: fdopen() stdin failed"); - ret = -1; - goto cleanup_and_return; - } - - 
if(posix_spawn_file_actions_adddup2(&fa, pipefd_stdin[PIPE_READ], STDIN_FILENO)) { - netdata_log_error("POPEN: posix_spawn_file_actions_adddup2() on stdin failed."); - ret = -1; - goto cleanup_and_return; - } - } - else { - if (posix_spawn_file_actions_addopen(&fa, STDIN_FILENO, "/dev/null", O_RDONLY, 0)) { - netdata_log_error("POPEN: posix_spawn_file_actions_addopen() on stdin to /dev/null failed."); - // this is not a fatal error - fds_to_exclude_from_closing |= OPEN_FD_EXCLUDE_STDIN; - } - } - - if (fpp_child_stdout) { - if (pipe(pipefd_stdout) == -1) { - netdata_log_error("POPEN: stdout pipe() failed"); - ret = -1; - goto cleanup_and_return; - } - - if ((fp_child_stdout = fdopen(pipefd_stdout[PIPE_READ], "r")) == NULL) { - netdata_log_error("POPEN: fdopen() stdout failed"); - ret = -1; - goto cleanup_and_return; - } - - if(posix_spawn_file_actions_adddup2(&fa, pipefd_stdout[PIPE_WRITE], STDOUT_FILENO)) { - netdata_log_error("POPEN: posix_spawn_file_actions_adddup2() on stdout failed."); - ret = -1; - goto cleanup_and_return; - } - } - else { - if (posix_spawn_file_actions_addopen(&fa, STDOUT_FILENO, "/dev/null", O_WRONLY, 0)) { - netdata_log_error("POPEN: posix_spawn_file_actions_addopen() on stdout to /dev/null failed."); - // this is not a fatal error - fds_to_exclude_from_closing |= OPEN_FD_EXCLUDE_STDOUT; - } - } - - if(flags & POPEN_FLAG_CLOSE_FD) { - // Mark all files to be closed by the exec() stage of posix_spawn() - for_each_open_fd(OPEN_FD_ACTION_FD_CLOEXEC, fds_to_exclude_from_closing); - } - - attr_rc = posix_spawnattr_init(&attr); - if(attr_rc) { - // failed - netdata_log_error("POPEN: posix_spawnattr_init() failed."); - } - else { - // success - // reset all signals in the child - - if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF)) - netdata_log_error("POPEN: posix_spawnattr_setflags() failed."); - - sigset_t mask; - sigemptyset(&mask); - - if (posix_spawnattr_setsigmask(&attr, &mask)) - netdata_log_error("POPEN: 
posix_spawnattr_setsigmask() failed."); - } - - // Take the lock while we fork to ensure we don't race with SIGCHLD - // delivery on a process which exits quickly. - netdata_popen_tracking_lock(); - if (!posix_spawn(&pid, command, &fa, &attr, (char * const*)spawn_argv, env)) { - // success - *pidptr = pid; - netdata_popen_tracking_add_pid_unsafe(pid); - netdata_popen_tracking_unlock(); - } - else { - // failure - netdata_popen_tracking_unlock(); - netdata_log_error("POPEN: failed to spawn command: \"%s\" from parent pid %d.", command_to_be_logged, getpid()); - ret = -1; - goto cleanup_and_return; - } - - // the normal cleanup will run - // but ret == 0 at this point - -cleanup_and_return: - if(!attr_rc) { - // posix_spawnattr_init() succeeded - if (posix_spawnattr_destroy(&attr)) - netdata_log_error("POPEN: posix_spawnattr_destroy() failed"); - } - - if (posix_spawn_file_actions_destroy(&fa)) - netdata_log_error("POPEN: posix_spawn_file_actions_destroy() failed"); - - // the child end - close it - if(pipefd_stdin[PIPE_READ] != -1) - close(pipefd_stdin[PIPE_READ]); - - // our end - if(ret == -1 || !fpp_child_stdin) { - if (fp_child_stdin) - fclose(fp_child_stdin); - else if (pipefd_stdin[PIPE_WRITE] != -1) - close(pipefd_stdin[PIPE_WRITE]); - - fp_child_stdin = NULL; - } - - // the child end - close it - if (pipefd_stdout[PIPE_WRITE] != -1) - close(pipefd_stdout[PIPE_WRITE]); - - // our end - if (ret == -1 || !fpp_child_stdout) { - if (fp_child_stdout) - fclose(fp_child_stdout); - else if (pipefd_stdout[PIPE_READ] != -1) - close(pipefd_stdout[PIPE_READ]); - - fp_child_stdout = NULL; - } - -set_return_values_and_return: - if(fpp_child_stdin) - *fpp_child_stdin = fp_child_stdin; - - if(fpp_child_stdout) - *fpp_child_stdout = fp_child_stdout; - - return ret; -} - -int netdata_popene_variadic_internal_dont_use_directly(volatile pid_t *pidptr, char **env, uint8_t flags, FILE **fpp_child_input, FILE **fpp_child_output, const char *command, ...) 
{ - // convert the variable list arguments into what posix_spawn() needs - // all arguments are expected strings - va_list args; - int args_count; - - // count the number variable parameters - // the variable parameters are expected NULL terminated - { - const char *s; - - va_start(args, command); - args_count = 0; - while ((s = va_arg(args, const char *))) args_count++; - va_end(args); - } - - // create a string pointer array as needed by posix_spawn() - // variable array in the stack - const char *spawn_argv[args_count + 1]; - { - const char *s; - va_start(args, command); - int i; - for (i = 0; i < args_count; i++) { - s = va_arg(args, const char *); - spawn_argv[i] = s; - } - spawn_argv[args_count] = NULL; - va_end(args); - } - - return popene_internal(pidptr, env, flags, fpp_child_input, fpp_child_output, command, spawn_argv); -} - -// See man environ -extern char **environ; - -FILE *netdata_popen(const char *command, volatile pid_t *pidptr, FILE **fpp_child_input) { - FILE *fp_child_output = NULL; - const char *spawn_argv[] = { - "sh", - "-c", - command, - NULL - }; - (void)popene_internal(pidptr, environ, POPEN_FLAG_CLOSE_FD, fpp_child_input, &fp_child_output, "/bin/sh", spawn_argv); - return fp_child_output; -} - -FILE *netdata_popene(const char *command, volatile pid_t *pidptr, char **env, FILE **fpp_child_input) { - FILE *fp_child_output = NULL; - const char *spawn_argv[] = { - "sh", - "-c", - command, - NULL - }; - (void)popene_internal(pidptr, env, POPEN_FLAG_CLOSE_FD, fpp_child_input, &fp_child_output, "/bin/sh", spawn_argv); - return fp_child_output; -} - -// returns 0 on success, -1 on failure -int netdata_spawn(const char *command, volatile pid_t *pidptr) { - const char *spawn_argv[] = { - "sh", - "-c", - command, - NULL - }; - return popene_internal(pidptr, environ, POPEN_FLAG_NONE, NULL, NULL, "/bin/sh", spawn_argv); -} - -int netdata_pclose(FILE *fp_child_input, FILE *fp_child_output, pid_t pid) { - int ret; - siginfo_t info; - - 
netdata_log_debug(D_EXIT, "Request to netdata_pclose() on pid %d", pid); - - if (fp_child_input) - fclose(fp_child_input); - - if (fp_child_output) - fclose(fp_child_output); - - errno = 0; - - ret = netdata_waitid(P_PID, (id_t) pid, &info, WEXITED); - netdata_popen_tracking_del_pid(pid); - - if (ret != -1) { - switch (info.si_code) { - case CLD_EXITED: - if(info.si_status) - netdata_log_error("child pid %d exited with code %d.", info.si_pid, info.si_status); - return(info.si_status); - - case CLD_KILLED: - if(info.si_status == SIGTERM) { - netdata_log_info("child pid %d killed by SIGTERM", info.si_pid); - return(0); - } - else if(info.si_status == SIGPIPE) { - netdata_log_info("child pid %d killed by SIGPIPE.", info.si_pid); - return(0); - } - else { - netdata_log_error("child pid %d killed by signal %d.", info.si_pid, info.si_status); - return(-1); - } - - case CLD_DUMPED: - netdata_log_error("child pid %d core dumped by signal %d.", info.si_pid, info.si_status); - return(-2); - - case CLD_STOPPED: - netdata_log_error("child pid %d stopped by signal %d.", info.si_pid, info.si_status); - return(0); - - case CLD_TRAPPED: - netdata_log_error("child pid %d trapped by signal %d.", info.si_pid, info.si_status); - return(-4); - - case CLD_CONTINUED: - netdata_log_error("child pid %d continued by signal %d.", info.si_pid, info.si_status); - return(0); - - default: - netdata_log_error("child pid %d gave us a SIGCHLD with code %d and status %d.", info.si_pid, info.si_code, info.si_status); - return(-5); - } - } - else - netdata_log_error("Cannot waitid() for pid %d", pid); - - return 0; -} diff --git a/src/libnetdata/popen/popen.h b/src/libnetdata/popen/popen.h deleted file mode 100644 index 8f46abbc8c..0000000000 --- a/src/libnetdata/popen/popen.h +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_POPEN_H -#define NETDATA_POPEN_H 1 - -#include "../os/waitid.h" -int netdata_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int 
options); - -#include "../libnetdata.h" - -#define PIPE_READ 0 -#define PIPE_WRITE 1 - -/* custom_popene_variadic_internal_dont_use_directly flag definitions */ -#define POPEN_FLAG_NONE 0 -#define POPEN_FLAG_CLOSE_FD (1 << 0) // Close all file descriptors other than STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO - -// the flags to be used by default -#define POPEN_FLAGS_DEFAULT (POPEN_FLAG_CLOSE_FD) - -// mypopen_raw is the interface to use instead of custom_popene_variadic_internal_dont_use_directly() -// mypopen_raw will add the terminating NULL at the arguments list -// we append the parameter 'command' twice - this is because the underlying call needs the command to execute and the argv[0] to pass to it -#define netdata_popen_raw_default_flags_and_environment(pidptr, fpp_child_input, fpp_child_output, command, args...) netdata_popene_variadic_internal_dont_use_directly(pidptr, environ, POPEN_FLAGS_DEFAULT, fpp_child_input, fpp_child_output, command, command, ##args, NULL) -#define netdata_popen_raw_default_flags(pidptr, env, fpp_child_input, fpp_child_output, command, args...) netdata_popene_variadic_internal_dont_use_directly(pidptr, env, POPEN_FLAGS_DEFAULT, fpp_child_input, fpp_child_output, command, command, ##args, NULL) -#define netdata_popen_raw(pidptr, env, flags, fpp_child_input, fpp_child_output, command, args...) 
netdata_popene_variadic_internal_dont_use_directly(pidptr, env, flags, fpp_child_input, fpp_child_output, command, command, ##args, NULL) - -FILE *netdata_popen(const char *command, volatile pid_t *pidptr, FILE **fp_child_input); -FILE *netdata_popene(const char *command, volatile pid_t *pidptr, char **env, FILE **fp_child_input); -int netdata_popene_variadic_internal_dont_use_directly(volatile pid_t *pidptr, char **env, uint8_t flags, FILE **fpp_child_input, FILE **fpp_child_output, const char *command, ...); -int netdata_pclose(FILE *fp_child_input, FILE *fp_child_output, pid_t pid); - -int netdata_spawn(const char *command, volatile pid_t *pidptr); - -#endif /* NETDATA_POPEN_H */ diff --git a/src/libnetdata/procfile/procfile.c b/src/libnetdata/procfile/procfile.c index d9ebf4c932..2b7eeeb561 100644 --- a/src/libnetdata/procfile/procfile.c +++ b/src/libnetdata/procfile/procfile.c @@ -336,7 +336,7 @@ __attribute__((constructor)) void procfile_initialize_default_separators(void) { if(unlikely(i == '\n' || i == '\r')) procfile_default_separators[i] = PF_CHAR_IS_NEWLINE; - else if(unlikely(isspace(i) || !isprint(i))) + else if(unlikely(isspace(i) || (!isprint(i) && !IS_UTF8_BYTE(i)))) procfile_default_separators[i] = PF_CHAR_IS_SEPARATOR; else diff --git a/src/libnetdata/socket/socket.c b/src/libnetdata/socket/socket.c index 0ba24b7474..7170a39637 100644 --- a/src/libnetdata/socket/socket.c +++ b/src/libnetdata/socket/socket.c @@ -194,11 +194,9 @@ int sock_setreuse(int fd, int reuse) { void sock_setcloexec(int fd) { UNUSED(fd); -#ifndef SOCK_CLOEXEC int flags = fcntl(fd, F_GETFD); if (flags != -1) (void) fcntl(fd, F_SETFD, flags | FD_CLOEXEC); -#endif } int sock_setreuse_port(int fd __maybe_unused, int reuse __maybe_unused) { @@ -290,7 +288,7 @@ int create_listen_socket_unix(const char *path, int listen_backlog) { name.sun_family = AF_UNIX; strncpy(name.sun_path, path, sizeof(name.sun_path)-1); - errno = 0; + errno_clear(); if (unlink(path) == -1 && errno != ENOENT) 
nd_log(NDLS_DAEMON, NDLP_ERR, "LISTENER: failed to remove existing (probably obsolete or left-over) file on UNIX socket path '%s'.", @@ -918,7 +916,7 @@ int connect_to_this_ip46(int protocol, int socktype, const char *host, uint32_t } sock_setcloexec(fd); - errno = 0; + errno_clear(); if(connect(fd, ai->ai_addr, ai->ai_addrlen) < 0) { if(errno == EALREADY || errno == EINPROGRESS) { nd_log(NDLS_DAEMON, NDLP_DEBUG, @@ -1200,7 +1198,7 @@ inline int wait_on_socket_or_cancel_with_timeout( const int wait_ms = (timeout_ms >= ND_CHECK_CANCELLABILITY_WHILE_WAITING_EVERY_MS || forever) ? ND_CHECK_CANCELLABILITY_WHILE_WAITING_EVERY_MS : timeout_ms; - errno = 0; + errno_clear(); // check every wait_ms const int ret = poll(&pfd, 1, wait_ms); @@ -1482,7 +1480,7 @@ int accept_socket(int fd, int flags, char *client_ip, size_t ipsize, char *clien break; } if (!connection_allowed(nfd, client_ip, client_host, hostsize, access_list, "connection", allow_dns)) { - errno = 0; + errno_clear(); nd_log(NDLS_DAEMON, NDLP_WARNING, "Permission denied for client '%s', port '%s'", client_ip, client_port); diff --git a/src/libnetdata/spawn_server/spawn_popen.c b/src/libnetdata/spawn_server/spawn_popen.c new file mode 100644 index 0000000000..d3109fb8d3 --- /dev/null +++ b/src/libnetdata/spawn_server/spawn_popen.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "spawn_popen.h" + +SPAWN_SERVER *netdata_main_spawn_server = NULL; + +bool netdata_main_spawn_server_init(const char *name, int argc, const char **argv) { + if(netdata_main_spawn_server == NULL) { + static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER; + spinlock_lock(&spinlock); + if(netdata_main_spawn_server == NULL) + netdata_main_spawn_server = spawn_server_create(name, NULL, argc, argv); + spinlock_unlock(&spinlock); + } + + return netdata_main_spawn_server != NULL; +} + +void netdata_main_spawn_server_cleanup(void) { + if(netdata_main_spawn_server) { + spawn_server_destroy(netdata_main_spawn_server); + 
netdata_main_spawn_server = NULL; + } +} + +POPEN_INSTANCE *spawn_popen_run_argv(const char **argv) { + netdata_main_spawn_server_init(NULL, 0, NULL); + + SPAWN_INSTANCE *si = spawn_server_exec(netdata_main_spawn_server, nd_log_collectors_fd(), + 0, argv, NULL, 0, SPAWN_INSTANCE_TYPE_EXEC); + + if(si == NULL) return NULL; + + POPEN_INSTANCE *pi = mallocz(sizeof(*pi)); + pi->si = si; + pi->child_stdin_fp = fdopen(spawn_server_instance_write_fd(si), "w"); + pi->child_stdout_fp = fdopen(spawn_server_instance_read_fd(si), "r"); + + if(!pi->child_stdin_fp) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "Cannot open FILE on child's stdin on fd %d.", spawn_server_instance_write_fd(si)); + goto cleanup; + } + + if(!pi->child_stdout_fp) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "Cannot open FILE on child's stdout on fd %d.", spawn_server_instance_read_fd(si)); + goto cleanup; + } + + return pi; + +cleanup: + if(pi->child_stdin_fp) { fclose(pi->child_stdin_fp); spawn_server_instance_write_fd(si); } + if(pi->child_stdout_fp) { fclose(pi->child_stdout_fp); spawn_server_instance_read_fd_unset(si); } + spawn_server_exec_kill(netdata_main_spawn_server, si); + freez(pi); + return NULL; +} + +POPEN_INSTANCE *spawn_popen_run_variadic(const char *cmd, ...) 
{ + va_list args; + va_list args_copy; + int argc = 0; + + // Start processing variadic arguments + va_start(args, cmd); + + // Make a copy of args to count the number of arguments + va_copy(args_copy, args); + while (va_arg(args_copy, char *) != NULL) argc++; + va_end(args_copy); + + // Allocate memory for argv array (+2 for cmd and NULL terminator) + const char *argv[argc + 2]; + + // Populate the argv array + argv[0] = cmd; + + for (int i = 1; i <= argc; i++) + argv[i] = va_arg(args, const char *); + + argv[argc + 1] = NULL; // NULL-terminate the array + + // End processing variadic arguments + va_end(args); + + return spawn_popen_run_argv(argv); +} + +POPEN_INSTANCE *spawn_popen_run(const char *cmd) { + if(!cmd || !*cmd) return NULL; + + const char *argv[] = { + "/bin/sh", + "-c", + cmd, + NULL + }; + return spawn_popen_run_argv(argv); +} + +static int spawn_popen_status_rc(int status) { + if(WIFEXITED(status)) + return WEXITSTATUS(status); + + if(WIFSIGNALED(status)) { + int sig = WTERMSIG(status); + switch(sig) { + case SIGTERM: + case SIGPIPE: + return 0; + + default: + return -1; + } + } + + return -1; +} + +int spawn_popen_wait(POPEN_INSTANCE *pi) { + if(!pi) return -1; + + fclose(pi->child_stdin_fp); pi->child_stdin_fp = NULL; spawn_server_instance_write_fd_unset(pi->si); + fclose(pi->child_stdout_fp); pi->child_stdout_fp = NULL; spawn_server_instance_read_fd_unset(pi->si); + int status = spawn_server_exec_wait(netdata_main_spawn_server, pi->si); + freez(pi); + return spawn_popen_status_rc(status); +} + +int spawn_popen_kill(POPEN_INSTANCE *pi) { + if(!pi) return -1; + + fclose(pi->child_stdin_fp); pi->child_stdin_fp = NULL; spawn_server_instance_write_fd_unset(pi->si); + fclose(pi->child_stdout_fp); pi->child_stdout_fp = NULL; spawn_server_instance_read_fd_unset(pi->si); + int status = spawn_server_exec_kill(netdata_main_spawn_server, pi->si); + freez(pi); + return spawn_popen_status_rc(status); +} diff --git a/src/libnetdata/spawn_server/spawn_popen.h 
b/src/libnetdata/spawn_server/spawn_popen.h new file mode 100644 index 0000000000..253d1f34be --- /dev/null +++ b/src/libnetdata/spawn_server/spawn_popen.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef SPAWN_POPEN_H +#define SPAWN_POPEN_H + +#include "../libnetdata.h" + +extern SPAWN_SERVER *netdata_main_spawn_server; +bool netdata_main_spawn_server_init(const char *name, int argc, const char **argv); +void netdata_main_spawn_server_cleanup(void); + +typedef struct { + SPAWN_INSTANCE *si; + FILE *child_stdin_fp; + FILE *child_stdout_fp; +} POPEN_INSTANCE; + +POPEN_INSTANCE *spawn_popen_run(const char *cmd); +POPEN_INSTANCE *spawn_popen_run_argv(const char **argv); +POPEN_INSTANCE *spawn_popen_run_variadic(const char *cmd, ...); +int spawn_popen_wait(POPEN_INSTANCE *pi); +int spawn_popen_kill(POPEN_INSTANCE *pi); + +#endif //SPAWN_POPEN_H diff --git a/src/libnetdata/spawn_server/spawn_server.c b/src/libnetdata/spawn_server/spawn_server.c new file mode 100644 index 0000000000..fe9b35db9a --- /dev/null +++ b/src/libnetdata/spawn_server/spawn_server.c @@ -0,0 +1,1318 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +#include "spawn_server.h" + +#if defined(OS_WINDOWS) +#include <windows.h> +#include <io.h> +#include <fcntl.h> +#include <process.h> +#include <sys/cygwin.h> +#endif + +struct spawn_server { + size_t id; + size_t request_id; + const char *name; +#if !defined(OS_WINDOWS) + int pipe[2]; + int server_sock; + pid_t server_pid; + char *path; + spawn_request_callback_t cb; + + int argc; + const char **argv; + size_t argv0_size; +#endif +}; + +struct spawm_instance { + size_t request_id; + int client_sock; + int write_fd; + int read_fd; + pid_t child_pid; + +#if defined(OS_WINDOWS) + HANDLE process_handle; + HANDLE read_handle; + HANDLE write_handle; +#endif +}; + +int spawn_server_instance_read_fd(SPAWN_INSTANCE *si) { return si->read_fd; } +int spawn_server_instance_write_fd(SPAWN_INSTANCE *si) { 
return si->write_fd; } +pid_t spawn_server_instance_pid(SPAWN_INSTANCE *si) { return si->child_pid; } +void spawn_server_instance_read_fd_unset(SPAWN_INSTANCE *si) { si->read_fd = -1; } +void spawn_server_instance_write_fd_unset(SPAWN_INSTANCE *si) { si->write_fd = -1; } + +#if defined(OS_WINDOWS) + +SPAWN_SERVER* spawn_server_create(const char *name, spawn_request_callback_t cb __maybe_unused, int argc __maybe_unused, const char **argv __maybe_unused) { + SPAWN_SERVER* server = callocz(1, sizeof(SPAWN_SERVER)); + if(name) + server->name = strdupz(name); + return server; +} + +void spawn_server_destroy(SPAWN_SERVER *server) { + if (server) { + if(server->name) freez((void *)server->name); + freez(server); + } +} + +static BUFFER *argv_to_windows(const char **argv) { + BUFFER *wb = buffer_create(0, NULL); + + // argv[0] is the path + char b[strlen(argv[0]) * 2 + 1024]; + cygwin_conv_path(CCP_POSIX_TO_WIN_A | CCP_ABSOLUTE, argv[0], b, sizeof(b)); + + buffer_strcat(wb, "cmd.exe /C "); + + for(size_t i = 0; argv[i] ;i++) { + const char *s = (i == 0) ? 
b : argv[i]; + size_t len = strlen(s); + buffer_need_bytes(wb, len * 2 + 1); + + bool needs_quotes = false; + for(const char *c = s; !needs_quotes && *c ; c++) { + switch(*c) { + case ' ': + case '\v': + case '\t': + case '\n': + case '"': + needs_quotes = true; + break; + + default: + break; + } + } + + if(needs_quotes && buffer_strlen(wb)) + buffer_strcat(wb, " \""); + else + buffer_putc(wb, ' '); + + for(const char *c = s; *c ; c++) { + switch(*c) { + case '"': + buffer_putc(wb, '\\'); + // fall through + + default: + buffer_putc(wb, *c); + break; + } + } + + if(needs_quotes) + buffer_strcat(wb, "\""); + } + + return wb; +} + +SPAWN_INSTANCE* spawn_server_exec(SPAWN_SERVER *server, int stderr_fd, int custom_fd __maybe_unused, const char **argv, const void *data __maybe_unused, size_t data_size __maybe_unused, SPAWN_INSTANCE_TYPE type) { + static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER; + + if (type != SPAWN_INSTANCE_TYPE_EXEC) + return NULL; + + int pipe_stdin[2] = { -1, -1 }, pipe_stdout[2] = { -1, -1 }; + + errno_clear(); + + SPAWN_INSTANCE *instance = callocz(1, sizeof(*instance)); + instance->request_id = __atomic_add_fetch(&server->request_id, 1, __ATOMIC_RELAXED); + + CLEAN_BUFFER *wb = argv_to_windows(argv); + char *command = (char *)buffer_tostring(wb); + + if (pipe(pipe_stdin) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN PARENT: Cannot create stdin pipe() for request No %zu, command: %s", + instance->request_id, command); + goto cleanup; + } + + if (pipe(pipe_stdout) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN PARENT: Cannot create stdout pipe() for request No %zu, command: %s", + instance->request_id, command); + goto cleanup; + } + + // do not run multiple times this section + // to prevent handles leaking + spinlock_lock(&spinlock); + + // Convert POSIX file descriptors to Windows handles + HANDLE stdin_read_handle = (HANDLE)_get_osfhandle(pipe_stdin[0]); + HANDLE stdout_write_handle = 
(HANDLE)_get_osfhandle(pipe_stdout[1]); + HANDLE stderr_handle = (HANDLE)_get_osfhandle(stderr_fd); + + if (stdin_read_handle == INVALID_HANDLE_VALUE || stdout_write_handle == INVALID_HANDLE_VALUE || stderr_handle == INVALID_HANDLE_VALUE) { + spinlock_unlock(&spinlock); + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN PARENT: Invalid handle value(s) for request No %zu, command: %s", + instance->request_id, command); + goto cleanup; + } + + // Set handle inheritance + if (!SetHandleInformation(stdin_read_handle, HANDLE_FLAG_INHERIT, HANDLE_FLAG_INHERIT) || + !SetHandleInformation(stdout_write_handle, HANDLE_FLAG_INHERIT, HANDLE_FLAG_INHERIT) || + !SetHandleInformation(stderr_handle, HANDLE_FLAG_INHERIT, HANDLE_FLAG_INHERIT)) { + spinlock_unlock(&spinlock); + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN PARENT: Cannot set handle(s) inheritance for request No %zu, command: %s", + instance->request_id, command); + goto cleanup; + } + + // Set up the STARTUPINFO structure + STARTUPINFO si; + PROCESS_INFORMATION pi; + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + si.dwFlags = STARTF_USESTDHANDLES; + si.hStdInput = stdin_read_handle; + si.hStdOutput = stdout_write_handle; + si.hStdError = stderr_handle; + + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN PARENT: Running request No %zu, command: %s", + instance->request_id, command); + + // Spawn the process + if (!CreateProcess(NULL, command, NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi)) { + spinlock_unlock(&spinlock); + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN PARENT: cannot CreateProcess() for request No %zu, command: %s", + instance->request_id, command); + goto cleanup; + } + + CloseHandle(pi.hThread); + + // end of the critical section + spinlock_unlock(&spinlock); + + // Close unused pipe ends + close(pipe_stdin[0]); pipe_stdin[0] = -1; + close(pipe_stdout[1]); pipe_stdout[1] = -1; + + // Store process information in instance + instance->child_pid = cygwin_winpid_to_pid(pi.dwProcessId); + if(instance->child_pid == -1) 
instance->child_pid = pi.dwProcessId; + + instance->process_handle = pi.hProcess; + + // Convert handles to POSIX file descriptors + instance->write_fd = pipe_stdin[1]; + instance->read_fd = pipe_stdout[0]; + + errno_clear(); + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN PARENT: created process for request No %zu, pid %d, command: %s", + instance->request_id, (int)instance->child_pid, command); + + return instance; + +cleanup: + if (pipe_stdin[0] >= 0) close(pipe_stdin[0]); + if (pipe_stdin[1] >= 0) close(pipe_stdin[1]); + if (pipe_stdout[0] >= 0) close(pipe_stdout[0]); + if (pipe_stdout[1] >= 0) close(pipe_stdout[1]); + freez(instance); + return NULL; +} + +int spawn_server_exec_kill(SPAWN_SERVER *server __maybe_unused, SPAWN_INSTANCE *instance) { + if(instance->read_fd != -1) { close(instance->read_fd); instance->read_fd = -1; } + if(instance->write_fd != -1) { close(instance->write_fd); instance->write_fd = -1; } + CloseHandle(instance->read_handle); instance->read_handle = NULL; + CloseHandle(instance->write_handle); instance->write_handle = NULL; + + TerminateProcess(instance->process_handle, 0); + + DWORD exit_code; + GetExitCodeProcess(instance->process_handle, &exit_code); + CloseHandle(instance->process_handle); + + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN PARENT: child of request No %zu, pid %d, killed and exited with code %d", + instance->request_id, (int)instance->child_pid, (int)exit_code); + + freez(instance); + return (int)exit_code; +} + +int spawn_server_exec_wait(SPAWN_SERVER *server __maybe_unused, SPAWN_INSTANCE *instance) { + if(instance->read_fd != -1) { close(instance->read_fd); instance->read_fd = -1; } + if(instance->write_fd != -1) { close(instance->write_fd); instance->write_fd = -1; } + CloseHandle(instance->read_handle); instance->read_handle = NULL; + CloseHandle(instance->write_handle); instance->write_handle = NULL; + + WaitForSingleObject(instance->process_handle, INFINITE); + + DWORD exit_code = -1; + 
GetExitCodeProcess(instance->process_handle, &exit_code); + CloseHandle(instance->process_handle); + + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN PARENT: child of request No %zu, pid %d, waited and exited with code %d", + instance->request_id, (int)instance->child_pid, (int)exit_code); + + freez(instance); + return (int)exit_code; +} + +#else // !OS_WINDOWS + +#ifdef __APPLE__ +#include <crt_externs.h> +#define environ (*_NSGetEnviron()) +#else +extern char **environ; +#endif + +static size_t spawn_server_id = 0; +static volatile bool spawn_server_exit = false; +static volatile bool spawn_server_sigchld = false; +static SPAWN_REQUEST *spawn_server_requests = NULL; + +// -------------------------------------------------------------------------------------------------------------------- + +static int connect_to_spawn_server(const char *path, bool log) { + int sock = -1; + + if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { + if(log) + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: cannot create socket() to connect to spawn server."); + return -1; + } + + struct sockaddr_un server_addr = { + .sun_family = AF_UNIX, + }; + strcpy(server_addr.sun_path, path); + + if (connect(sock, (struct sockaddr *)&server_addr, sizeof(server_addr)) == -1) { + if(log) + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: Cannot connect() to spawn server."); + close(sock); + return -1; + } + + return sock; +} + +// -------------------------------------------------------------------------------------------------------------------- +// the child created by the spawn server + +typedef enum __attribute__((packed)) { + STATUS_REPORT_STARTED, + STATUS_REPORT_FAILED, + STATUS_REPORT_EXITED, + STATUS_REPORT_PING, +} STATUS_REPORT; + +struct status_report { + STATUS_REPORT status; + union { + struct { + pid_t pid; + } started; + + struct { + int err_no; + } failed; + + struct { + int waitpid_status; + } exited; + }; +}; + +static void spawn_server_send_status_ping(int fd) { + struct 
status_report sr = { + .status = STATUS_REPORT_PING, + }; + + if(write(fd, &sr, sizeof(sr)) != sizeof(sr)) + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Cannot send ping status report"); +} + +static void spawn_server_send_status_success(int fd) { + const struct status_report sr = { + .status = STATUS_REPORT_STARTED, + .started = { + .pid = getpid(), + }, + }; + + if(write(fd, &sr, sizeof(sr)) != sizeof(sr)) + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Cannot send success status report"); +} + +static void spawn_server_send_status_failure(int fd) { + struct status_report sr = { + .status = STATUS_REPORT_FAILED, + .failed = { + .err_no = errno, + }, + }; + + if(write(fd, &sr, sizeof(sr)) != sizeof(sr)) + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Cannot send failure status report"); +} + +static void spawn_server_send_status_exit(int fd, int waitpid_status) { + struct status_report sr = { + .status = STATUS_REPORT_EXITED, + .exited = { + .waitpid_status = waitpid_status, + }, + }; + + if(write(fd, &sr, sizeof(sr)) != sizeof(sr)) + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Cannot send exit status report"); +} + +static void spawn_server_run_child(SPAWN_SERVER *server, SPAWN_REQUEST *request) { + // fprintf(stderr, "CHILD: running request %zu on pid %d\n", request->request_id, getpid()); + + // close the server sockets; + close(server->server_sock); server->server_sock = -1; + if(server->pipe[0] != -1) { close(server->pipe[0]); server->pipe[0] = -1; } + if(server->pipe[1] != -1) { close(server->pipe[1]); server->pipe[1] = -1; } + + // set the process name + { + char buf[15]; + snprintfz(buf, sizeof(buf), "chld-%zu-r%zu", server->id, request->request_id); + os_setproctitle(buf, server->argc, server->argv); + } + + // get the fds from the request + int stdin_fd = request->fds[0]; + int stdout_fd = request->fds[1]; + int stderr_fd = request->fds[2]; + int custom_fd = request->fds[3]; + + // change stdio fds to the ones in the request + if 
(dup2(stdin_fd, STDIN_FILENO) == -1) { + spawn_server_send_status_failure(stdout_fd); + exit(1); + } + if (dup2(stdout_fd, STDOUT_FILENO) == -1) { + spawn_server_send_status_failure(stdout_fd); + exit(1); + } + if (dup2(stderr_fd, STDERR_FILENO) == -1) { + spawn_server_send_status_failure(stdout_fd); + exit(1); + } + + // close the excess fds + close(stdin_fd); stdin_fd = request->fds[0] = STDIN_FILENO; + close(stdout_fd); stdout_fd = request->fds[1] = STDOUT_FILENO; + close(stderr_fd); stderr_fd = request->fds[2] = STDERR_FILENO; + + // overwrite the process environment + environ = (char **)request->environment; + + // Perform different actions based on the type + switch (request->type) { + + case SPAWN_INSTANCE_TYPE_EXEC: + spawn_server_send_status_success(request->socket); + close(request->socket); request->socket = -1; + close(custom_fd); custom_fd = -1; + execvp(request->argv[0], (char **)request->argv); + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN SERVER: Failed to execute command of request No %zu (argv[0] = '%s')", + request->request_id, request->argv[0]); + exit(1); + break; + + case SPAWN_INSTANCE_TYPE_CALLBACK: + if(server->cb == NULL) { + errno = ENOENT; + spawn_server_send_status_failure(request->socket); + close(request->socket); request->socket = -1; + exit(1); + } + spawn_server_send_status_success(request->socket); + close(request->socket); request->socket = -1; + server->cb(request); + exit(0); + break; + + default: + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: unknown request type %u", request->type); + exit(1); + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// Encoding and decoding of spawn server request argv type of data + +// Function to encode argv or envp +static void* encode_argv(const char **argv, size_t *out_size) { + size_t buffer_size = 1024; // Initial buffer size + size_t buffer_used = 0; + char *buffer = mallocz(buffer_size); + + if(argv) { + 
for (const char **p = argv; *p != NULL; p++) { + if (strlen(*p) == 0) + continue; // Skip empty strings + + size_t len = strlen(*p) + 1; + size_t wanted_size = buffer_used + len + 1; + + if (wanted_size >= buffer_size) { + buffer_size *= 2; + + if(buffer_size < wanted_size) + buffer_size = wanted_size; + + buffer = reallocz(buffer, buffer_size); + } + + memcpy(&buffer[buffer_used], *p, len); + buffer_used += len; + } + } + + buffer[buffer_used++] = '\0'; // Final empty string + *out_size = buffer_used; + + return buffer; +} + +// Function to decode argv or envp +static const char** decode_argv(const char *buffer, size_t size) { + size_t count = 0; + const char *ptr = buffer; + while (ptr < buffer + size) { + if(ptr && *ptr) { + count++; + ptr += strlen(ptr) + 1; + } + else + break; + } + + const char **argv = mallocz((count + 1) * sizeof(char *)); + + ptr = buffer; + for (size_t i = 0; i < count; i++) { + argv[i] = ptr; + ptr += strlen(ptr) + 1; + } + argv[count] = NULL; // Null-terminate the array + + return argv; +} + +// -------------------------------------------------------------------------------------------------------------------- +// Sending and receiving requests + +typedef enum __attribute__((packed)) { + SPAWN_SERVER_MSG_INVALID = 0, + SPAWN_SERVER_MSG_REQUEST, + SPAWN_SERVER_MSG_PING, +} SPAWN_SERVER_MSG; + +static bool spawn_server_is_running(const char *path) { + struct msghdr msg = {0}; + struct iovec iov[6]; + SPAWN_SERVER_MSG msg_type = SPAWN_SERVER_MSG_PING; + size_t dummy_size = 0; + SPAWN_INSTANCE_TYPE dummy_type = 0; + char cmsgbuf[CMSG_SPACE(sizeof(int))]; + + iov[0].iov_base = &msg_type; + iov[0].iov_len = sizeof(msg_type); + + iov[1].iov_base = &dummy_size; + iov[1].iov_len = sizeof(dummy_size); + + iov[2].iov_base = &dummy_size; + iov[2].iov_len = sizeof(dummy_size); + + iov[3].iov_base = &dummy_size; + iov[3].iov_len = sizeof(dummy_size); + + iov[4].iov_base = &dummy_size; + iov[4].iov_len = sizeof(dummy_size); + + iov[5].iov_base = 
&dummy_type; + iov[5].iov_len = sizeof(dummy_type); + + msg.msg_iov = iov; + msg.msg_iovlen = 6; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + int sock = connect_to_spawn_server(path, false); + if(sock == -1) + return false; + + int rc = sendmsg(sock, &msg, 0); + if (rc < 0) { + // cannot send the message + close(sock); + return false; + } + + // Receive response + struct status_report sr = { 0 }; + if (read(sock, &sr, sizeof(sr)) != sizeof(sr)) { + // cannot receive a ping reply + close(sock); + return false; + } + + close(sock); + return sr.status == STATUS_REPORT_PING; +} + +static bool spawn_server_send_request(SPAWN_REQUEST *request) { + bool ret = false; + + size_t env_size = 0; + void *encoded_env = encode_argv(request->environment, &env_size); + if (!encoded_env) + goto cleanup; + + size_t argv_size = 0; + void *encoded_argv = encode_argv(request->argv, &argv_size); + if (!encoded_argv) + goto cleanup; + + struct msghdr msg = {0}; + struct cmsghdr *cmsg; + SPAWN_SERVER_MSG msg_type = SPAWN_SERVER_MSG_REQUEST; + char cmsgbuf[CMSG_SPACE(sizeof(int) * SPAWN_SERVER_TRANSFER_FDS)]; + struct iovec iov[10]; + + + // We send 1 request with 10 iovec in it + // The request will be received in 2 parts + // 1. the first 6 iovec which include the sizes of the memory allocations required + // 2. 
the last 4 iovec which require the memory allocations to be received + + iov[0].iov_base = &msg_type; + iov[0].iov_len = sizeof(msg_type); + + iov[1].iov_base = &request->request_id; + iov[1].iov_len = sizeof(request->request_id); + + iov[2].iov_base = &env_size; + iov[2].iov_len = sizeof(env_size); + + iov[3].iov_base = &argv_size; + iov[3].iov_len = sizeof(argv_size); + + iov[4].iov_base = &request->data_size; + iov[4].iov_len = sizeof(request->data_size); + + iov[5].iov_base = &request->type; // Added this line + iov[5].iov_len = sizeof(request->type); + + iov[6].iov_base = encoded_env; + iov[6].iov_len = env_size; + + iov[7].iov_base = encoded_argv; + iov[7].iov_len = argv_size; + + iov[8].iov_base = (char *)request->data; + iov[8].iov_len = request->data_size; + + iov[9].iov_base = NULL; + iov[9].iov_len = 0; + + msg.msg_iov = iov; + msg.msg_iovlen = 10; + msg.msg_control = cmsgbuf; + msg.msg_controllen = CMSG_SPACE(sizeof(int) * SPAWN_SERVER_TRANSFER_FDS); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * SPAWN_SERVER_TRANSFER_FDS); + + memcpy(CMSG_DATA(cmsg), request->fds, sizeof(int) * SPAWN_SERVER_TRANSFER_FDS); + + int rc = sendmsg(request->socket, &msg, 0); + + if (rc < 0) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: Failed to sendmsg() request to spawn server using socket %d.", request->socket); + goto cleanup; + } + else { + ret = true; + // fprintf(stderr, "PARENT: sent request %zu on socket %d (fds: %d, %d, %d, %d) from tid %d\n", + // request->request_id, request->socket, request->fds[0], request->fds[1], request->fds[2], request->fds[3], os_gettid()); + } + +cleanup: + freez(encoded_env); + freez(encoded_argv); + return ret; +} + +static void spawn_server_receive_request(int sock, SPAWN_SERVER *server) { + struct msghdr msg = {0}; + struct iovec iov[6]; + SPAWN_SERVER_MSG msg_type = SPAWN_SERVER_MSG_INVALID; + size_t request_id; + size_t env_size; + 
size_t argv_size; + size_t data_size; + SPAWN_INSTANCE_TYPE type; + char cmsgbuf[CMSG_SPACE(sizeof(int) * SPAWN_SERVER_TRANSFER_FDS)]; + char *envp = NULL, *argv = NULL, *data = NULL; + int stdin_fd = -1, stdout_fd = -1, stderr_fd = -1, custom_fd = -1; + + // First recvmsg() to read sizes and control message + iov[0].iov_base = &msg_type; + iov[0].iov_len = sizeof(msg_type); + iov[1].iov_base = &request_id; + iov[1].iov_len = sizeof(request_id); + iov[2].iov_base = &env_size; + iov[2].iov_len = sizeof(env_size); + iov[3].iov_base = &argv_size; + iov[3].iov_len = sizeof(argv_size); + iov[4].iov_base = &data_size; + iov[4].iov_len = sizeof(data_size); + iov[5].iov_base = &type; + iov[5].iov_len = sizeof(type); + + msg.msg_iov = iov; + msg.msg_iovlen = 6; + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + if (recvmsg(sock, &msg, 0) < 0) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: failed to recvmsg() the first part of the request."); + return; + } + + if(msg_type == SPAWN_SERVER_MSG_PING) { + spawn_server_send_status_ping(sock); + return; + } + + // Extract file descriptors from control message + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); + if (cmsg == NULL || cmsg->cmsg_len != CMSG_LEN(sizeof(int) * SPAWN_SERVER_TRANSFER_FDS)) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN SERVER: Received invalid control message (expected %zu bytes, received %zu bytes)", + CMSG_LEN(sizeof(int) * SPAWN_SERVER_TRANSFER_FDS), cmsg?cmsg->cmsg_len:0); + return; + } + + if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Received unexpected control message type."); + return; + } + + int *fds = (int *)CMSG_DATA(cmsg); + stdin_fd = fds[0]; + stdout_fd = fds[1]; + stderr_fd = fds[2]; + custom_fd = fds[3]; + + if (stdin_fd < 0 || stdout_fd < 0 || stderr_fd < 0) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "SPAWN SERVER: invalid file descriptors received, stdin = %d, stdout = %d, stderr = %d", + 
stdin_fd, stdout_fd, stderr_fd); + goto cleanup; + } + + // Second recvmsg() to read buffer contents + iov[0].iov_base = envp = mallocz(env_size); + iov[0].iov_len = env_size; + iov[1].iov_base = argv = mallocz(argv_size); + iov[1].iov_len = argv_size; + iov[2].iov_base = data = mallocz(data_size); + iov[2].iov_len = data_size; + + msg.msg_iov = iov; + msg.msg_iovlen = 3; + msg.msg_control = NULL; + msg.msg_controllen = 0; + + ssize_t total_bytes_received = recvmsg(sock, &msg, 0); + if (total_bytes_received < 0) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: failed to recvmsg() the second part of the request."); + goto cleanup; + } + + // fprintf(stderr, "SPAWN SERVER: received request %zu (fds: %d, %d, %d, %d)\n", request_id, + // stdin_fd, stdout_fd, stderr_fd, custom_fd); + + SPAWN_REQUEST *request = mallocz(sizeof(*request)); + *request = (SPAWN_REQUEST){ + .pid = 0, + .request_id = request_id, + .socket = sock, + .fds = { + [0] = stdin_fd, + [1] = stdout_fd, + [2] = stderr_fd, + [3] = custom_fd, + }, + .environment = decode_argv(envp, env_size), + .argv = decode_argv(argv, argv_size), + .data = data, + .data_size = data_size, + .type = type + }; + + pid_t pid = fork(); + if (pid == 0) { + // the child + spawn_server_run_child(server, request); + exit(1); + + } + else if (pid > 0) { + // the parent + request->pid = pid; + request->environment = NULL; + request->argv = NULL; + request->data = NULL; + request->data_size = 0; + request->fds[0] = -1; + request->fds[1] = -1; + request->fds[2] = -1; + request->fds[3] = -1; + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(spawn_server_requests, request, prev, next); + + // do not fork this socket on other children + sock_setcloexec(request->socket); + } + else { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Failed to fork() child."); + spawn_server_send_status_failure(stdout_fd); + freez(request); + } + +cleanup: + if(stdin_fd != -1) close(stdin_fd); + if(stdout_fd != -1) close(stdout_fd); + if(stderr_fd != -1) 
close(stderr_fd); + if(custom_fd != -1) close(custom_fd); + freez(envp); + freez(argv); + freez(data); +} + +// -------------------------------------------------------------------------------------------------------------------- +// the spawn server main event loop + +static void spawn_server_sigchld_handler(int signo __maybe_unused) { + spawn_server_sigchld = true; +} + +static void spawn_server_sigterm_handler(int signo __maybe_unused) { + spawn_server_exit = true; +} + +static SPAWN_REQUEST *find_request_by_pid(pid_t pid) { + for(SPAWN_REQUEST *rq = spawn_server_requests; rq ;rq = rq->next) + if(rq->pid == pid) + return rq; + + return NULL; +} + +static void spawn_server_process_sigchld(void) { + // nd_log(NDLS_COLLECTORS, NDLP_INFO, "SPAWN SERVER: checking for exited children"); + + int status; + pid_t pid; + + // Loop to check for exited child processes + while ((pid = waitpid((pid_t)(-1), &status, WNOHANG)) != 0) { + if(pid == -1) + break; + + SPAWN_REQUEST *rq = find_request_by_pid(pid); + size_t request_id = rq ? 
rq->request_id : 0; + bool send_report_remove_request = false; + + if(WIFEXITED(status)) { + nd_log(NDLS_COLLECTORS, NDLP_INFO, + "SPAWN SERVER: child with pid %d (request %zu) exited normally with exit code %d", + pid, request_id, WEXITSTATUS(status)); + send_report_remove_request = true; + } + else if(WIFSIGNALED(status)) { + if(WCOREDUMP(status)) + nd_log(NDLS_COLLECTORS, NDLP_INFO, + "SPAWN SERVER: child with pid %d (request %zu) coredump'd due to signal %d", + pid, request_id, WTERMSIG(status)); + else + nd_log(NDLS_COLLECTORS, NDLP_INFO, + "SPAWN SERVER: child with pid %d (request %zu) killed by signal %d", + pid, request_id, WTERMSIG(status)); + send_report_remove_request = true; + } + else if(WIFSTOPPED(status)) { + nd_log(NDLS_COLLECTORS, NDLP_INFO, + "SPAWN SERVER: child with pid %d (request %zu) stopped due to signal %d", + pid, request_id, WSTOPSIG(status)); + send_report_remove_request = false; + } + else if(WIFCONTINUED(status)) { + nd_log(NDLS_COLLECTORS, NDLP_INFO, + "SPAWN SERVER: child with pid %d (request %zu) continued due to signal %d", + pid, request_id, SIGCONT); + send_report_remove_request = false; + } + else { + nd_log(NDLS_COLLECTORS, NDLP_INFO, + "SPAWN SERVER: child with pid %d (request %zu) reports unhandled status", + pid, request_id); + send_report_remove_request = false; + } + + if(send_report_remove_request && rq) { + spawn_server_send_status_exit(rq->socket, status); + close(rq->socket); + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(spawn_server_requests, rq, prev, next); + freez(rq); + } + } +} + +static void signals_unblock(void) { + sigset_t sigset; + sigfillset(&sigset); + + if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1) { + netdata_log_error("SIGNAL: Could not unblock signals for threads"); + } +} + +static void spawn_server_event_loop(SPAWN_SERVER *server) { + int pipe_fd = server->pipe[1]; + close(server->pipe[0]); server->pipe[0] = -1; + + signals_unblock(); + + // Set up the signal handler for SIGCHLD and SIGTERM + 
struct sigaction sa; + sa.sa_handler = spawn_server_sigchld_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_NOCLDSTOP; + if (sigaction(SIGCHLD, &sa, NULL) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: sigaction() failed for SIGCHLD"); + exit(1); + } + + sa.sa_handler = spawn_server_sigterm_handler; + if (sigaction(SIGTERM, &sa, NULL) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: sigaction() failed for SIGTERM"); + exit(1); + } + + struct status_report sr = { + .status = STATUS_REPORT_STARTED, + .started = { + .pid = getpid(), + }, + }; + if (write(pipe_fd, &sr, sizeof(sr)) != sizeof(sr)) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: failed to write initial status report."); + exit(1); + } + + struct pollfd fds[2]; + fds[0].fd = server->server_sock; + fds[0].events = POLLIN; + fds[1].fd = pipe_fd; + fds[1].events = POLLHUP | POLLERR; + + while(!spawn_server_exit) { + int ret = poll(fds, 2, -1); + if (spawn_server_sigchld) { + spawn_server_sigchld = false; + spawn_server_process_sigchld(); + + if(ret == -1) + continue; + } + + if (ret == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: poll() failed"); + break; + } + + if (fds[1].revents & (POLLHUP|POLLERR)) { + // Pipe has been closed (parent has exited) + nd_log(NDLS_COLLECTORS, NDLP_DEBUG, "SPAWN SERVER: Parent process has exited"); + break; + } + + if (fds[0].revents & POLLIN) { + int client_sock = accept(server->server_sock, NULL, NULL); + if (client_sock == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: accept() failed"); + continue; + } + + spawn_server_receive_request(client_sock, server); + } + } + + // Cleanup before exiting + unlink(server->path); + + // stop all children + if(spawn_server_requests) { + // nd_log(NDLS_COLLECTORS, NDLP_INFO, "SPAWN SERVER: killing all children..."); + size_t killed = 0; + for(SPAWN_REQUEST *rq = spawn_server_requests; rq ; rq = rq->next) { + kill(rq->pid, SIGTERM); + killed++; + } + 
while(spawn_server_requests) { + spawn_server_process_sigchld(); + tinysleep(); + } + // nd_log(NDLS_COLLECTORS, NDLP_INFO, "SPAWN SERVER: all %zu children finished", killed); + } + + exit(1); +} + +// -------------------------------------------------------------------------------------------------------------------- +// management of the spawn server + +void spawn_server_destroy(SPAWN_SERVER *server) { + if(server->pipe[0] != -1) close(server->pipe[0]); + if(server->pipe[1] != -1) close(server->pipe[1]); + if(server->server_sock != -1) close(server->server_sock); + + if(server->server_pid) { + kill(server->server_pid, SIGTERM); + waitpid(server->server_pid, NULL, 0); + } + + if(server->path) { + unlink(server->path); + freez(server->path); + } + + freez((void *)server->name); + freez(server); +} + +static bool spawn_server_create_listening_socket(SPAWN_SERVER *server) { + if(spawn_server_is_running(server->path)) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Server is already listening on path '%s'", server->path); + return false; + } + + if ((server->server_sock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Failed to create socket()"); + return false; + } + + struct sockaddr_un server_addr = { + .sun_family = AF_UNIX, + }; + strcpy(server_addr.sun_path, server->path); + unlink(server->path); + errno = 0; + + if (bind(server->server_sock, (struct sockaddr *)&server_addr, sizeof(server_addr)) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Failed to bind()"); + return false; + } + + if (listen(server->server_sock, 5) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Failed to listen()"); + return false; + } + + return true; +} + +static void replace_stdio_with_dev_null() { + int dev_null_fd = open("/dev/null", O_RDWR); + if (dev_null_fd == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Failed to open /dev/null: %s", strerror(errno)); + return; + } + + // Redirect stdin (fd 0) + 
if (dup2(dev_null_fd, STDIN_FILENO) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Failed to redirect stdin to /dev/null: %s", strerror(errno)); + close(dev_null_fd); + return; + } + + // Redirect stdout (fd 1) + if (dup2(dev_null_fd, STDOUT_FILENO) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Failed to redirect stdout to /dev/null: %s", strerror(errno)); + close(dev_null_fd); + return; + } + + // Close the original /dev/null file descriptor + close(dev_null_fd); +} + +SPAWN_SERVER* spawn_server_create(const char *name, spawn_request_callback_t child_callback, int argc, const char **argv) { + SPAWN_SERVER *server = callocz(1, sizeof(SPAWN_SERVER)); + server->pipe[0] = -1; + server->pipe[1] = -1; + server->server_sock = -1; + server->cb = child_callback; + server->argc = argc; + server->argv = argv; + server->argv0_size = (argv && argv[0]) ? strlen(argv[0]) : 0; + + server->id = __atomic_add_fetch(&spawn_server_id, 1, __ATOMIC_RELAXED); + + char *runtime_directory = getenv("NETDATA_CACHE_DIR"); + if(runtime_directory && !*runtime_directory) runtime_directory = NULL; + if (runtime_directory) { + struct stat statbuf; + + if(!*runtime_directory) + // it is empty + runtime_directory = NULL; + + else if (stat(runtime_directory, &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) { + // it exists and it is a directory + + if (access(runtime_directory, W_OK) != 0) { + // it is not writable by us + nd_log(NDLS_COLLECTORS, NDLP_ERR, "Runtime directory '%s' is not writable, falling back to '/tmp'", runtime_directory); + runtime_directory = NULL; + } + } + else { + // it does not exist + nd_log(NDLS_COLLECTORS, NDLP_ERR, "Runtime directory '%s' does not exist, falling back to '/tmp'", runtime_directory); + runtime_directory = NULL; + } + } + if(!runtime_directory) + runtime_directory = "/tmp"; + + char path[1024]; + if(name && *name) { + server->name = strdupz(name); + snprintf(path, sizeof(path), "%s/.netdata-spawn-%s.sock", runtime_directory, name); + } + 
else { + snprintfz(path, sizeof(path), "%d-%zu", getpid(), server->id); + server->name = strdupz(path); + snprintf(path, sizeof(path), "%s/.netdata-spawn-%d-%zu.sock", runtime_directory, getpid(), server->id); + } + + server->path = strdupz(path); + + if (!spawn_server_create_listening_socket(server)) + goto cleanup; + + if (pipe(server->pipe) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Cannot create status pipe()"); + goto cleanup; + } + + pid_t pid = fork(); + if (pid == 0) { + // the child - the spawn server + { + char buf[15]; + snprintfz(buf, sizeof(buf), "spawn-%s", server->name); + os_setproctitle(buf, server->argc, server->argv); + } + + replace_stdio_with_dev_null(); + os_close_all_non_std_open_fds_except((int[]){ server->server_sock, server->pipe[1] }, 2); + spawn_server_event_loop(server); + } + else if (pid > 0) { + // the parent + server->server_pid = pid; + close(server->server_sock); server->server_sock = -1; + close(server->pipe[1]); server->pipe[1] = -1; + + struct status_report sr = { 0 }; + if (read(server->pipe[0], &sr, sizeof(sr)) != sizeof(sr)) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: cannot read() initial status report from spawn server"); + goto cleanup; + } + + if(sr.status != STATUS_REPORT_STARTED) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: server did not respond with success."); + goto cleanup; + } + + if(sr.started.pid != server->server_pid) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: server sent pid %d but we have created %d.", sr.started.pid, server->server_pid); + goto cleanup; + } + + return server; + } + + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN SERVER: Cannot fork()"); + +cleanup: + spawn_server_destroy(server); + return NULL; +} + +// -------------------------------------------------------------------------------------------------------------------- +// creating spawn server instances + +void spawn_server_exec_destroy(SPAWN_INSTANCE *instance) { + if(instance->child_pid) 
kill(instance->child_pid, SIGTERM); + if(instance->write_fd != -1) close(instance->write_fd); + if(instance->read_fd != -1) close(instance->read_fd); + if(instance->client_sock != -1) close(instance->client_sock); + freez(instance); +} + +int spawn_server_exec_wait(SPAWN_SERVER *server __maybe_unused, SPAWN_INSTANCE *instance) { + int rc = -1; + + // close the child pipes, to make it exit + if(instance->write_fd != -1) { close(instance->write_fd); instance->write_fd = -1; } + if(instance->read_fd != -1) { close(instance->read_fd); instance->read_fd = -1; } + + // get the result + struct status_report sr = { 0 }; + if(read(instance->client_sock, &sr, sizeof(sr)) != sizeof(sr)) + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: failed to receive final status report for child %d, request %zu", instance->child_pid, instance->request_id); + + else switch(sr.status) { + case STATUS_REPORT_EXITED: + rc = sr.exited.waitpid_status; + break; + + case STATUS_REPORT_STARTED: + case STATUS_REPORT_FAILED: + default: + errno = 0; + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: invalid status report to exec spawn request %zu for pid %d (status = %u)", instance->request_id, instance->child_pid, sr.status); + break; + } + + instance->child_pid = 0; + spawn_server_exec_destroy(instance); + return rc; +} + +int spawn_server_exec_kill(SPAWN_SERVER *server, SPAWN_INSTANCE *instance) { + // kill the child, if it is still running + if(instance->child_pid) kill(instance->child_pid, SIGTERM); + return spawn_server_exec_wait(server, instance); +} + +SPAWN_INSTANCE* spawn_server_exec(SPAWN_SERVER *server, int stderr_fd, int custom_fd, const char **argv, const void *data, size_t data_size, SPAWN_INSTANCE_TYPE type) { + int pipe_stdin[2] = { -1, -1 }, pipe_stdout[2] = { -1, -1 }; + + SPAWN_INSTANCE *instance = callocz(1, sizeof(SPAWN_INSTANCE)); + instance->read_fd = -1; + instance->write_fd = -1; + + instance->client_sock = connect_to_spawn_server(server->path, true); + 
if(instance->client_sock == -1) + goto cleanup; + + if (pipe(pipe_stdin) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: Cannot create stdin pipe()"); + goto cleanup; + } + + if (pipe(pipe_stdout) == -1) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: Cannot create stdout pipe()"); + goto cleanup; + } + + SPAWN_REQUEST request = { + .request_id = __atomic_add_fetch(&server->request_id, 1, __ATOMIC_RELAXED), + .socket = instance->client_sock, + .fds = { + [0] = pipe_stdin[0], + [1] = pipe_stdout[1], + [2] = stderr_fd, + [3] = custom_fd, + }, + .environment = (const char **)environ, + .argv = argv, + .data = data, + .data_size = data_size, + .type = type + }; + + if(!spawn_server_send_request(&request)) + goto cleanup; + + close(pipe_stdin[0]); pipe_stdin[0] = -1; + instance->write_fd = pipe_stdin[1]; pipe_stdin[1] = -1; + + close(pipe_stdout[1]); pipe_stdout[1] = -1; + instance->read_fd = pipe_stdout[0]; pipe_stdout[0] = -1; + + struct status_report sr = { 0 }; + if(read(instance->client_sock, &sr, sizeof(sr)) != sizeof(sr)) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: Failed to exec spawn request %zu (cannot get initial status report)", request.request_id); + goto cleanup; + } + + switch(sr.status) { + case STATUS_REPORT_STARTED: + instance->child_pid = sr.started.pid; + return instance; + + case STATUS_REPORT_FAILED: + errno = sr.failed.err_no; + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: Failed to exec spawn request %zu (check errno #1)", request.request_id); + errno = 0; + break; + + case STATUS_REPORT_EXITED: + errno = ENOEXEC; + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: Failed to exec spawn request %zu (check errno #2)", request.request_id); + errno = 0; + break; + + default: + errno = 0; + nd_log(NDLS_COLLECTORS, NDLP_ERR, "SPAWN PARENT: Invalid status report to exec spawn request %zu (received invalid data)", request.request_id); + break; + } + +cleanup: + if (pipe_stdin[0] >= 0) close(pipe_stdin[0]); + if (pipe_stdin[1] 
>= 0) close(pipe_stdin[1]); + if (pipe_stdout[0] >= 0) close(pipe_stdout[0]); + if (pipe_stdout[1] >= 0) close(pipe_stdout[1]); + spawn_server_exec_destroy(instance); + return NULL; +} + +#endif // !OS_WINDOWS diff --git a/src/libnetdata/spawn_server/spawn_server.h b/src/libnetdata/spawn_server/spawn_server.h new file mode 100644 index 0000000000..fb5e5bb0a0 --- /dev/null +++ b/src/libnetdata/spawn_server/spawn_server.h @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef SPAWN_SERVER_H +#define SPAWN_SERVER_H + +#define SPAWN_SERVER_TRANSFER_FDS 4 + +typedef enum { + SPAWN_INSTANCE_TYPE_EXEC = 0, +#if !defined(OS_WINDOWS) + SPAWN_INSTANCE_TYPE_CALLBACK = 1 +#endif +} SPAWN_INSTANCE_TYPE; + +// this is only used publicly for SPAWN_INSTANCE_TYPE_CALLBACK +// which is not available in Windows +typedef struct spawn_request { + size_t request_id; + pid_t pid; + int socket; + int fds[SPAWN_SERVER_TRANSFER_FDS]; // 0 = stdin, 1 = stdout, 2 = stderr, 3 = custom + const char **environment; + const char **argv; + const void *data; + size_t data_size; + SPAWN_INSTANCE_TYPE type; + struct spawn_request *prev, *next; +} SPAWN_REQUEST; + +typedef void (*spawn_request_callback_t)(SPAWN_REQUEST *request); + +typedef struct spawm_instance SPAWN_INSTANCE; +typedef struct spawn_server SPAWN_SERVER; + +SPAWN_SERVER* spawn_server_create(const char *name, spawn_request_callback_t child_callback, int argc, const char **argv); +void spawn_server_destroy(SPAWN_SERVER *server); + +SPAWN_INSTANCE* spawn_server_exec(SPAWN_SERVER *server, int stderr_fd, int custom_fd, const char **argv, const void *data, size_t data_size, SPAWN_INSTANCE_TYPE type); +int spawn_server_exec_kill(SPAWN_SERVER *server, SPAWN_INSTANCE *instance); +int spawn_server_exec_wait(SPAWN_SERVER *server, SPAWN_INSTANCE *instance); + +int spawn_server_instance_read_fd(SPAWN_INSTANCE *si); +int spawn_server_instance_write_fd(SPAWN_INSTANCE *si); +pid_t spawn_server_instance_pid(SPAWN_INSTANCE *si); +void 
spawn_server_instance_read_fd_unset(SPAWN_INSTANCE *si); +void spawn_server_instance_write_fd_unset(SPAWN_INSTANCE *si); + +#endif //SPAWN_SERVER_H diff --git a/src/logsmanagement/helper.h b/src/logsmanagement/helper.h index 6d1d51f76d..76fba9c709 100644 --- a/src/logsmanagement/helper.h +++ b/src/logsmanagement/helper.h @@ -95,7 +95,7 @@ static inline str2xx_errno str2int(int *out, char *s, int base) { // m_assert(0, "str2int error: STR2XX_INCONVERTIBLE"); return STR2XX_INCONVERTIBLE; } - errno = 0; + errno_clear(); long l = strtol(s, &end, base); /* Both checks are needed because INT_MAX == LONG_MAX is possible. */ if (unlikely(l > INT_MAX || (errno == ERANGE && l == LONG_MAX))){ @@ -124,7 +124,7 @@ static inline str2xx_errno str2float(float *out, char *s) { // m_assert(0, "str2float error: STR2XX_INCONVERTIBLE"); return STR2XX_INCONVERTIBLE; } - errno = 0; + errno_clear(); float f = strtof(s, &end); /* Both checks are needed because INT_MAX == LONG_MAX is possible. */ if (unlikely((errno == ERANGE && f == HUGE_VALF))){ diff --git a/src/registry/registry_db.c b/src/registry/registry_db.c index 448ca29d31..67c5312ed4 100644 --- a/src/registry/registry_db.c +++ b/src/registry/registry_db.c @@ -162,7 +162,7 @@ int registry_db_save(void) { fclose(fp); - errno = 0; + errno_clear(); // remove the .old db netdata_log_debug(D_REGISTRY, "REGISTRY: Removing old db '%s'", old_filename); diff --git a/src/spawn/README.md b/src/spawn/README.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/spawn/spawn.c b/src/spawn/spawn.c deleted file mode 100644 index a6e53718af..0000000000 --- a/src/spawn/spawn.c +++ /dev/null @@ -1,288 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "spawn.h" - -static uv_thread_t thread; -int spawn_thread_error; -int spawn_thread_shutdown; - -struct spawn_queue spawn_cmd_queue; - -static struct spawn_cmd_info *create_spawn_cmd(const char *command_to_run) -{ - struct spawn_cmd_info *cmdinfo; - - cmdinfo = 
mallocz(sizeof(*cmdinfo)); - fatal_assert(0 == uv_cond_init(&cmdinfo->cond)); - fatal_assert(0 == uv_mutex_init(&cmdinfo->mutex)); - cmdinfo->serial = 0; /* invalid */ - cmdinfo->command_to_run = strdupz(command_to_run); - cmdinfo->exit_status = -1; /* invalid */ - cmdinfo->pid = -1; /* invalid */ - cmdinfo->flags = 0; - - return cmdinfo; -} - -void destroy_spawn_cmd(struct spawn_cmd_info *cmdinfo) -{ - uv_cond_destroy(&cmdinfo->cond); - uv_mutex_destroy(&cmdinfo->mutex); - - freez(cmdinfo->command_to_run); - freez(cmdinfo); -} - -int spawn_cmd_compare(void *a, void *b) -{ - struct spawn_cmd_info *cmda = a, *cmdb = b; - - /* No need for mutex, serial will never change and the entries cannot be deallocated yet */ - if (cmda->serial < cmdb->serial) return -1; - if (cmda->serial > cmdb->serial) return 1; - - return 0; -} - -static void init_spawn_cmd_queue(void) -{ - spawn_cmd_queue.cmd_tree.root = NULL; - spawn_cmd_queue.cmd_tree.compar = spawn_cmd_compare; - spawn_cmd_queue.size = 0; - spawn_cmd_queue.latest_serial = 0; - fatal_assert(0 == uv_cond_init(&spawn_cmd_queue.cond)); - fatal_assert(0 == uv_mutex_init(&spawn_cmd_queue.mutex)); -} - -/* - * Returns serial number of the enqueued command - */ -uint64_t spawn_enq_cmd(const char *command_to_run) -{ - unsigned queue_size; - uint64_t serial; - avl_t *avl_ret; - struct spawn_cmd_info *cmdinfo; - - cmdinfo = create_spawn_cmd(command_to_run); - - /* wait for free space in queue */ - uv_mutex_lock(&spawn_cmd_queue.mutex); - while ((queue_size = spawn_cmd_queue.size) == SPAWN_MAX_OUTSTANDING) { - uv_cond_wait(&spawn_cmd_queue.cond, &spawn_cmd_queue.mutex); - } - fatal_assert(queue_size < SPAWN_MAX_OUTSTANDING); - spawn_cmd_queue.size = queue_size + 1; - - serial = ++spawn_cmd_queue.latest_serial; /* 0 is invalid */ - cmdinfo->serial = serial; /* No need to take the cmd mutex since it is unreachable at the moment */ - - /* enqueue command */ - avl_ret = avl_insert(&spawn_cmd_queue.cmd_tree, (avl_t *)cmdinfo); - 
fatal_assert(avl_ret == (avl_t *)cmdinfo); - uv_mutex_unlock(&spawn_cmd_queue.mutex); - - /* wake up event loop */ - fatal_assert(0 == uv_async_send(&spawn_async)); - return serial; -} - -/* - * Blocks until command with serial finishes running. Only one thread is allowed to wait per command. - */ -void spawn_wait_cmd(uint64_t serial, int *exit_status, time_t *exec_run_timestamp) -{ - avl_t *avl_ret; - struct spawn_cmd_info tmp, *cmdinfo; - - tmp.serial = serial; - - uv_mutex_lock(&spawn_cmd_queue.mutex); - avl_ret = avl_search(&spawn_cmd_queue.cmd_tree, (avl_t *)&tmp); - uv_mutex_unlock(&spawn_cmd_queue.mutex); - - fatal_assert(avl_ret); /* Could be NULL if more than 1 threads wait for the command */ - cmdinfo = (struct spawn_cmd_info *)avl_ret; - - uv_mutex_lock(&cmdinfo->mutex); - while (!(cmdinfo->flags & SPAWN_CMD_DONE)) { - /* Only 1 thread is allowed to wait for this command to finish */ - uv_cond_wait(&cmdinfo->cond, &cmdinfo->mutex); - } - uv_mutex_unlock(&cmdinfo->mutex); - - spawn_deq_cmd(cmdinfo); - *exit_status = cmdinfo->exit_status; - *exec_run_timestamp = cmdinfo->exec_run_timestamp; - - destroy_spawn_cmd(cmdinfo); -} - -void spawn_deq_cmd(struct spawn_cmd_info *cmdinfo) -{ - unsigned queue_size; - avl_t *avl_ret; - - uv_mutex_lock(&spawn_cmd_queue.mutex); - queue_size = spawn_cmd_queue.size; - fatal_assert(queue_size); - /* dequeue command */ - avl_ret = avl_remove(&spawn_cmd_queue.cmd_tree, (avl_t *)cmdinfo); - fatal_assert(avl_ret); - - spawn_cmd_queue.size = queue_size - 1; - - /* wake up callers */ - uv_cond_signal(&spawn_cmd_queue.cond); - uv_mutex_unlock(&spawn_cmd_queue.mutex); -} - -/* - * Must be called from the spawn client event loop context. This way no mutex is needed because the event loop is the - * only writer as far as struct spawn_cmd_info entries are concerned. 
- */ -static int find_unprocessed_spawn_cmd_cb(void *entry, void *data) -{ - struct spawn_cmd_info **cmdinfop = data, *cmdinfo = entry; - - if (!(cmdinfo->flags & SPAWN_CMD_PROCESSED)) { - *cmdinfop = cmdinfo; - return -1; /* break tree traversal */ - } - return 0; /* continue traversing */ -} - -struct spawn_cmd_info *spawn_get_unprocessed_cmd(void) -{ - struct spawn_cmd_info *cmdinfo; - unsigned queue_size; - int ret; - - uv_mutex_lock(&spawn_cmd_queue.mutex); - queue_size = spawn_cmd_queue.size; - if (queue_size == 0) { - uv_mutex_unlock(&spawn_cmd_queue.mutex); - return NULL; - } - /* find command */ - cmdinfo = NULL; - ret = avl_traverse(&spawn_cmd_queue.cmd_tree, find_unprocessed_spawn_cmd_cb, (void *)&cmdinfo); - if (-1 != ret) { /* no commands available for processing */ - uv_mutex_unlock(&spawn_cmd_queue.mutex); - return NULL; - } - uv_mutex_unlock(&spawn_cmd_queue.mutex); - - return cmdinfo; -} - -/** - * This function spawns a process that shares a libuv IPC pipe with the caller and performs spawn server duties. - * The spawn server process will close all open file descriptors except for the pipe, UV_STDOUT_FD, and UV_STDERR_FD. - * The caller has to be the netdata user as configured. - * - * @param loop the libuv loop of the caller context - * @param spawn_channel the bidirectional libuv IPC pipe that the server and the caller will share - * @param process the spawn server libuv process context - * @return 0 on success or the libuv error code - */ -int create_spawn_server(uv_loop_t *loop, uv_pipe_t *spawn_channel, uv_process_t *process) -{ - uv_process_options_t options = {0}; - char *args[3]; - int ret; -#define SPAWN_SERVER_DESCRIPTORS (3) - uv_stdio_container_t stdio[SPAWN_SERVER_DESCRIPTORS]; - struct passwd *passwd = NULL; - char *user = NULL; - - passwd = getpwuid(getuid()); - user = (passwd && passwd->pw_name) ? 
passwd->pw_name : ""; - - args[0] = netdata_exe_file; - args[1] = SPAWN_SERVER_COMMAND_LINE_ARGUMENT; - args[2] = NULL; - - memset(&options, 0, sizeof(options)); - options.file = netdata_exe_file; - options.args = args; - options.exit_cb = NULL; //exit_cb; - options.stdio = stdio; - options.stdio_count = SPAWN_SERVER_DESCRIPTORS; - - stdio[0].flags = UV_CREATE_PIPE | UV_READABLE_PIPE | UV_WRITABLE_PIPE; - stdio[0].data.stream = (uv_stream_t *)spawn_channel; /* bidirectional libuv pipe */ - stdio[1].flags = UV_INHERIT_FD; - stdio[1].data.fd = 1 /* UV_STDOUT_FD */; - stdio[2].flags = UV_INHERIT_FD; - stdio[2].data.fd = nd_log_health_fd() /* UV_STDERR_FD */; - - ret = uv_spawn(loop, process, &options); /* execute the netdata binary again as the netdata user */ - if (0 != ret) { - netdata_log_error("uv_spawn (process: \"%s\") (user: %s) failed (%s).", netdata_exe_file, user, uv_strerror(ret)); - fatal("Cannot start netdata without the spawn server."); - } - - return ret; -} - -#define CONCURRENT_SPAWNS 16 -#define SPAWN_ITERATIONS 10000 -#undef CONCURRENT_STRESS_TEST - -void spawn_init(void) -{ - struct completion completion; - int error; - - netdata_log_info("Initializing spawn client."); - - init_spawn_cmd_queue(); - - completion_init(&completion); - error = uv_thread_create(&thread, spawn_client, &completion); - if (error) { - netdata_log_error("uv_thread_create(): %s", uv_strerror(error)); - goto after_error; - } - /* wait for spawn client thread to initialize */ - completion_wait_for(&completion); - completion_destroy(&completion); - - if (spawn_thread_error) { - error = uv_thread_join(&thread); - if (error) { - netdata_log_error("uv_thread_create(): %s", uv_strerror(error)); - } - goto after_error; - } -#ifdef CONCURRENT_STRESS_TEST - signals_reset(); - signals_unblock(); - - sleep(60); - uint64_t serial[CONCURRENT_SPAWNS]; - for (int j = 0 ; j < SPAWN_ITERATIONS ; ++j) { - for (int i = 0; i < CONCURRENT_SPAWNS; ++i) { - char cmd[64]; - sprintf(cmd, "echo 
CONCURRENT_STRESS_TEST %d 1>&2", j * CONCURRENT_SPAWNS + i + 1); - serial[i] = spawn_enq_cmd(cmd); - netdata_log_info("Queued command %s for spawning.", cmd); - } - int exit_status; - time_t exec_run_timestamp; - for (int i = 0; i < CONCURRENT_SPAWNS; ++i) { - netdata_log_info("Started waiting for serial %llu exit status %d run timestamp %llu.", serial[i], exit_status, - exec_run_timestamp); - spawn_wait_cmd(serial[i], &exit_status, &exec_run_timestamp); - netdata_log_info("Finished waiting for serial %llu exit status %d run timestamp %llu.", serial[i], exit_status, - exec_run_timestamp); - } - } - exit(0); -#endif - return; - - after_error: - netdata_log_error("Failed to initialize spawn service. The alarms notifications will not be spawned."); -} diff --git a/src/spawn/spawn.h b/src/spawn/spawn.h deleted file mode 100644 index 6e9e51ef03..0000000000 --- a/src/spawn/spawn.h +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_SPAWN_H -#define NETDATA_SPAWN_H 1 - -#include "daemon/common.h" - -#define SPAWN_SERVER_COMMAND_LINE_ARGUMENT "--special-spawn-server" - -typedef enum spawn_protocol { - SPAWN_PROT_EXEC_CMD = 0, - SPAWN_PROT_SPAWN_RESULT, - SPAWN_PROT_CMD_EXIT_STATUS -} spawn_prot_t; - -struct spawn_prot_exec_cmd { - uint16_t command_length; - char command_to_run[]; -}; - -struct spawn_prot_spawn_result { - pid_t exec_pid; /* 0 if failed to spawn */ - time_t exec_run_timestamp; /* time of successfully spawning the command */ -}; - -struct spawn_prot_cmd_exit_status { - int exec_exit_status; -}; - -struct spawn_prot_header { - spawn_prot_t opcode; - void *handle; -}; - -#undef SPAWN_DEBUG /* define to enable debug prints */ - -#define SPAWN_MAX_OUTSTANDING (32768) - -#define SPAWN_CMD_PROCESSED 0x00000001 -#define SPAWN_CMD_IN_PROGRESS 0x00000002 -#define SPAWN_CMD_FAILED_TO_SPAWN 0x00000004 -#define SPAWN_CMD_DONE 0x00000008 - -struct spawn_cmd_info { - avl_t avl; - - /* concurrency control per command */ - 
uv_mutex_t mutex; - uv_cond_t cond; /* users block here until command has finished */ - - uint64_t serial; - char *command_to_run; - int exit_status; - pid_t pid; - unsigned long flags; - time_t exec_run_timestamp; /* time of successfully spawning the command */ -}; - -/* spawn command queue */ -struct spawn_queue { - avl_tree_type cmd_tree; - - /* concurrency control of command queue */ - uv_mutex_t mutex; - uv_cond_t cond; - - volatile unsigned size; - uint64_t latest_serial; -}; - -struct write_context { - uv_write_t write_req; - struct spawn_prot_header header; - struct spawn_prot_cmd_exit_status exit_status; - struct spawn_prot_spawn_result spawn_result; - struct spawn_prot_exec_cmd payload; -}; - -extern int spawn_thread_error; -extern int spawn_thread_shutdown; -extern uv_async_t spawn_async; - -void spawn_init(void); -void spawn_server(void); -void spawn_client(void *arg); -void destroy_spawn_cmd(struct spawn_cmd_info *cmdinfo); -uint64_t spawn_enq_cmd(const char *command_to_run); -void spawn_wait_cmd(uint64_t serial, int *exit_status, time_t *exec_run_timestamp); -void spawn_deq_cmd(struct spawn_cmd_info *cmdinfo); -struct spawn_cmd_info *spawn_get_unprocessed_cmd(void); -int create_spawn_server(uv_loop_t *loop, uv_pipe_t *spawn_channel, uv_process_t *process); - -/* - * Copies from the source buffer to the protocol buffer. It advances the source buffer by the amount copied. It - * subtracts the amount copied from the source length. 
- */ -static inline void copy_to_prot_buffer(char *prot_buffer, unsigned *prot_buffer_len, unsigned max_to_copy, - char **source, unsigned *source_len) -{ - unsigned to_copy; - - to_copy = MIN(max_to_copy, *source_len); - memcpy(prot_buffer + *prot_buffer_len, *source, to_copy); - *prot_buffer_len += to_copy; - *source += to_copy; - *source_len -= to_copy; -} - -#endif //NETDATA_SPAWN_H diff --git a/src/spawn/spawn_client.c b/src/spawn/spawn_client.c deleted file mode 100644 index f2af9842ca..0000000000 --- a/src/spawn/spawn_client.c +++ /dev/null @@ -1,250 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "spawn.h" - -static uv_process_t process; -static uv_pipe_t spawn_channel; -static uv_loop_t *loop; -uv_async_t spawn_async; - -static char prot_buffer[MAX_COMMAND_LENGTH]; -static unsigned prot_buffer_len = 0; - -static void async_cb(uv_async_t *handle) -{ - uv_stop(handle->loop); -} - -static void after_pipe_write(uv_write_t* req, int status) -{ - (void)status; -#ifdef SPAWN_DEBUG - netdata_log_info("CLIENT %s called status=%d", __func__, status); -#endif - void **data = req->data; - freez(data[0]); - freez(data[1]); - freez(data); -} - -static void client_parse_spawn_protocol(unsigned source_len, char *source) -{ - unsigned required_len; - struct spawn_prot_header *header; - struct spawn_prot_spawn_result *spawn_result; - struct spawn_prot_cmd_exit_status *exit_status; - struct spawn_cmd_info *cmdinfo; - - while (source_len) { - required_len = sizeof(*header); - if (prot_buffer_len < required_len) - copy_to_prot_buffer(prot_buffer, &prot_buffer_len, required_len - prot_buffer_len, &source, &source_len); - if (prot_buffer_len < required_len) - return; /* Source buffer ran out */ - - header = (struct spawn_prot_header *)prot_buffer; - cmdinfo = (struct spawn_cmd_info *)header->handle; - fatal_assert(NULL != cmdinfo); - - switch(header->opcode) { - case SPAWN_PROT_SPAWN_RESULT: - required_len += sizeof(*spawn_result); - if (prot_buffer_len < 
required_len) - copy_to_prot_buffer(prot_buffer, &prot_buffer_len, required_len - prot_buffer_len, &source, &source_len); - if (prot_buffer_len < required_len) - return; /* Source buffer ran out */ - - spawn_result = (struct spawn_prot_spawn_result *)(header + 1); - uv_mutex_lock(&cmdinfo->mutex); - cmdinfo->pid = spawn_result->exec_pid; - if (0 == cmdinfo->pid) { /* Failed to spawn */ -#ifdef SPAWN_DEBUG - netdata_log_info("CLIENT %s SPAWN_PROT_SPAWN_RESULT failed to spawn.", __func__); -#endif - cmdinfo->flags |= SPAWN_CMD_FAILED_TO_SPAWN | SPAWN_CMD_DONE; - uv_cond_signal(&cmdinfo->cond); - } else { - cmdinfo->exec_run_timestamp = spawn_result->exec_run_timestamp; - cmdinfo->flags |= SPAWN_CMD_IN_PROGRESS; -#ifdef SPAWN_DEBUG - netdata_log_info("CLIENT %s SPAWN_PROT_SPAWN_RESULT in progress.", __func__); -#endif - } - uv_mutex_unlock(&cmdinfo->mutex); - prot_buffer_len = 0; - break; - case SPAWN_PROT_CMD_EXIT_STATUS: - required_len += sizeof(*exit_status); - if (prot_buffer_len < required_len) - copy_to_prot_buffer(prot_buffer, &prot_buffer_len, required_len - prot_buffer_len, &source, &source_len); - if (prot_buffer_len < required_len) - return; /* Source buffer ran out */ - - exit_status = (struct spawn_prot_cmd_exit_status *)(header + 1); - uv_mutex_lock(&cmdinfo->mutex); - cmdinfo->exit_status = exit_status->exec_exit_status; -#ifdef SPAWN_DEBUG - netdata_log_info("CLIENT %s SPAWN_PROT_CMD_EXIT_STATUS %d.", __func__, exit_status->exec_exit_status); -#endif - cmdinfo->flags |= SPAWN_CMD_DONE; - uv_cond_signal(&cmdinfo->cond); - uv_mutex_unlock(&cmdinfo->mutex); - prot_buffer_len = 0; - break; - default: - fatal_assert(0); - break; - } - - } -} - -static void on_pipe_read(uv_stream_t* pipe, ssize_t nread, const uv_buf_t* buf) -{ - if (0 == nread) { - netdata_log_info("%s: Zero bytes read from spawn pipe.", __func__); - } else if (UV_EOF == nread) { - netdata_log_info("EOF found in spawn pipe."); - } else if (nread < 0) { - netdata_log_error("%s: %s", __func__, 
uv_strerror(nread)); - } - - if (nread < 0) { /* stop stream due to EOF or error */ - (void)uv_read_stop((uv_stream_t *)pipe); - } else if (nread) { -#ifdef SPAWN_DEBUG - netdata_log_info("CLIENT %s read %u", __func__, (unsigned)nread); -#endif - client_parse_spawn_protocol(nread, buf->base); - } - if (buf && buf->len) { - freez(buf->base); - } - - if (nread < 0) { - uv_close((uv_handle_t *)pipe, NULL); - } -} - -static void on_read_alloc(uv_handle_t* handle, - size_t suggested_size, - uv_buf_t* buf) -{ - (void)handle; - buf->base = mallocz(suggested_size); - buf->len = suggested_size; -} - -static void spawn_process_cmd(struct spawn_cmd_info *cmdinfo) -{ - int ret; - uv_buf_t *writebuf; - struct write_context *write_ctx; - - void **data = callocz(2, sizeof(void *)); - writebuf = callocz(3, sizeof(uv_buf_t)); - write_ctx = callocz(1, sizeof(*write_ctx)); - - data[0] = write_ctx; - data[1] = writebuf; - write_ctx->write_req.data = data; - - uv_mutex_lock(&cmdinfo->mutex); - cmdinfo->flags |= SPAWN_CMD_PROCESSED; - uv_mutex_unlock(&cmdinfo->mutex); - - write_ctx->header.opcode = SPAWN_PROT_EXEC_CMD; - write_ctx->header.handle = cmdinfo; - write_ctx->payload.command_length = strlen(cmdinfo->command_to_run); - - writebuf[0] = uv_buf_init((char *)&write_ctx->header, sizeof(write_ctx->header)); - writebuf[1] = uv_buf_init((char *)&write_ctx->payload, sizeof(write_ctx->payload)); - writebuf[2] = uv_buf_init((char *)cmdinfo->command_to_run, write_ctx->payload.command_length); - -#ifdef SPAWN_DEBUG - netdata_log_info("CLIENT %s SPAWN_PROT_EXEC_CMD %u", __func__, (unsigned)cmdinfo->serial); -#endif - ret = uv_write(&write_ctx->write_req, (uv_stream_t *)&spawn_channel, writebuf, 3, after_pipe_write); - fatal_assert(ret == 0); -} - -void spawn_client(void *arg) -{ - uv_thread_set_name_np("DAEMON_SPAWN"); - - int ret; - struct completion *completion = (struct completion *)arg; - - loop = mallocz(sizeof(uv_loop_t)); - ret = uv_loop_init(loop); - if (ret) { - 
netdata_log_error("uv_loop_init(): %s", uv_strerror(ret)); - spawn_thread_error = ret; - goto error_after_loop_init; - } - loop->data = NULL; - - spawn_async.data = NULL; - ret = uv_async_init(loop, &spawn_async, async_cb); - if (ret) { - netdata_log_error("uv_async_init(): %s", uv_strerror(ret)); - spawn_thread_error = ret; - goto error_after_async_init; - } - - ret = uv_pipe_init(loop, &spawn_channel, 1); - if (ret) { - netdata_log_error("uv_pipe_init(): %s", uv_strerror(ret)); - spawn_thread_error = ret; - goto error_after_pipe_init; - } - fatal_assert(spawn_channel.ipc); - - ret = create_spawn_server(loop, &spawn_channel, &process); - if (ret) { - netdata_log_error("Failed to fork spawn server process."); - spawn_thread_error = ret; - goto error_after_spawn_server; - } - - spawn_thread_error = 0; - spawn_thread_shutdown = 0; - /* wake up initialization thread */ - completion_mark_complete(completion); - - prot_buffer_len = 0; - ret = uv_read_start((uv_stream_t *)&spawn_channel, on_read_alloc, on_pipe_read); - fatal_assert(ret == 0); - - while (spawn_thread_shutdown == 0) { - struct spawn_cmd_info *cmdinfo; - - uv_run(loop, UV_RUN_DEFAULT); - while (NULL != (cmdinfo = spawn_get_unprocessed_cmd())) { - spawn_process_cmd(cmdinfo); - } - } - /* cleanup operations of the event loop */ - netdata_log_info("Shutting down spawn client event loop."); - uv_close((uv_handle_t *)&spawn_channel, NULL); - uv_close((uv_handle_t *)&spawn_async, NULL); - uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */ - - netdata_log_info("Shutting down spawn client loop complete."); - fatal_assert(0 == uv_loop_close(loop)); - - return; - -error_after_spawn_server: - uv_close((uv_handle_t *)&spawn_channel, NULL); -error_after_pipe_init: - uv_close((uv_handle_t *)&spawn_async, NULL); -error_after_async_init: - uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */ - fatal_assert(0 == uv_loop_close(loop)); -error_after_loop_init: - freez(loop); - - /* wake up initialization thread 
*/ - completion_mark_complete(completion); -} diff --git a/src/spawn/spawn_server.c b/src/spawn/spawn_server.c deleted file mode 100644 index f17669368f..0000000000 --- a/src/spawn/spawn_server.c +++ /dev/null @@ -1,386 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "spawn.h" - -static uv_loop_t *loop; -static uv_pipe_t server_pipe; - -static int server_shutdown = 0; - -static uv_thread_t thread; - -/* spawn outstanding execution structure */ -static avl_tree_lock spawn_outstanding_exec_tree; - -static char prot_buffer[MAX_COMMAND_LENGTH]; -static unsigned prot_buffer_len = 0; - -struct spawn_execution_info { - avl_t avl; - - void *handle; - int exit_status; - pid_t pid; - struct spawn_execution_info *next; -}; - -int spawn_exec_compare(void *a, void *b) -{ - struct spawn_execution_info *spwna = a, *spwnb = b; - - if (spwna->pid < spwnb->pid) return -1; - if (spwna->pid > spwnb->pid) return 1; - - return 0; -} - -/* wake up waiter thread to reap the spawned processes */ -static uv_mutex_t wait_children_mutex; -static uv_cond_t wait_children_cond; -static uint8_t spawned_processes; -static struct spawn_execution_info *child_waited_list; -static uv_async_t child_waited_async; - -static inline struct spawn_execution_info *dequeue_child_waited_list(void) -{ - struct spawn_execution_info *exec_info; - - uv_mutex_lock(&wait_children_mutex); - if (NULL == child_waited_list) { - exec_info = NULL; - } else { - exec_info = child_waited_list; - child_waited_list = exec_info->next; - } - uv_mutex_unlock(&wait_children_mutex); - - return exec_info; -} - -static inline void enqueue_child_waited_list(struct spawn_execution_info *exec_info) -{ - uv_mutex_lock(&wait_children_mutex); - exec_info->next = child_waited_list; - child_waited_list = exec_info; - uv_mutex_unlock(&wait_children_mutex); -} - -static void after_pipe_write(uv_write_t *req, int status) -{ - (void)status; -#ifdef SPAWN_DEBUG - fprintf(stderr, "SERVER %s called status=%d\n", __func__, status); 
-#endif - void **data = req->data; - freez(data[0]); - freez(data[1]); - freez(data); -} - -static void child_waited_async_cb(uv_async_t *async_handle) -{ - uv_buf_t *writebuf; - int ret; - struct spawn_execution_info *exec_info; - struct write_context *write_ctx; - - (void)async_handle; - while (NULL != (exec_info = dequeue_child_waited_list())) { - write_ctx = mallocz(sizeof(*write_ctx)); - - void **data = callocz(2, sizeof(void *)); - writebuf = callocz(2, sizeof(uv_buf_t)); - - data[0] = write_ctx; - data[1] = writebuf; - write_ctx->write_req.data = data; - - write_ctx->header.opcode = SPAWN_PROT_CMD_EXIT_STATUS; - write_ctx->header.handle = exec_info->handle; - write_ctx->exit_status.exec_exit_status = exec_info->exit_status; - writebuf[0] = uv_buf_init((char *) &write_ctx->header, sizeof(write_ctx->header)); - writebuf[1] = uv_buf_init((char *) &write_ctx->exit_status, sizeof(write_ctx->exit_status)); -#ifdef SPAWN_DEBUG - fprintf(stderr, "SERVER %s SPAWN_PROT_CMD_EXIT_STATUS\n", __func__); -#endif - ret = uv_write(&write_ctx->write_req, (uv_stream_t *) &server_pipe, writebuf, 2, after_pipe_write); - fatal_assert(ret == 0); - - freez(exec_info); - } -} - -static void wait_children(void *arg) -{ - siginfo_t i; - struct spawn_execution_info tmp, *exec_info; - avl_t *ret_avl; - - (void)arg; - while (!server_shutdown) { - uv_mutex_lock(&wait_children_mutex); - while (!spawned_processes) { - uv_cond_wait(&wait_children_cond, &wait_children_mutex); - } - spawned_processes = 0; - uv_mutex_unlock(&wait_children_mutex); - - while (!server_shutdown) { - i.si_pid = 0; - if (os_waitid(P_ALL, (id_t) 0, &i, WEXITED) == -1) { - if (errno != ECHILD) - fprintf(stderr, "SPAWN: Failed to wait: %s\n", strerror(errno)); - break; - } - if (i.si_pid == 0) { - fprintf(stderr, "SPAWN: No child exited.\n"); - break; - } -#ifdef SPAWN_DEBUG - fprintf(stderr, "SPAWN: Successfully waited for pid:%d.\n", (int) i.si_pid); -#endif - fatal_assert(CLD_EXITED == i.si_code); - tmp.pid = 
(pid_t)i.si_pid; - while (NULL == (ret_avl = avl_remove_lock(&spawn_outstanding_exec_tree, (avl_t *)&tmp))) { - fprintf(stderr, - "SPAWN: race condition detected, waiting for child process %d to be indexed.\n", - (int)tmp.pid); - (void)sleep_usec(10000); /* 10 msec */ - } - exec_info = (struct spawn_execution_info *)ret_avl; - exec_info->exit_status = i.si_status; - enqueue_child_waited_list(exec_info); - - /* wake up event loop */ - fatal_assert(0 == uv_async_send(&child_waited_async)); - } - } -} - -void spawn_protocol_execute_command(void *handle, char *command_to_run, uint16_t command_length) -{ - uv_buf_t *writebuf; - int ret; - avl_t *avl_ret; - struct spawn_execution_info *exec_info; - struct write_context *write_ctx; - - write_ctx = mallocz(sizeof(*write_ctx)); - void **data = callocz(2, sizeof(void *)); - writebuf = callocz(2, sizeof(uv_buf_t)); - data[0] = write_ctx; - data[1] = writebuf; - write_ctx->write_req.data = data; - - command_to_run[command_length] = '\0'; -#ifdef SPAWN_DEBUG - fprintf(stderr, "SPAWN: executing command '%s'\n", command_to_run); -#endif - if (netdata_spawn(command_to_run, &write_ctx->spawn_result.exec_pid)) { - fprintf(stderr, "SPAWN: Cannot spawn(\"%s\", \"r\").\n", command_to_run); - write_ctx->spawn_result.exec_pid = 0; - } else { /* successfully spawned command */ - write_ctx->spawn_result.exec_run_timestamp = now_realtime_sec(); - - /* record it for when the process finishes execution */ - exec_info = mallocz(sizeof(*exec_info)); - exec_info->handle = handle; - exec_info->pid = write_ctx->spawn_result.exec_pid; - avl_ret = avl_insert_lock(&spawn_outstanding_exec_tree, (avl_t *)exec_info); - fatal_assert(avl_ret == (avl_t *)exec_info); - - /* wake up the thread that blocks waiting for processes to exit */ - uv_mutex_lock(&wait_children_mutex); - spawned_processes = 1; - uv_cond_signal(&wait_children_cond); - uv_mutex_unlock(&wait_children_mutex); - } - - write_ctx->header.opcode = SPAWN_PROT_SPAWN_RESULT; - 
write_ctx->header.handle = handle; - writebuf[0] = uv_buf_init((char *)&write_ctx->header, sizeof(write_ctx->header)); - writebuf[1] = uv_buf_init((char *)&write_ctx->spawn_result, sizeof(write_ctx->spawn_result)); -#ifdef SPAWN_DEBUG - fprintf(stderr, "SERVER %s SPAWN_PROT_SPAWN_RESULT\n", __func__); -#endif - ret = uv_write(&write_ctx->write_req, (uv_stream_t *)&server_pipe, writebuf, 2, after_pipe_write); - fatal_assert(ret == 0); -} - -static void server_parse_spawn_protocol(unsigned source_len, char *source) -{ - unsigned required_len; - struct spawn_prot_header *header; - struct spawn_prot_exec_cmd *payload; - uint16_t command_length; - - while (source_len) { - required_len = sizeof(*header); - if (prot_buffer_len < required_len) - copy_to_prot_buffer(prot_buffer, &prot_buffer_len, required_len - prot_buffer_len, &source, &source_len); - if (prot_buffer_len < required_len) - return; /* Source buffer ran out */ - - header = (struct spawn_prot_header *)prot_buffer; - fatal_assert(SPAWN_PROT_EXEC_CMD == header->opcode); - fatal_assert(NULL != header->handle); - - required_len += sizeof(*payload); - if (prot_buffer_len < required_len) - copy_to_prot_buffer(prot_buffer, &prot_buffer_len, required_len - prot_buffer_len, &source, &source_len); - if (prot_buffer_len < required_len) - return; /* Source buffer ran out */ - - payload = (struct spawn_prot_exec_cmd *)(header + 1); - command_length = payload->command_length; - - required_len += command_length; - if (unlikely(required_len > MAX_COMMAND_LENGTH - 1)) { - fprintf(stderr, "SPAWN: Ran out of protocol buffer space.\n"); - command_length = (MAX_COMMAND_LENGTH - 1) - (sizeof(*header) + sizeof(*payload)); - required_len = MAX_COMMAND_LENGTH - 1; - } - if (prot_buffer_len < required_len) - copy_to_prot_buffer(prot_buffer, &prot_buffer_len, required_len - prot_buffer_len, &source, &source_len); - if (prot_buffer_len < required_len) - return; /* Source buffer ran out */ - - 
spawn_protocol_execute_command(header->handle, payload->command_to_run, command_length); - prot_buffer_len = 0; - } -} - -static void on_pipe_read(uv_stream_t *pipe, ssize_t nread, const uv_buf_t *buf) -{ - if (0 == nread) { - fprintf(stderr, "SERVER %s: Zero bytes read from spawn pipe.\n", __func__); - } else if (UV_EOF == nread) { - fprintf(stderr, "EOF found in spawn pipe.\n"); - } else if (nread < 0) { - fprintf(stderr, "%s: %s\n", __func__, uv_strerror(nread)); - } - - if (nread < 0) { /* stop spawn server due to EOF or error */ - int error; - - uv_mutex_lock(&wait_children_mutex); - server_shutdown = 1; - spawned_processes = 1; - uv_cond_signal(&wait_children_cond); - uv_mutex_unlock(&wait_children_mutex); - - fprintf(stderr, "Shutting down spawn server event loop.\n"); - /* cleanup operations of the event loop */ - (void)uv_read_stop((uv_stream_t *) pipe); - uv_close((uv_handle_t *)&server_pipe, NULL); - - error = uv_thread_join(&thread); - if (error) { - fprintf(stderr, "uv_thread_create(): %s", uv_strerror(error)); - } - /* After joining it is safe to destroy child_waited_async */ - uv_close((uv_handle_t *)&child_waited_async, NULL); - } else if (nread) { -#ifdef SPAWN_DEBUG - fprintf(stderr, "SERVER %s nread %u\n", __func__, (unsigned)nread); -#endif - server_parse_spawn_protocol(nread, buf->base); - } - if (buf && buf->len) { - freez(buf->base); - } -} - -static void on_read_alloc(uv_handle_t *handle, - size_t suggested_size, - uv_buf_t* buf) -{ - (void)handle; - buf->base = mallocz(suggested_size); - buf->len = suggested_size; -} - -static void ignore_signal_handler(int signo) { - /* - * By having a signal handler we allow spawned processes to reset default signal dispositions. Setting SIG_IGN - * would be inherited by the spawned children which is not desirable. 
- */ - (void)signo; -} - -void spawn_server(void) -{ - int error; - - // initialize the system clocks - clocks_init(); - - // close all open file descriptors, except the standard ones - // the caller may have left open files (lxc-attach has this issue) - for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR); - - // Have the libuv IPC pipe be closed when forking child processes - (void) fcntl(0, F_SETFD, FD_CLOEXEC); - fprintf(stderr, "Spawn server is up.\n"); - - // Define signals we want to ignore - struct sigaction sa; - int signals_to_ignore[] = {SIGPIPE, SIGINT, SIGQUIT, SIGTERM, SIGHUP, SIGUSR1, SIGUSR2, SIGBUS, SIGCHLD}; - unsigned ignore_length = sizeof(signals_to_ignore) / sizeof(signals_to_ignore[0]); - - unsigned i; - for (i = 0; i < ignore_length ; ++i) { - sa.sa_flags = 0; - sigemptyset(&sa.sa_mask); - sa.sa_handler = ignore_signal_handler; - if(sigaction(signals_to_ignore[i], &sa, NULL) == -1) - fprintf(stderr, "SPAWN: Failed to change signal handler for signal: %d.\n", signals_to_ignore[i]); - } - - signals_unblock(); - - loop = uv_default_loop(); - loop->data = NULL; - - error = uv_pipe_init(loop, &server_pipe, 1); - if (error) { - fprintf(stderr, "uv_pipe_init(): %s\n", uv_strerror(error)); - exit(error); - } - fatal_assert(server_pipe.ipc); - - error = uv_pipe_open(&server_pipe, 0 /* UV_STDIN_FD */); - if (error) { - fprintf(stderr, "uv_pipe_open(): %s\n", uv_strerror(error)); - exit(error); - } - avl_init_lock(&spawn_outstanding_exec_tree, spawn_exec_compare); - - spawned_processes = 0; - fatal_assert(0 == uv_cond_init(&wait_children_cond)); - fatal_assert(0 == uv_mutex_init(&wait_children_mutex)); - child_waited_list = NULL; - error = uv_async_init(loop, &child_waited_async, child_waited_async_cb); - if (error) { - fprintf(stderr, "uv_async_init(): %s\n", uv_strerror(error)); - exit(error); - } - - error = uv_thread_create(&thread, wait_children, NULL); - if (error) { - fprintf(stderr, 
"uv_thread_create(): %s\n", uv_strerror(error)); - exit(error); - } - - prot_buffer_len = 0; - error = uv_read_start((uv_stream_t *)&server_pipe, on_read_alloc, on_pipe_read); - fatal_assert(error == 0); - - while (!server_shutdown) { - uv_run(loop, UV_RUN_DEFAULT); - } - fprintf(stderr, "Shutting down spawn server loop complete.\n"); - fatal_assert(0 == uv_loop_close(loop)); - - exit(0); -} diff --git a/src/streaming/receiver.c b/src/streaming/receiver.c index 2cbf247dc4..50da031a71 100644 --- a/src/streaming/receiver.c +++ b/src/streaming/receiver.c @@ -70,7 +70,7 @@ static inline int read_stream(struct receiver_state *r, char* buffer, size_t siz ssize_t bytes_read; do { - errno = 0; + errno_clear(); switch(wait_on_socket_or_cancel_with_timeout( #ifdef ENABLE_HTTPS diff --git a/src/streaming/rrdpush.c b/src/streaming/rrdpush.c index 1ce8e4ea84..23a86e7207 100644 --- a/src/streaming/rrdpush.c +++ b/src/streaming/rrdpush.c @@ -54,7 +54,7 @@ char *netdata_ssl_ca_file = NULL; #endif static void load_stream_conf() { - errno = 0; + errno_clear(); char *filename = strdupz_path_subpath(netdata_configured_user_config_dir, "stream.conf"); if(!appconfig_load(&stream_config, filename, 0, NULL)) { nd_log_daemon(NDLP_NOTICE, "CONFIG: cannot load user config '%s'. Will try stock config.", filename); diff --git a/src/streaming/sender.c b/src/streaming/sender.c index 3432e69276..a5fbe6044e 100644 --- a/src/streaming/sender.c +++ b/src/streaming/sender.c @@ -1894,7 +1894,7 @@ void *rrdpush_sender_thread(void *ptr) { // protection from overflow if(unlikely(s->flags & SENDER_FLAG_OVERFLOW)) { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_OVERFLOW); - errno = 0; + errno_clear(); netdata_log_error("STREAM %s [send to %s]: buffer full (allocated %zu bytes) after sending %zu bytes. 
Restarting connection", rrdhost_hostname(s->host), s->connected_to, s->buffer->size, s->sent_bytes_on_this_connection); rrdpush_sender_thread_close_socket(s->host); diff --git a/src/web/api/queries/query.c b/src/web/api/queries/query.c index c97b546b19..6854300f3d 100644 --- a/src/web/api/queries/query.c +++ b/src/web/api/queries/query.c @@ -691,7 +691,7 @@ static void rrdr_set_grouping_function(RRDR *r, RRDR_TIME_GROUPING group_method) } } if(!found) { - errno = 0; + errno_clear(); internal_error(true, "QUERY: grouping method %u not found. Using 'average'", (unsigned int)group_method); r->time_grouping.create = tg_average_create; r->time_grouping.reset = tg_average_reset; diff --git a/src/web/server/web_client.c b/src/web/server/web_client.c index 27fcf29c75..ca1c28e7f9 100644 --- a/src/web/server/web_client.c +++ b/src/web/server/web_client.c @@ -1855,7 +1855,7 @@ ssize_t web_client_receive(struct web_client *w) // do we have any space for more data? buffer_need_bytes(w->response.data, NETDATA_WEB_REQUEST_INITIAL_SIZE); - errno = 0; + errno_clear(); #ifdef ENABLE_HTTPS if ( (web_client_check_conn_tcp(w)) && (netdata_ssl_web_server_ctx) ) {