0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-06 14:35:32 +00:00

spawn server (Windows support for external plugins)

* listening ipv6 sockets may be both ipv4 and ipv6, depending on the IPV6_ONLY flag

* working libmnl ipv46 detection and added latency and retransmits from TCP_INFO

* fix aggregations for rtt and retrans

* code cleanup

* code cleanup

* code cleanup again

* restore field renames

* count namespaces

* run namespaces in parallel

* add libmnl to buildinfo

* lock around safe_fork()

* libmnl ports are in network byte order

* posix spawn server for both executables and callback functions

* local-sockets and network-viewer using the new spawn server

* cleanup spawn servers sockets

* spawn server stdin and stdout are linked to /dev/null

* no need for spinlock in spawn server

* empty all parameters

* new spawn server is now used for plugins.d plugins

* fix for environ

* claiming script runs via the new spawn server

* tc.plugin uses the new spawn server

* analytics, buildinfo and cgroups.plugin use the new spawn server

* cgroup-discovery uses the new spawn server

* added ability to wait or kill spawned processes

* removed old spawn server and now alert notifications use the new one

* remove left-overs

* hide spawn server internals; started working on windows version of the spawn server

* fixes for windows

* more windows work

* more work on windows

* added debug log to spawn server

* fix compilation warnings

* enable static threads on windows

* running external plugins

* working spawn server on windows

* spawn server logs to collectors.log

* log windows last error together with errno

* log updates

* cleanup

* decode_argv does not add an empty parameter

* removed debug log

* removed debug return

* rework on close_range()

* eliminate the need for waitid()

* clear errno on the signal handler

* added universal os_setproctitle() call to support FreeBSD too

* os_get_pid_max() for windows and macos

* isolate pids array from the rest of the code in apps.plugin so that it can be turned into a hashtable
This commit is contained in:
Costa Tsaousis 2024-07-10 14:23:29 +03:00 committed by GitHub
parent 8e98857ebf
commit 166e9caffa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
83 changed files with 2781 additions and 2458 deletions

View file

@ -668,8 +668,6 @@ set(LIBNETDATA_FILES
src/libnetdata/os/byteorder.h
src/libnetdata/onewayalloc/onewayalloc.c
src/libnetdata/onewayalloc/onewayalloc.h
src/libnetdata/popen/popen.c
src/libnetdata/popen/popen.h
src/libnetdata/procfile/procfile.c
src/libnetdata/procfile/procfile.h
src/libnetdata/query_progress/progress.c
@ -720,8 +718,6 @@ set(LIBNETDATA_FILES
src/libnetdata/linked-lists.h
src/libnetdata/storage-point.h
src/libnetdata/bitmap64.h
src/libnetdata/os/waitid.c
src/libnetdata/os/waitid.h
src/libnetdata/os/gettid.c
src/libnetdata/os/gettid.h
src/libnetdata/os/adjtimex.c
@ -750,6 +746,14 @@ set(LIBNETDATA_FILES
src/libnetdata/os/setenv.h
src/libnetdata/os/strndup.c
src/libnetdata/os/strndup.h
src/libnetdata/spawn_server/spawn_server.c
src/libnetdata/spawn_server/spawn_server.h
src/libnetdata/spawn_server/spawn_popen.c
src/libnetdata/spawn_server/spawn_popen.h
src/libnetdata/os/close_range.c
src/libnetdata/os/close_range.h
src/libnetdata/os/setproctitle.c
src/libnetdata/os/setproctitle.h
)
if(ENABLE_PLUGIN_EBPF)
@ -1216,13 +1220,6 @@ set(CLAIM_PLUGIN_FILES
src/claim/claim.h
)
set(SPAWN_PLUGIN_FILES
src/spawn/spawn.c
src/spawn/spawn_server.c
src/spawn/spawn_client.c
src/spawn/spawn.h
)
set(ACLK_ALWAYS_BUILD
src/aclk/aclk_rrdhost_state.h
src/aclk/aclk_proxy.c
@ -1410,7 +1407,6 @@ set(NETDATA_FILES
${STREAMING_PLUGIN_FILES}
${WEB_PLUGIN_FILES}
${CLAIM_PLUGIN_FILES}
${SPAWN_PLUGIN_FILES}
${ACLK_ALWAYS_BUILD}
${PROFILE_PLUGIN_FILES}
)

View file

@ -26,6 +26,12 @@ then
exit 0
fi
BUILD_FOR_PACKAGING="Off"
if [ "${1}" = "package" ]
then
BUILD_FOR_PACKAGING="On"
fi
export PATH="/usr/local/bin:${PATH}"
WT_ROOT="$(pwd)"
@ -54,7 +60,7 @@ fi
-DCMAKE_INSTALL_PREFIX="/opt/netdata" \
-DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
-DCMAKE_C_FLAGS="-fstack-protector-all -O0 -ggdb -Wall -Wextra -Wno-char-subscripts -Wa,-mbig-obj -pipe -DNETDATA_INTERNAL_CHECKS=1 -D_FILE_OFFSET_BITS=64 -D__USE_MINGW_ANSI_STDIO=1" \
-DBUILD_FOR_PACKAGING=On \
-DBUILD_FOR_PACKAGING=${BUILD_FOR_PACKAGING} \
-DUSE_MOLD=Off \
-DNETDATA_USER="${USER}" \
-DDEFAULT_FEATURE_STATE=Off \

View file

@ -106,13 +106,13 @@ static inline int aclk_v2_payload_get_query(const char *payload, char **query_ur
else if(strncmp(payload, "DELETE /", 8) == 0)
start = payload + 7;
else {
errno = 0;
errno_clear();
netdata_log_error("Only accepting requests that start with GET, POST, PUT, DELETE from CLOUD.");
return 1;
}
if(!(end = strstr(payload, HTTP_1_1 HTTP_ENDL))) {
errno = 0;
errno_clear();
netdata_log_error("Doesn't look like HTTP GET request.");
return 1;
}
@ -127,7 +127,7 @@ static int aclk_handle_cloud_http_request_v2(struct aclk_request *cloud_to_agent
{
aclk_query_t query;
errno = 0;
errno_clear();
if (cloud_to_agent->version < ACLK_V_COMPRESSION) {
netdata_log_error(
"This handler cannot reply to request with version older than %d, received %d.",

View file

@ -53,11 +53,8 @@ CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, con
}
#ifndef DISABLE_CLOUD
int exit_code;
pid_t command_pid;
char command_exec_buffer[CLAIMING_COMMAND_LENGTH + 1];
char command_line_buffer[CLAIMING_COMMAND_LENGTH + 1];
FILE *fp_child_output, *fp_child_input;
// This is guaranteed to be set early in main via post_conf_load()
char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL);
@ -92,17 +89,17 @@ CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, con
claiming_arguments);
netdata_log_info("Executing agent claiming command: %s", command_exec_buffer);
fp_child_output = netdata_popen(command_line_buffer, &command_pid, &fp_child_input);
if(!fp_child_output) {
POPEN_INSTANCE *instance = spawn_popen_run(command_exec_buffer);
if(!instance) {
netdata_log_error("Cannot popen(\"%s\").", command_exec_buffer);
return CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT;
}
netdata_log_info("Waiting for claiming command '%s' to finish.", command_exec_buffer);
char read_buffer[100 + 1];
while (fgets(read_buffer, 100, fp_child_output) != NULL) ;
while (fgets(read_buffer, 100, instance->child_stdout_fp) != NULL) ;
exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
int exit_code = spawn_popen_wait(instance);
netdata_log_info("Agent claiming command '%s' returned with code %d", command_exec_buffer, exit_code);
if (0 == exit_code) {
@ -113,7 +110,7 @@ CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, con
netdata_log_error("Agent claiming command '%s' failed to complete its run", command_exec_buffer);
return CLAIM_AGENT_CLAIM_SCRIPT_FAILED;
}
errno = 0;
errno_clear();
unsigned maximum_known_exit_code = sizeof(claiming_errors) / sizeof(claiming_errors[0]) - 1;
if ((unsigned)exit_code > maximum_known_exit_code) {
@ -214,7 +211,7 @@ void load_cloud_conf(int silent)
netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
char *filename;
errno = 0;
errno_clear();
int ret = 0;

View file

@ -51,7 +51,6 @@ size_t
inodes_changed_counter = 0,
links_changed_counter = 0,
targets_assignment_counter = 0,
all_pids_count = 0, // the number of processes running
apps_groups_targets_count = 0; // # of apps_groups.conf targets
int
@ -136,20 +135,6 @@ struct target
size_t pagesize;
struct pid_stat
*root_of_pids = NULL, // global list of all processes running
**all_pids = NULL; // to avoid allocations, we pre-allocate
// a pointer for each pid in the entire pid space.
#if (ALL_PIDS_ARE_READ_INSTANTLY == 0)
// Another pre-allocated list of all possible pids.
// We need it to sort pids and assign them a unique sortlist id, so that we
// read parents before children. This is needed to prevent a situation where
// a child is found running, but until we read its parent, it has exited and
// its parent has accumulated its resources.
pid_t *all_pids_sortlist = NULL;
#endif
// ----------------------------------------------------------------------------
int managed_log(struct pid_stat *p, PID_LOG log, int status) {
@ -208,7 +193,7 @@ int managed_log(struct pid_stat *p, PID_LOG log, int status) {
}
}
}
errno = 0;
errno_clear();
}
else if(unlikely(p->log_thrown & log)) {
// netdata_log_error("unsetting log %u on pid %d", log, p->pid);
@ -300,12 +285,14 @@ static void apply_apps_groups_targets_inheritance(void) {
}
// init goes always to default target
if(all_pids[INIT_PID] && !all_pids[INIT_PID]->matched_by_config)
all_pids[INIT_PID]->target = apps_groups_default_target;
struct pid_stat *pi = find_pid_entry(INIT_PID);
if(pi && !pi->matched_by_config)
pi->target = apps_groups_default_target;
// pid 0 goes always to default target
if(all_pids[0] && !all_pids[INIT_PID]->matched_by_config)
all_pids[0]->target = apps_groups_default_target;
pi = find_pid_entry(0);
if(pi && !pi->matched_by_config)
pi->target = apps_groups_default_target;
// give a default target on all top level processes
if(unlikely(debug_enabled)) loops++;
@ -320,8 +307,9 @@ static void apply_apps_groups_targets_inheritance(void) {
p->sortlist = sortlist++;
}
if(all_pids[1])
all_pids[1]->sortlist = sortlist++;
pi = find_pid_entry(1);
if(pi)
pi->sortlist = sortlist++;
// give a target to all merged child processes
found = 1;
@ -1052,12 +1040,7 @@ int main(int argc, char **argv) {
netdata_log_info("started on pid %d", getpid());
users_and_groups_init();
#if (ALL_PIDS_ARE_READ_INSTANTLY == 0)
all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max + 1);
#endif
all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max + 1);
pids_init();
// ------------------------------------------------------------------------
// the event loop for functions

View file

@ -17,9 +17,7 @@
#include <sys/proc_info.h>
#include <sys/sysctl.h>
#include <mach/mach_time.h> // For mach_timebase_info_data_t and mach_timebase_info
#endif
#if defined(__APPLE__)
extern mach_timebase_info_data_t mach_info;
#endif
@ -47,7 +45,6 @@ struct pid_info {
struct proc_taskinfo taskinfo;
struct proc_bsdinfo bsdinfo;
struct rusage_info_v4 rusageinfo;
};
#endif
@ -467,9 +464,7 @@ extern struct target
*users_root_target,
*groups_root_target;
extern struct pid_stat
*root_of_pids,
**all_pids;
extern struct pid_stat *root_of_pids;
extern int update_every;
extern unsigned int time_factor;
@ -559,4 +554,7 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const
void send_collected_data_to_netdata(struct target *root, const char *type, usec_t dt);
void send_resource_usage_to_netdata(usec_t dt);
void pids_init(void);
struct pid_stat *find_pid_entry(pid_t pid);
#endif //NETDATA_APPS_PLUGIN_H

View file

@ -33,7 +33,7 @@ static inline bool read_proc_pid_limits_per_os(struct pid_stat *p, void *ptr __m
bool ret = false;
bool read_limits = false;
errno = 0;
errno_clear();
proc_pid_limits_buffer[0] = '\0';
kernel_uint_t all_fds = pid_openfds_sum(p);

View file

@ -2,18 +2,44 @@
#include "apps_plugin.h"
static inline struct pid_stat *get_pid_entry(pid_t pid) {
if(likely(all_pids[pid]))
return all_pids[pid];
static struct pid_stat **all_pids = NULL;
size_t all_pids_count = 0; // the number of processes running
struct pid_stat *p = callocz(sizeof(struct pid_stat), 1);
struct pid_stat *root_of_pids = NULL; // global linked list of all processes running
#if (ALL_PIDS_ARE_READ_INSTANTLY == 0)
// Another pre-allocated list of all possible pids.
// We need it to assign them a unique sortlist id, so that we
// read parents before children. This is needed to prevent a situation where
// a child is found running, but until we read its parent, it has exited and
// its parent has accumulated its resources.
pid_t *all_pids_sortlist = NULL;
#endif
// Allocate the zero-initialized, pid-indexed tables used by this file.
// Each table gets pid_max + 1 slots, i.e. one slot for every index 0..pid_max.
void pids_init(void) {
    const size_t slots = (size_t)pid_max + 1;

#if (ALL_PIDS_ARE_READ_INSTANTLY == 0)
    // only needed when pids are not all read in one go (see all_pids_sortlist)
    all_pids_sortlist = callocz(slots, sizeof(pid_t));
#endif

    all_pids = callocz(slots, sizeof(struct pid_stat *));
}
inline struct pid_stat *find_pid_entry(pid_t pid) {
return all_pids[pid];
}
static inline struct pid_stat *get_or_allocate_pid_entry(pid_t pid) {
struct pid_stat *p = find_pid_entry(pid);
if(likely(p))
return p;
p = callocz(sizeof(struct pid_stat), 1);
p->fds = mallocz(sizeof(struct pid_fd) * MAX_SPARE_FDS);
p->fds_size = MAX_SPARE_FDS;
init_pid_fds(p, 0, p->fds_size);
p->pid = pid;
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(root_of_pids, p, prev, next);
all_pids[pid] = p;
all_pids_count++;
@ -21,7 +47,7 @@ static inline struct pid_stat *get_pid_entry(pid_t pid) {
}
static inline void del_pid_entry(pid_t pid) {
struct pid_stat *p = all_pids[pid];
struct pid_stat *p = find_pid_entry(pid);
if(unlikely(!p)) {
netdata_log_error("attempted to free pid %d that is not allocated.", pid);
@ -62,7 +88,7 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) {
return 0;
}
struct pid_stat *p = get_pid_entry(pid);
struct pid_stat *p = get_or_allocate_pid_entry(pid);
if(unlikely(!p || p->read)) return 0;
p->read = true;

View file

@ -178,11 +178,9 @@ static inline void discovery_rename_cgroup(struct cgroup *cg) {
netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id);
netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id);
pid_t cgroup_pid;
FILE *fp_child_input, *fp_child_output;
(void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id);
if (!fp_child_output) {
POPEN_INSTANCE *instance = spawn_popen_run_variadic(cgroups_rename_script, cg->id, cg->intermediate_id, NULL);
if (!instance) {
collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id);
cg->pending_renames = 0;
cg->processed = 1;
@ -190,8 +188,8 @@ static inline void discovery_rename_cgroup(struct cgroup *cg) {
}
char buffer[CGROUP_CHARTID_LINE_MAX + 1];
char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output);
int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, instance->child_stdout_fp);
int exit_code = spawn_popen_wait(instance);
switch (exit_code) {
case 0:
@ -1085,7 +1083,6 @@ static void cgroup_cleanup_ebpf_integration()
static inline void read_cgroup_network_interfaces(struct cgroup *cg) {
netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id);
pid_t cgroup_pid;
char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
@ -1096,16 +1093,15 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) {
}
netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id);
FILE *fp_child_input, *fp_child_output;
(void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier);
if(!fp_child_output) {
POPEN_INSTANCE *instance = spawn_popen_run_variadic(cgroups_network_interface_script, "--cgroup", cgroup_identifier, NULL);
if(!instance) {
collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier);
return;
}
char *s;
char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) {
while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, instance->child_stdout_fp))) {
trim(s);
if(*s && *s != '\n') {
@ -1145,7 +1141,7 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) {
}
}
netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
spawn_popen_wait(instance);
}
static inline void discovery_process_cgroup(struct cgroup *cg) {

View file

@ -421,19 +421,19 @@ void detect_veth_interfaces(pid_t pid) {
host = read_proc_net_dev("host", netdata_configured_host_prefix);
if(!host) {
errno = 0;
errno_clear();
collector_error("cannot read host interface list.");
goto cleanup;
}
if(!eligible_ifaces(host)) {
errno = 0;
errno_clear();
collector_info("there are no double-linked host interfaces available.");
goto cleanup;
}
if(switch_namespace(netdata_configured_host_prefix, pid)) {
errno = 0;
errno_clear();
collector_error("cannot switch to the namespace of pid %u", (unsigned int) pid);
goto cleanup;
}
@ -444,13 +444,13 @@ void detect_veth_interfaces(pid_t pid) {
cgroup = read_proc_net_dev("cgroup", NULL);
if(!cgroup) {
errno = 0;
errno_clear();
collector_error("cannot read cgroup interface list.");
goto cleanup;
}
if(!eligible_ifaces(cgroup)) {
errno = 0;
errno_clear();
collector_error("there are not double-linked cgroup interfaces available.");
goto cleanup;
}
@ -505,22 +505,20 @@ void call_the_helper(pid_t pid, const char *cgroup) {
collector_info("running: %s", command);
pid_t cgroup_pid;
FILE *fp_child_input, *fp_child_output;
POPEN_INSTANCE *pi;
if(cgroup) {
(void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--cgroup", cgroup);
}
if(cgroup)
pi = spawn_popen_run_variadic(PLUGINS_DIR "/cgroup-network-helper.sh", "--cgroup", cgroup, NULL);
else {
char buffer[100];
snprintfz(buffer, sizeof(buffer) - 1, "%d", pid);
(void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer);
pi = spawn_popen_run_variadic(PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer, NULL);
}
if(fp_child_output) {
if(pi) {
char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
char *s;
while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) {
while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, pi->child_stdout_fp))) {
trim(s);
if(*s && *s != '\n') {
@ -536,7 +534,7 @@ void call_the_helper(pid_t pid, const char *cgroup) {
}
}
netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
spawn_popen_kill(pi);
}
else
collector_error("cannot execute cgroup-network helper script: %s", command);
@ -701,7 +699,7 @@ int main(int argc, char **argv) {
pid = atoi(argv[arg+1]);
if(pid <= 0) {
errno = 0;
errno_clear();
collector_error("Invalid pid %d given", (int) pid);
return 2;
}
@ -719,7 +717,7 @@ int main(int argc, char **argv) {
if(helper) call_the_helper(pid, cgroup);
if(pid <= 0 && !detected_devices) {
errno = 0;
errno_clear();
collector_error("Cannot find a cgroup PID from cgroup '%s'", cgroup);
}
}

View file

@ -73,30 +73,19 @@ struct discovery_thread discovery_thread;
#define MAXSIZE_PROC_CMDLINE 4096
static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec)
{
pid_t command_pid;
enum cgroups_systemd_setting retval = SYSTEMD_CGROUP_ERR;
char buf[MAXSIZE_PROC_CMDLINE];
char *begin, *end;
FILE *fp_child_input;
FILE *fp_child_output = netdata_popen(exec, &command_pid, &fp_child_input);
if (!fp_child_output)
POPEN_INSTANCE *pi = spawn_popen_run(exec);
if(!pi)
return retval;
int fd = fileno(fp_child_output);
if (fd == -1 ) {
collector_error("Cannot get the output of \"%s\": failed to get file descriptor", exec);
netdata_pclose(fp_child_input, fp_child_output, command_pid);
return retval;
}
struct pollfd pfd;
pfd.fd = fd;
pfd.fd = spawn_server_instance_read_fd(pi->si);
pfd.events = POLLIN;
int timeout = 3000; // milliseconds
int ret = poll(&pfd, 1, timeout);
if (ret == -1) {
@ -104,7 +93,7 @@ static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec)
} else if (ret == 0) {
collector_info("Cannot get the output of \"%s\" within timeout (%d ms)", exec, timeout);
} else {
while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) {
while (fgets(buf, MAXSIZE_PROC_CMDLINE, pi->child_stdout_fp) != NULL) {
if ((begin = strstr(buf, SYSTEMD_HIERARCHY_STRING))) {
end = begin = begin + strlen(SYSTEMD_HIERARCHY_STRING);
if (!*begin)
@ -123,7 +112,7 @@ static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec)
}
}
if (netdata_pclose(fp_child_input, fp_child_output, command_pid))
if(spawn_popen_wait(pi) != 0)
return SYSTEMD_CGROUP_ERR;
return retval;
@ -159,25 +148,23 @@ static enum cgroups_type cgroups_try_detect_version()
collector_info("cgroups version: can't detect using statfs (fs type), falling back to heuristics.");
pid_t command_pid;
char buf[MAXSIZE_PROC_CMDLINE];
enum cgroups_systemd_setting systemd_setting;
int cgroups2_available = 0;
// 1. check if cgroups2 available on system at all
FILE *fp_child_input;
FILE *fp_child_output = netdata_popen("grep cgroup /proc/filesystems", &command_pid, &fp_child_input);
if (!fp_child_output) {
collector_error("popen failed");
POPEN_INSTANCE *instance = spawn_popen_run("grep cgroup /proc/filesystems");
if(!instance) {
collector_error("cannot run 'grep cgroup /proc/filesystems'");
return CGROUPS_AUTODETECT_FAIL;
}
while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) {
while (fgets(buf, MAXSIZE_PROC_CMDLINE, instance->child_stdout_fp) != NULL) {
if (strstr(buf, "cgroup2")) {
cgroups2_available = 1;
break;
}
}
if(netdata_pclose(fp_child_input, fp_child_output, command_pid))
if(spawn_popen_wait(instance) != 0)
return CGROUPS_AUTODETECT_FAIL;
if(!cgroups2_available)

View file

@ -231,7 +231,7 @@ int main(int argc, char **argv) {
parse_command_line(argc, argv);
errno = 0;
errno_clear();
dict_dest_job_metrics = dictionary_create(DICT_OPTION_SINGLE_THREADED);

View file

@ -441,7 +441,7 @@ static inline int managed_log(struct ebpf_pid_stat *p, uint32_t log, int status)
}
}
}
errno = 0;
errno_clear();
} else if (unlikely(p->log_thrown & log)) {
// netdata_log_error("unsetting log %u on pid %d", log, p->pid);
p->log_thrown &= ~log;

View file

@ -1120,7 +1120,7 @@ static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *stt,
}
int netdata_ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *stt) {
errno = 0;
errno_clear();
if(type & IPMI_COLLECT_TYPE_SENSORS) {
stt->sensors.collected = 0;
@ -1930,7 +1930,7 @@ int main (int argc, char **argv) {
collector_error("%s(): ignoring parameter '%s'", __FUNCTION__, argv[i]);
}
errno = 0;
errno_clear();
if(freq_s && freq_s < update_every)
collector_info("%s(): update frequency %d seconds is too small for IPMI. Using %d.",

View file

@ -4,12 +4,22 @@
#include "libnetdata/libnetdata.h"
#include "libnetdata/required_dummies.h"
static SPAWN_SERVER *spawn_srv = NULL;
#define ENABLE_DETAILED_VIEW
#define LOCAL_SOCKETS_EXTENDED_MEMBERS struct { \
size_t count; \
const char *local_address_space; \
const char *remote_address_space; \
struct { \
pid_t pid; \
uid_t uid; \
SOCKET_DIRECTION direction; \
int state; \
uint64_t net_ns_inode; \
struct socket_endpoint server; \
const char *local_address_space; \
const char *remote_address_space; \
} aggregated_key; \
} network_viewer;
#include "libnetdata/maps/local-sockets.h"
@ -57,19 +67,49 @@ ENUM_STR_MAP_DEFINE(TCP_STATE) = {
};
ENUM_STR_DEFINE_FUNCTIONS(TCP_STATE, 0, "unknown");
static void local_socket_to_json_array(BUFFER *wb, LOCAL_SOCKET *n, uint64_t proc_self_net_ns_inode, bool aggregated) {
// State passed (as the opaque data pointer) to the socket-to-JSON code:
// the output buffer, plus running maxima of a few per-socket TCP info values.
struct sockets_stats {
// buffer the JSON array items of each socket are appended to
BUFFER *wb;
// largest value seen so far among the sockets written out;
// each field is only ever raised, never lowered, by local_socket_to_json_array()
struct {
uint32_t tcpi_rtt;
uint32_t tcpi_rcv_rtt;
uint32_t tcpi_total_retrans;
} max;
};
static void local_socket_to_json_array(struct sockets_stats *st, LOCAL_SOCKET *n, uint64_t proc_self_net_ns_inode, bool aggregated) {
if(n->direction == SOCKET_DIRECTION_NONE)
return;
BUFFER *wb = st->wb;
char local_address[INET6_ADDRSTRLEN];
char remote_address[INET6_ADDRSTRLEN];
char *protocol;
if(n->local.family == AF_INET) {
ipv4_address_to_txt(n->local.ip.ipv4, local_address);
ipv4_address_to_txt(n->remote.ip.ipv4, remote_address);
if(local_sockets_is_zero_address(&n->remote))
remote_address[0] = '\0';
else
ipv4_address_to_txt(n->remote.ip.ipv4, remote_address);
protocol = n->local.protocol == IPPROTO_TCP ? "tcp4" : "udp4";
}
else if(is_local_socket_ipv46(n)) {
strncpyz(local_address, "*", sizeof(local_address) - 1);
remote_address[0] = '\0';
protocol = n->local.protocol == IPPROTO_TCP ? "tcp46" : "udp46";
}
else if(n->local.family == AF_INET6) {
ipv6_address_to_txt(&n->local.ip.ipv6, local_address);
ipv6_address_to_txt(&n->remote.ip.ipv6, remote_address);
if(local_sockets_is_zero_address(&n->remote))
remote_address[0] = '\0';
else
ipv6_address_to_txt(&n->remote.ip.ipv6, remote_address);
protocol = n->local.protocol == IPPROTO_TCP ? "tcp6" : "udp6";
}
else
@ -117,43 +157,45 @@ static void local_socket_to_json_array(BUFFER *wb, LOCAL_SOCKET *n, uint64_t pro
buffer_json_add_array_item_string(wb, local_address);
buffer_json_add_array_item_uint64(wb, n->local.port);
}
buffer_json_add_array_item_string(wb, n->network_viewer.local_address_space);
buffer_json_add_array_item_string(wb, n->network_viewer.aggregated_key.local_address_space);
if(!aggregated) {
buffer_json_add_array_item_string(wb, remote_address);
buffer_json_add_array_item_uint64(wb, n->remote.port);
}
buffer_json_add_array_item_string(wb, n->network_viewer.remote_address_space);
buffer_json_add_array_item_string(wb, n->network_viewer.aggregated_key.remote_address_space);
uint16_t server_port = 0;
const char *server_address = NULL;
const char *client_address_space = NULL;
const char *server_address_space = NULL;
const char *server_address;
const char *client_address_space;
const char *server_address_space;
switch (n->direction) {
case SOCKET_DIRECTION_LISTEN:
case SOCKET_DIRECTION_INBOUND:
case SOCKET_DIRECTION_LOCAL_INBOUND:
server_port = n->local.port;
server_address = local_address;
server_address_space = n->network_viewer.local_address_space;
client_address_space = n->network_viewer.remote_address_space;
server_address_space = n->network_viewer.aggregated_key.local_address_space;
client_address_space = n->network_viewer.aggregated_key.remote_address_space;
break;
case SOCKET_DIRECTION_OUTBOUND:
case SOCKET_DIRECTION_LOCAL_OUTBOUND:
server_port = n->remote.port;
server_address = remote_address;
server_address_space = n->network_viewer.remote_address_space;
client_address_space = n->network_viewer.local_address_space;
server_address_space = n->network_viewer.aggregated_key.remote_address_space;
client_address_space = n->network_viewer.aggregated_key.local_address_space;
break;
case SOCKET_DIRECTION_NONE:
server_address = NULL;
client_address_space = NULL;
server_address_space = NULL;
break;
}
if(aggregated)
buffer_json_add_array_item_string(wb, server_address);
buffer_json_add_array_item_uint64(wb, server_port);
if(aggregated) {
buffer_json_add_array_item_string(wb, server_address);
}
buffer_json_add_array_item_uint64(wb, n->network_viewer.aggregated_key.server.port);
if(aggregated) {
buffer_json_add_array_item_string(wb, client_address_space);
@ -162,58 +204,176 @@ static void local_socket_to_json_array(BUFFER *wb, LOCAL_SOCKET *n, uint64_t pro
// buffer_json_add_array_item_uint64(wb, n->inode);
// buffer_json_add_array_item_uint64(wb, n->net_ns_inode);
// RTT
buffer_json_add_array_item_double(wb, (double)n->info.tcp.tcpi_rtt / (double)USEC_PER_MS);
if(st->max.tcpi_rtt < n->info.tcp.tcpi_rtt)
st->max.tcpi_rtt = n->info.tcp.tcpi_rtt;
// Receiver RTT
buffer_json_add_array_item_double(wb, (double)n->info.tcp.tcpi_rcv_rtt / (double)USEC_PER_MS);
if(st->max.tcpi_rcv_rtt < n->info.tcp.tcpi_rcv_rtt)
st->max.tcpi_rcv_rtt = n->info.tcp.tcpi_rcv_rtt;
// Retransmissions
buffer_json_add_array_item_uint64(wb, n->info.tcp.tcpi_total_retrans);
if(st->max.tcpi_total_retrans < n->info.tcp.tcpi_total_retrans)
st->max.tcpi_total_retrans = n->info.tcp.tcpi_total_retrans;
// count
buffer_json_add_array_item_uint64(wb, n->network_viewer.count);
}
buffer_json_array_close(wb);
}
static void local_sockets_cb_to_json(LS_STATE *ls, LOCAL_SOCKET *n, void *data) {
static void populate_aggregated_key(LOCAL_SOCKET *n) {
n->network_viewer.count = 1;
n->network_viewer.local_address_space = local_sockets_address_space(&n->local);
n->network_viewer.remote_address_space = local_sockets_address_space(&n->remote);
local_socket_to_json_array(data, n, ls->proc_self_net_ns_inode, false);
}
static void local_sockets_cb_to_aggregation(LS_STATE *ls __maybe_unused, LOCAL_SOCKET *n, void *data) {
SIMPLE_HASHTABLE_AGGREGATED_SOCKETS *ht = data;
n->network_viewer.count = 1;
n->network_viewer.local_address_space = local_sockets_address_space(&n->local);
n->network_viewer.remote_address_space = local_sockets_address_space(&n->remote);
n->network_viewer.aggregated_key.pid = n->pid;
n->network_viewer.aggregated_key.uid = n->uid;
n->network_viewer.aggregated_key.direction = n->direction;
n->network_viewer.aggregated_key.net_ns_inode = n->net_ns_inode;
n->network_viewer.aggregated_key.state = n->state;
switch(n->direction) {
case SOCKET_DIRECTION_INBOUND:
case SOCKET_DIRECTION_LOCAL_INBOUND:
case SOCKET_DIRECTION_LISTEN:
memset(&n->remote.ip, 0, sizeof(n->remote.ip));
n->remote.port = 0;
n->network_viewer.aggregated_key.server = n->local;
break;
case SOCKET_DIRECTION_OUTBOUND:
case SOCKET_DIRECTION_LOCAL_OUTBOUND:
memset(&n->local.ip, 0, sizeof(n->local.ip));
n->local.port = 0;
n->network_viewer.aggregated_key.server = n->remote;
break;
case SOCKET_DIRECTION_NONE:
return;
break;
}
n->inode = 0;
n->local_ip_hash = 0;
n->remote_ip_hash = 0;
n->local_port_hash = 0;
n->timer = 0;
n->retransmits = 0;
n->expires = 0;
n->rqueue = 0;
n->wqueue = 0;
memset(&n->local_port_key, 0, sizeof(n->local_port_key));
n->network_viewer.aggregated_key.local_address_space = local_sockets_address_space(&n->local);
n->network_viewer.aggregated_key.remote_address_space = local_sockets_address_space(&n->remote);
}
XXH64_hash_t hash = XXH3_64bits(n, sizeof(*n));
// Per-socket callback for the detailed (non-aggregated) output:
// fills in the socket's network_viewer fields, then appends the socket
// to the JSON output described by data (a struct sockets_stats *).
static void local_sockets_cb_to_json(LS_STATE *ls, LOCAL_SOCKET *n, void *data) {
    populate_aggregated_key(n);

    local_socket_to_json_array((struct sockets_stats *)data, n, ls->proc_self_net_ns_inode, false);
}
// Aggregation helpers: fold the value (b) into the accumulator (a).
//  - (a) must be a modifiable lvalue.
//  - Arguments may be evaluated more than once: do not pass expressions
//    with side effects (e.g. i++ or function calls that mutate state).
//  - Every expansion is wrapped in parentheses as a whole, so it behaves the
//    same as a stand-alone statement and inside a larger expression.
#define KEEP_THE_BIGGER(a, b) ((a) = ((a) < (b)) ? (b) : (a))
#define KEEP_THE_SMALLER(a, b) ((a) = ((a) > (b)) ? (b) : (a))
#define SUM_THEM_ALL(a, b) ((a) += (b))
#define OR_THEM_ALL(a, b) ((a) |= (b))
static void local_sockets_cb_to_aggregation(LS_STATE *ls __maybe_unused, LOCAL_SOCKET *n, void *data) {
SIMPLE_HASHTABLE_AGGREGATED_SOCKETS *ht = data;
populate_aggregated_key(n);
XXH64_hash_t hash = XXH3_64bits(&n->network_viewer.aggregated_key, sizeof(n->network_viewer.aggregated_key));
SIMPLE_HASHTABLE_SLOT_AGGREGATED_SOCKETS *sl = simple_hashtable_get_slot_AGGREGATED_SOCKETS(ht, hash, n, true);
LOCAL_SOCKET *t = SIMPLE_HASHTABLE_SLOT_DATA(sl);
if(t) {
t->network_viewer.count++;
KEEP_THE_BIGGER(t->timer, n->timer);
KEEP_THE_BIGGER(t->retransmits, n->retransmits);
KEEP_THE_SMALLER(t->expires, n->expires);
KEEP_THE_BIGGER(t->rqueue, n->rqueue);
KEEP_THE_BIGGER(t->wqueue, n->wqueue);
// The current number of consecutive retransmissions that have occurred for the most recently transmitted segment.
SUM_THEM_ALL(t->info.tcp.tcpi_retransmits, n->info.tcp.tcpi_retransmits);
// The total number of retransmissions that have occurred for the entire connection since it was established.
SUM_THEM_ALL(t->info.tcp.tcpi_total_retrans, n->info.tcp.tcpi_total_retrans);
// The total number of segments that have been retransmitted since the connection was established.
SUM_THEM_ALL(t->info.tcp.tcpi_retrans, n->info.tcp.tcpi_retrans);
// The number of keepalive probes sent
SUM_THEM_ALL(t->info.tcp.tcpi_probes, n->info.tcp.tcpi_probes);
// The number of times the retransmission timeout has been backed off.
SUM_THEM_ALL(t->info.tcp.tcpi_backoff, n->info.tcp.tcpi_backoff);
// A bitmask representing the TCP options currently enabled for the connection, such as SACK and Timestamps.
OR_THEM_ALL(t->info.tcp.tcpi_options, n->info.tcp.tcpi_options);
// The send window scale value used for this connection
KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_wscale, n->info.tcp.tcpi_snd_wscale);
// The receive window scale value used for this connection
KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_wscale, n->info.tcp.tcpi_rcv_wscale);
// Retransmission timeout in milliseconds
KEEP_THE_SMALLER(t->info.tcp.tcpi_rto, n->info.tcp.tcpi_rto);
// The delayed acknowledgement timeout in milliseconds.
KEEP_THE_SMALLER(t->info.tcp.tcpi_ato, n->info.tcp.tcpi_ato);
// The maximum segment size for sending.
KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_mss, n->info.tcp.tcpi_snd_mss);
// The maximum segment size for receiving.
KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_mss, n->info.tcp.tcpi_rcv_mss);
// The number of unacknowledged segments
SUM_THEM_ALL(t->info.tcp.tcpi_unacked, n->info.tcp.tcpi_unacked);
// The number of segments that have been selectively acknowledged
SUM_THEM_ALL(t->info.tcp.tcpi_sacked, n->info.tcp.tcpi_sacked);
// The number of segments that have been selectively acknowledged
SUM_THEM_ALL(t->info.tcp.tcpi_sacked, n->info.tcp.tcpi_sacked);
// The number of lost segments.
SUM_THEM_ALL(t->info.tcp.tcpi_lost, n->info.tcp.tcpi_lost);
// The number of forward acknowledgment segments.
SUM_THEM_ALL(t->info.tcp.tcpi_fackets, n->info.tcp.tcpi_fackets);
// The time in milliseconds since the last data was sent.
KEEP_THE_SMALLER(t->info.tcp.tcpi_last_data_sent, n->info.tcp.tcpi_last_data_sent);
// The time in milliseconds since the last acknowledgment was sent (not tracked in Linux, hence often zero).
KEEP_THE_SMALLER(t->info.tcp.tcpi_last_ack_sent, n->info.tcp.tcpi_last_ack_sent);
// The time in milliseconds since the last data was received.
KEEP_THE_SMALLER(t->info.tcp.tcpi_last_data_recv, n->info.tcp.tcpi_last_data_recv);
// The time in milliseconds since the last acknowledgment was received.
KEEP_THE_SMALLER(t->info.tcp.tcpi_last_ack_recv, n->info.tcp.tcpi_last_ack_recv);
// The path MTU for this connection
KEEP_THE_SMALLER(t->info.tcp.tcpi_pmtu, n->info.tcp.tcpi_pmtu);
// The slow start threshold for receiving
KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_ssthresh, n->info.tcp.tcpi_rcv_ssthresh);
// The slow start threshold for sending
KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_ssthresh, n->info.tcp.tcpi_snd_ssthresh);
// The smoothed round trip time in microseconds
KEEP_THE_BIGGER(t->info.tcp.tcpi_rtt, n->info.tcp.tcpi_rtt);
// The round trip time variance in milliseconds.
KEEP_THE_BIGGER(t->info.tcp.tcpi_rttvar, n->info.tcp.tcpi_rttvar);
// The size of the sending congestion window.
KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_cwnd, n->info.tcp.tcpi_snd_cwnd);
// The maximum segment size that could be advertised.
KEEP_THE_BIGGER(t->info.tcp.tcpi_advmss, n->info.tcp.tcpi_advmss);
// The reordering metric
KEEP_THE_SMALLER(t->info.tcp.tcpi_reordering, n->info.tcp.tcpi_reordering);
// The receive round trip time in microseconds.
KEEP_THE_BIGGER(t->info.tcp.tcpi_rcv_rtt, n->info.tcp.tcpi_rcv_rtt);
// The available space in the receive buffer.
KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_space, n->info.tcp.tcpi_rcv_space);
}
else {
t = mallocz(sizeof(*t));
@ -240,6 +400,10 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu
wb->content_type = CT_APPLICATION_JSON;
buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY);
struct sockets_stats st = {
.wb = wb,
};
buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
buffer_json_member_add_string(wb, "type", "table");
buffer_json_member_add_time_t(wb, "update_every", 5);
@ -328,9 +492,12 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu
.cmdline = true,
.comm = true,
.namespaces = true,
.tcp_info = true,
.max_errors = 10,
.max_concurrent_namespaces = 5,
},
.spawn_server = spawn_srv,
.stats = { 0 },
.sockets_hashtable = { 0 },
.local_ips_hashtable = { 0 },
@ -345,7 +512,7 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu
}
else {
ls.config.cb = local_sockets_cb_to_json;
ls.config.data = wb;
ls.config.data = &st;
}
local_sockets_process(&ls);
@ -366,7 +533,7 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu
qsort(array, added, sizeof(LOCAL_SOCKET *), local_sockets_compar);
for(size_t i = 0; i < added ;i++) {
local_socket_to_json_array(wb, array[i], proc_self_net_ns_inode, true);
local_socket_to_json_array(&st, array[i], proc_self_net_ns_inode, true);
string_freez(array[i]->cmdline);
freez(array[i]);
}
@ -555,14 +722,40 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu
// RRDF_FIELD_OPTS_NONE,
// NULL);
// RTT
buffer_rrdf_table_add_field(wb, field_id++, "RTT", aggregated ? "Max Smoothed Round Trip Time" : "Smoothed Round Trip Time",
RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
2, "ms", st.max.tcpi_rtt / USEC_PER_MS, RRDF_FIELD_SORT_DESCENDING, NULL,
RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE,
RRDF_FIELD_OPTS_VISIBLE,
NULL);
// Asymmetry RTT
buffer_rrdf_table_add_field(wb, field_id++, "RecvRTT", aggregated ? "Max Receiver ACKs RTT" : "Receiver ACKs RTT",
RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
2, "ms", st.max.tcpi_rcv_rtt / USEC_PER_MS, RRDF_FIELD_SORT_DESCENDING, NULL,
RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE,
RRDF_FIELD_OPTS_VISIBLE,
NULL);
// Retransmissions
buffer_rrdf_table_add_field(wb, field_id++, "Retrans", "Total Retransmissions",
RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
0, "packets", st.max.tcpi_total_retrans, RRDF_FIELD_SORT_DESCENDING, NULL,
RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE,
RRDF_FIELD_OPTS_VISIBLE,
NULL);
// Count
buffer_rrdf_table_add_field(wb, field_id++, "Count", "Number of sockets like this",
RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
0, NULL, NAN, RRDF_FIELD_SORT_DESCENDING, NULL,
RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE,
0, "sockets", NAN, RRDF_FIELD_SORT_DESCENDING, NULL,
RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
aggregated ? (RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY) : RRDF_FIELD_OPTS_NONE,
NULL);
}
buffer_json_object_close(wb); // columns
buffer_json_member_add_string(wb, "default_sort_column", aggregated ? "Count" : "Direction");
@ -747,18 +940,28 @@ int main(int argc __maybe_unused, char **argv __maybe_unused) {
uc = system_usernames_cache_init();
spawn_srv = spawn_server_create("setns", local_sockets_spawn_server_callback, argc, (const char **)argv);
if(spawn_srv == NULL) {
fprintf(stderr, "Cannot create spawn server.\n");
exit(1);
}
// ----------------------------------------------------------------------------------------------------------------
if(argc == 2 && strcmp(argv[1], "debug") == 0) {
bool cancelled = false;
usec_t stop_monotonic_ut = now_monotonic_usec() + 600 * USEC_PER_SEC;
char buf[] = "network-connections sockets:aggregated";
network_viewer_function("123", buf, &stop_monotonic_ut, &cancelled,
NULL, HTTP_ACCESS_ALL, NULL, NULL);
// for(int i = 0; i < 100; i++) {
bool cancelled = false;
usec_t stop_monotonic_ut = now_monotonic_usec() + 600 * USEC_PER_SEC;
char buf[] = "network-connections sockets:aggregated";
network_viewer_function("123", buf, &stop_monotonic_ut, &cancelled,
NULL, HTTP_ACCESS_ALL, NULL, NULL);
char buf2[] = "network-connections sockets:detailed";
network_viewer_function("123", buf2, &stop_monotonic_ut, &cancelled,
NULL, HTTP_ACCESS_ALL, NULL, NULL);
char buf2[] = "network-connections sockets:detailed";
network_viewer_function("123", buf2, &stop_monotonic_ut, &cancelled,
NULL, HTTP_ACCESS_ALL, NULL, NULL);
// }
spawn_server_destroy(spawn_srv);
exit(1);
}
@ -799,5 +1002,8 @@ int main(int argc __maybe_unused, char **argv __maybe_unused) {
}
}
spawn_server_destroy(spawn_srv);
spawn_srv = NULL;
return 0;
}

View file

@ -809,7 +809,7 @@ int main(int argc, char **argv) {
nfacct_signals();
errno = 0;
errno_clear();
if(freq >= netdata_update_every)
netdata_update_every = freq;

View file

@ -1288,7 +1288,7 @@ int main(int argc, char **argv) {
parse_command_line(argc, argv);
errno = 0;
errno_clear();
if(freq >= update_every)
update_every = freq;

View file

@ -15,6 +15,14 @@ static const char *protocol_name(LOCAL_SOCKET *n) {
else
return "UNKNOWN_IPV4";
}
else if(is_local_socket_ipv46(n)) {
if (n->local.protocol == IPPROTO_TCP)
return "TCP46";
else if(n->local.protocol == IPPROTO_UDP)
return "UDP46";
else
return "UNKNOWN_IPV46";
}
else if(n->local.family == AF_INET6) {
if (n->local.protocol == IPPROTO_TCP)
return "TCP6";
@ -35,6 +43,10 @@ static void print_local_listeners(LS_STATE *ls __maybe_unused, LOCAL_SOCKET *n,
ipv4_address_to_txt(n->local.ip.ipv4, local_address);
ipv4_address_to_txt(n->remote.ip.ipv4, remote_address);
}
else if(is_local_socket_ipv46(n)) {
strncpyz(local_address, "*", sizeof(local_address) - 1);
remote_address[0] = '\0';
}
else if(n->local.family == AF_INET6) {
ipv6_address_to_txt(&n->local.ip.ipv6, local_address);
ipv6_address_to_txt(&n->remote.ip.ipv6, remote_address);
@ -93,8 +105,10 @@ int main(int argc, char **argv) {
.cmdline = true,
.comm = false,
.namespaces = true,
.tcp_info = false,
.max_errors = 10,
.max_concurrent_namespaces = 10,
.cb = print_local_listeners,
.data = NULL,
@ -212,6 +226,7 @@ int main(int argc, char **argv) {
ls.config.comm = true;
ls.config.cmdline = true;
ls.config.namespaces = true;
ls.config.tcp_info = true;
ls.config.uid = true;
ls.config.max_errors = SIZE_MAX;
ls.config.cb = print_local_listeners_debug;
@ -276,8 +291,17 @@ int main(int argc, char **argv) {
}
}
SPAWN_SERVER *spawn_server = spawn_server_create(NULL, local_sockets_spawn_server_callback, argc, (const char **)argv);
if(spawn_server == NULL) {
fprintf(stderr, "Cannot create spawn server.\n");
exit(1);
}
ls.spawn_server = spawn_server;
local_sockets_process(&ls);
spawn_server_destroy(spawn_server);
getrusage(RUSAGE_SELF, &ended);
if(debug) {
@ -285,7 +309,7 @@ int main(int argc, char **argv) {
unsigned long long system = ended.ru_stime.tv_sec * 1000000ULL + ended.ru_stime.tv_usec - started.ru_stime.tv_sec * 1000000ULL + started.ru_stime.tv_usec;
unsigned long long total = user + system;
fprintf(stderr, "CPU Usage %llu user, %llu system, %llu total\n", user, system, total);
fprintf(stderr, "CPU Usage %llu user, %llu system, %llu total, %zu namespaces, %zu nl requests (without namespaces)\n", user, system, total, ls.stats.namespaces_found, ls.stats.mnl_sends);
}
return 0;

View file

@ -68,23 +68,15 @@ static void pluginsd_worker_thread_cleanup(void *pptr) {
cd->unsafe.running = false;
cd->unsafe.thread = 0;
pid_t pid = cd->unsafe.pid;
cd->unsafe.pid = 0;
POPEN_INSTANCE *pi = cd->unsafe.pi;
cd->unsafe.pi = NULL;
spinlock_unlock(&cd->unsafe.spinlock);
if (pid) {
siginfo_t info;
netdata_log_info("PLUGINSD: 'host:%s', killing data collection child process with pid %d",
rrdhost_hostname(cd->host), pid);
if (killpid(pid) != -1) {
netdata_log_info("PLUGINSD: 'host:%s', waiting for data collection child process pid %d to exit...",
rrdhost_hostname(cd->host), pid);
netdata_waitid(P_PID, (id_t)pid, &info, WEXITED);
}
}
if (pi)
spawn_popen_kill(pi);
}
#define SERIAL_FAILURES_THRESHOLD 10
@ -160,14 +152,13 @@ static void *pluginsd_worker_thread(void *arg) {
size_t count = 0;
while(service_running(SERVICE_COLLECTORS)) {
FILE *fp_child_input = NULL;
FILE *fp_child_output = netdata_popen(cd->cmd, &cd->unsafe.pid, &fp_child_input);
if(unlikely(!fp_child_input || !fp_child_output)) {
cd->unsafe.pi = spawn_popen_run(cd->cmd);
if(!cd->unsafe.pi) {
netdata_log_error("PLUGINSD: 'host:%s', cannot popen(\"%s\", \"r\").",
rrdhost_hostname(cd->host), cd->cmd);
break;
}
cd->unsafe.pid = spawn_server_instance_pid(cd->unsafe.pi->si);
nd_log(NDLS_DAEMON, NDLP_DEBUG,
"PLUGINSD: 'host:%s' connected to '%s' running on pid %d",
@ -190,15 +181,14 @@ static void *pluginsd_worker_thread(void *arg) {
};
ND_LOG_STACK_PUSH(lgs);
count = pluginsd_process(cd->host, cd, fp_child_input, fp_child_output, 0);
count = pluginsd_process(cd->host, cd, cd->unsafe.pi->child_stdin_fp, cd->unsafe.pi->child_stdout_fp, 0);
nd_log(NDLS_DAEMON, NDLP_DEBUG,
"PLUGINSD: 'host:%s', '%s' (pid %d) disconnected after %zu successful data collections (ENDs).",
rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, count);
killpid(cd->unsafe.pid);
int worker_ret_code = netdata_pclose(fp_child_input, fp_child_output, cd->unsafe.pid);
int worker_ret_code = spawn_popen_kill(cd->unsafe.pi);
cd->unsafe.pi = NULL;
if(likely(worker_ret_code == 0))
pluginsd_worker_thread_handle_success(cd);
@ -273,7 +263,7 @@ void *pluginsd_main(void *ptr) {
if (unlikely(!service_running(SERVICE_COLLECTORS)))
break;
errno = 0;
errno_clear();
DIR *dir = opendir(directory_name);
if (unlikely(!dir)) {
if (directory_errors[idx] != errno) {

View file

@ -34,6 +34,7 @@ struct plugind {
bool running; // do not touch this structure after setting this to 1
bool enabled; // if this is enabled or not
ND_THREAD *thread;
POPEN_INSTANCE *pi;
pid_t pid;
} unsafe;

View file

@ -13,7 +13,7 @@ ssize_t send_to_plugin(const char *txt, void *data) {
return h2o_stream_write(parser->h2o_ctx, txt, strlen(txt));
#endif
errno = 0;
errno_clear();
spinlock_lock(&parser->writer.spinlock);
ssize_t bytes = -1;

View file

@ -29,7 +29,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
static ARL_BASE *arl_base = NULL;
static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL, *arl_hugepages_total = NULL,
*arl_zswapped = NULL, *arl_high_low = NULL, *arl_cma_total = NULL,
*arl_zswapped = NULL, *arl_high_low = NULL,
*arl_directmap4k = NULL, *arl_directmap2m = NULL, *arl_directmap4m = NULL, *arl_directmap1g = NULL;
static unsigned long long
@ -189,7 +189,7 @@ int do_proc_meminfo(int update_every, usec_t dt) {
arl_expect(arl_base, "FilePmdMapped", &FilePmdMapped);
// CONFIG_CMA
arl_cma_total = arl_expect(arl_base, "CmaTotal", &CmaTotal);
arl_expect(arl_base, "CmaTotal", &CmaTotal);
arl_expect(arl_base, "CmaFree", &CmaFree);
// CONFIG_UNACCEPTED_MEMORY

View file

@ -1037,7 +1037,7 @@ static ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_one_file(
struct journal_file *jf, FUNCTION_QUERY_STATUS *fqs) {
sd_journal *j = NULL;
errno = 0;
errno_clear();
fstat_cache_enable_on_thread();

View file

@ -834,7 +834,7 @@ static inline void tc_split_words(char *str, char **words, int max_words) {
while(i < max_words) words[i++] = NULL;
}
static pid_t tc_child_pid = 0;
static POPEN_INSTANCE *tc_child_instance = NULL;
static void tc_main_cleanup(void *pptr) {
struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr);
@ -847,16 +847,10 @@ static void tc_main_cleanup(void *pptr) {
collector_info("cleaning up...");
if(tc_child_pid) {
collector_info("TC: killing with SIGTERM tc-qos-helper process %d", tc_child_pid);
if(killpid(tc_child_pid) != -1) {
siginfo_t info;
collector_info("TC: waiting for tc plugin child process pid %d to exit...", tc_child_pid);
netdata_waitid(P_PID, (id_t) tc_child_pid, &info, WEXITED);
}
tc_child_pid = 0;
if(tc_child_instance) {
collector_info("TC: stopping the running tc-qos-helper script");
int code = spawn_popen_wait(tc_child_instance); (void)code;
tc_child_instance = NULL;
}
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
@ -921,21 +915,20 @@ void *tc_main(void *ptr) {
char *tc_script = config_get("plugin:tc", "script to run to get tc values", command);
while(service_running(SERVICE_COLLECTORS)) {
FILE *fp_child_input, *fp_child_output;
struct tc_device *device = NULL;
struct tc_class *class = NULL;
snprintfz(command, TC_LINE_MAX, "exec %s %d", tc_script, localhost->rrd_update_every);
netdata_log_debug(D_TC_LOOP, "executing '%s'", command);
fp_child_output = netdata_popen(command, (pid_t *)&tc_child_pid, &fp_child_input);
if(unlikely(!fp_child_output)) {
tc_child_instance = spawn_popen_run(command);
if(!tc_child_instance) {
collector_error("TC: Cannot popen(\"%s\", \"r\").", command);
goto cleanup;
}
char buffer[TC_LINE_MAX+1] = "";
while(fgets(buffer, TC_LINE_MAX, fp_child_output) != NULL) {
while(fgets(buffer, TC_LINE_MAX, tc_child_instance->child_stdout_fp) != NULL) {
if(unlikely(!service_running(SERVICE_COLLECTORS))) break;
buffer[TC_LINE_MAX] = '\0';
@ -1142,8 +1135,8 @@ void *tc_main(void *ptr) {
}
// fgets() failed or loop broke
int code = netdata_pclose(fp_child_input, fp_child_output, (pid_t)tc_child_pid);
tc_child_pid = 0;
int code = spawn_popen_kill(tc_child_instance);
tc_child_instance = NULL;
if(unlikely(device)) {
// tc_device_free(device);

View file

@ -986,7 +986,7 @@ int main(int argc, char **argv) {
netdata_log_error("xenstat.plugin: ignoring parameter '%s'", argv[i]);
}
errno = 0;
errno_clear();
if(freq >= netdata_update_every)
netdata_update_every = freq;

View file

@ -326,18 +326,15 @@ void analytics_alarms_notifications(void)
strcat(script, " dump_methods");
pid_t command_pid;
netdata_log_debug(D_ANALYTICS, "Executing %s", script);
BUFFER *b = buffer_create(1000, NULL);
int cnt = 0;
FILE *fp_child_input;
FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
if (fp_child_output) {
POPEN_INSTANCE *instance = spawn_popen_run(script);
if (instance) {
char line[200 + 1];
while (fgets(line, 200, fp_child_output) != NULL) {
while (fgets(line, 200, instance->child_stdout_fp) != NULL) {
char *end = line;
while (*end && *end != '\n')
end++;
@ -350,7 +347,7 @@ void analytics_alarms_notifications(void)
cnt++;
}
netdata_pclose(fp_child_input, fp_child_output, command_pid);
spawn_popen_wait(instance);
}
freez(script);
@ -1001,8 +998,6 @@ void analytics_statistic_send(const analytics_statistic_t *statistic) {
char *command_to_run = mallocz(
sizeof(char) * (strlen(statistic->action) + strlen(action_result) + strlen(action_data) + strlen(as_script) +
analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15));
pid_t command_pid;
sprintf(
command_to_run,
"%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ",
@ -1055,12 +1050,11 @@ void analytics_statistic_send(const analytics_statistic_t *statistic) {
"%s '%s' '%s' '%s'",
as_script, statistic->action, action_result, action_data);
FILE *fp_child_input;
FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input);
if (fp_child_output) {
POPEN_INSTANCE *instance = spawn_popen_run(command_to_run);
if (instance) {
char buffer[4 + 1];
char *s = fgets(buffer, 4, fp_child_output);
int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
char *s = fgets(buffer, 4, instance->child_stdout_fp);
int exit_code = spawn_popen_wait(instance);
if (exit_code)
nd_log(NDLS_DAEMON, NDLP_NOTICE,

View file

@ -75,6 +75,7 @@ typedef enum __attribute__((packed)) {
BIB_LIB_LIBCAP,
BIB_LIB_LIBCRYPTO,
BIB_LIB_LIBYAML,
BIB_LIB_LIBMNL,
BIB_PLUGIN_APPS,
BIB_PLUGIN_LINUX_CGROUPS,
BIB_PLUGIN_LINUX_CGROUP_NETWORK,
@ -698,6 +699,14 @@ static struct {
.json = "libyaml",
.value = NULL,
},
[BIB_LIB_LIBMNL] = {
.category = BIC_LIBS,
.type = BIT_BOOLEAN,
.analytics = "libmnl",
.print = "libmnl (library for working with netfilter)",
.json = "libmnl",
.value = NULL,
},
[BIB_PLUGIN_APPS] = {
.category = BIC_PLUGINS,
.type = BIT_BOOLEAN,
@ -1177,6 +1186,9 @@ __attribute__((constructor)) void initialize_build_info(void) {
#ifdef HAVE_LIBYAML
build_info_set_status(BIB_LIB_LIBYAML, true);
#endif
#ifdef HAVE_LIBMNL
build_info_set_status(BIB_LIB_LIBMNL, true);
#endif
#ifdef ENABLE_PLUGIN_APPS
build_info_set_status(BIB_PLUGIN_APPS, true);
@ -1278,9 +1290,18 @@ static void populate_system_info(void) {
system_info = localhost->system_info;
}
else {
bool started_spawn_server = false;
if(!netdata_main_spawn_server) {
started_spawn_server = true;
netdata_main_spawn_server_init(NULL, 0, NULL);
}
system_info = callocz(1, sizeof(struct rrdhost_system_info));
get_system_info(system_info);
free_system_info = true;
if(started_spawn_server)
netdata_main_spawn_server_cleanup();
}
build_info_set_value_strdupz(BIB_OS_KERNEL_NAME, system_info->kernel_name);

View file

@ -44,7 +44,7 @@ long get_netdata_cpus(void) {
long cores_user_configured = config_get_number(CONFIG_SECTION_GLOBAL, "cpu cores", processors);
errno = 0;
errno_clear();
internal_error(true,
"System CPUs: %ld, ("
"system: %ld, cgroups cpuset v1: %ld, cgroups cpuset v2: %ld, netdata.conf: %ld"

View file

@ -84,9 +84,6 @@
// global GUID map functions
// netdata agent spawn server
#include "spawn/spawn.h"
// the netdata daemon
#include "daemon.h"
#include "main.h"

View file

@ -381,14 +381,14 @@ static void sched_setscheduler_set(void) {
priority = (int)config_get_number(CONFIG_SECTION_GLOBAL, "process scheduling priority", priority);
#ifdef HAVE_SCHED_GET_PRIORITY_MIN
errno = 0;
errno_clear();
if(priority < sched_get_priority_min(policy)) {
netdata_log_error("scheduler %s (%d) priority %d is below the minimum %d. Using the minimum.", name, policy, priority, sched_get_priority_min(policy));
priority = sched_get_priority_min(policy);
}
#endif
#ifdef HAVE_SCHED_GET_PRIORITY_MAX
errno = 0;
errno_clear();
if(priority > sched_get_priority_max(policy)) {
netdata_log_error("scheduler %s (%d) priority %d is above the maximum %d. Using the maximum.", name, policy, priority, sched_get_priority_max(policy));
priority = sched_get_priority_max(policy);
@ -407,7 +407,7 @@ static void sched_setscheduler_set(void) {
.sched_priority = priority
};
errno = 0;
errno_clear();
i = sched_setscheduler(0, policy, &param);
if(i != 0) {
netdata_log_error("Cannot adjust netdata scheduling policy to %s (%d), with priority %d. Falling back to nice.",

View file

@ -26,7 +26,6 @@ int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
bool ieee754_doubles = false;
time_t netdata_start_time = 0;
struct netdata_static_thread *static_threads;
bool i_am_the_spawn_server = false;
struct config netdata_config = {
.first_section = NULL,
@ -325,9 +324,6 @@ static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) {
void web_client_cache_destroy(void);
void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) {
if (i_am_the_spawn_server)
exit(ret);
watcher_shutdown_begin();
nd_log_limits_unlimited();
@ -490,9 +486,12 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re
#endif
watcher_step_complete(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES);
netdata_main_spawn_server_cleanup();
watcher_step_complete(WATCHER_STEP_ID_DESTROY_MAIN_SPAWN_SERVER);
(void) unlink(agent_incomplete_shutdown_file);
watcher_step_complete(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE);
watcher_shutdown_end();
watcher_thread_stop();
@ -621,39 +620,6 @@ void web_server_config_options(void)
}
}
// killpid sends SIGTERM to pid and returns the result of kill().
// When kill() fails (-1) an error is logged, except for ESRCH: the process is
// already gone, which is what the caller wanted, so it is left to the caller.
int killpid(pid_t pid) {
    netdata_log_debug(D_EXIT, "Request to kill pid %d", pid);

    errno = 0;
    int rc = kill(pid, SIGTERM);
    if (rc != -1)
        return rc;

    // capture errno once, before any logging call gets a chance to change it
    int err = errno;
    if (err == EPERM)
        netdata_log_error("Cannot kill pid %d, but I do not have enough permissions.", pid);
    else if (err != ESRCH)
        netdata_log_error("Cannot kill pid %d, but I received an error.", pid);

    return rc;
}
static void set_nofile_limit(struct rlimit *rl) {
// get the num files allowed
if(getrlimit(RLIMIT_NOFILE, rl) != 0) {
@ -1333,7 +1299,7 @@ static void post_conf_load(char **user)
}
static bool load_netdata_conf(char *filename, char overwrite_used, char **user) {
errno = 0;
errno_clear();
int ret = 0;
@ -1380,15 +1346,12 @@ int get_system_info(struct rrdhost_system_info *system_info) {
return 1;
}
pid_t command_pid;
FILE *fp_child_input;
FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
if(fp_child_output) {
POPEN_INSTANCE *instance = spawn_popen_run(script);
if(instance) {
char line[200 + 1];
// Removed the double strlens, if the Coverity tainted string warning reappears I'll revert.
// One time init code, but I'm curious about the warning...
while (fgets(line, 200, fp_child_output) != NULL) {
while (fgets(line, 200, instance->child_stdout_fp) != NULL) {
char *value=line;
while (*value && *value != '=') value++;
if (*value=='=') {
@ -1407,7 +1370,7 @@ int get_system_info(struct rrdhost_system_info *system_info) {
}
}
}
netdata_pclose(fp_child_input, fp_child_output, command_pid);
spawn_popen_wait(instance);
}
freez(script);
#else
@ -1464,15 +1427,12 @@ int unittest_prepare_rrd(char **user) {
return 0;
}
int netdata_main(int argc, char **argv)
{
analytics_init();
string_init();
// initialize the system clocks
int netdata_main(int argc, char **argv) {
clocks_init();
netdata_start_time = now_realtime_sec();
string_init();
analytics_init();
netdata_start_time = now_realtime_sec();
usec_t started_ut = now_monotonic_usec();
usec_t last_ut = started_ut;
const char *prev_msg = NULL;
@ -1495,13 +1455,6 @@ int netdata_main(int argc, char **argv)
// set the name for logging
program_name = "netdata";
if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) {
// don't run netdata, this is the spawn server
i_am_the_spawn_server = true;
spawn_server();
exit(0);
}
// parse options
{
int num_opts = sizeof(option_definitions) / sizeof(struct option_def);
@ -1966,7 +1919,7 @@ int netdata_main(int argc, char **argv)
if (close_open_fds == true) {
// close all open file descriptors, except the standard ones
// the caller may have left open files (lxc-attach has this issue)
for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR);
os_close_all_non_std_open_fds_except(NULL, 0);
}
if(!config_loaded) {
@ -2196,6 +2149,7 @@ int netdata_main(int argc, char **argv)
(void)dont_fork;
#endif
netdata_main_spawn_server_init("plugins", argc, (const char **)argv);
watcher_thread_start();
// init sentry
@ -2228,10 +2182,6 @@ int netdata_main(int argc, char **argv)
// fork the spawn server
delta_startup_time("fork the spawn server");
#ifndef OS_WINDOWS
spawn_init();
#endif
/*
* Libuv uv_spawn() uses SIGCHLD internally:
* https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485
@ -2288,6 +2238,7 @@ int netdata_main(int argc, char **argv)
if (claiming_pending_arguments)
claim_agent(claiming_pending_arguments, false, NULL);
load_claiming_state();
// ------------------------------------------------------------------------

View file

@ -8,7 +8,6 @@
extern struct config netdata_config;
void cancel_main_threads(void);
int killpid(pid_t pid);
typedef enum {
ABILITY_DATA_QUERIES = (1 << 0),

View file

@ -118,61 +118,45 @@ void signals_reset(void) {
}
}
// reap_child reaps the child identified by pid.
static void reap_child(pid_t pid) {
siginfo_t i;
static void sigchild_handle() {
int status;
pid_t pid;
errno = 0;
netdata_log_debug(D_CHILDS, "SIGNAL: reap_child(%d)...", pid);
if (netdata_waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) {
if (errno != ECHILD)
netdata_log_error("SIGNAL: waitid(%d): failed to wait for child", pid);
else
netdata_log_info("SIGNAL: waitid(%d): failed - it seems the child is already reaped", pid);
return;
}
else if (i.si_pid == 0) {
// Process didn't exit, this shouldn't happen.
netdata_log_error("SIGNAL: waitid(%d): reports pid 0 - child has not exited", pid);
return;
}
// Loop to check for exited child processes
while ((pid = waitpid((pid_t)(-1), &status, WNOHANG)) != 0) {
if(pid == -1)
break;
switch (i.si_code) {
case CLD_EXITED:
netdata_log_info("SIGNAL: reap_child(%d) exited with code: %d", pid, i.si_status);
break;
case CLD_KILLED:
netdata_log_info("SIGNAL: reap_child(%d) killed by signal: %d", pid, i.si_status);
break;
case CLD_DUMPED:
netdata_log_info("SIGNAL: reap_child(%d) dumped core by signal: %d", pid, i.si_status);
break;
case CLD_STOPPED:
netdata_log_info("SIGNAL: reap_child(%d) stopped by signal: %d", pid, i.si_status);
break;
case CLD_TRAPPED:
netdata_log_info("SIGNAL: reap_child(%d) trapped by signal: %d", pid, i.si_status);
break;
case CLD_CONTINUED:
netdata_log_info("SIGNAL: reap_child(%d) continued by signal: %d", pid, i.si_status);
break;
default:
netdata_log_info("SIGNAL: reap_child(%d) gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status);
break;
}
}
// reap_children reaps all pending children which are not managed by myp.
static void reap_children() {
siginfo_t i;
while(1) {
i.si_pid = 0;
if (netdata_waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1 || i.si_pid == 0)
// nothing to do
return;
reap_child(i.si_pid);
if(WIFEXITED(status)) {
nd_log(NDLS_DAEMON, NDLP_INFO,
"DAEMON: child with pid %d exited normally with exit code %d",
pid, WEXITSTATUS(status));
}
else if(WIFSIGNALED(status)) {
if(WCOREDUMP(status))
nd_log(NDLS_DAEMON, NDLP_INFO,
"DAEMON: child with pid %d coredump'd due to signal %d",
pid, WTERMSIG(status));
else
nd_log(NDLS_DAEMON, NDLP_INFO,
"DAEMON: child with pid %d killed by signal %d",
pid, WTERMSIG(status));
}
else if(WIFSTOPPED(status)) {
nd_log(NDLS_DAEMON, NDLP_INFO,
"DAEMON: child with pid %d stopped due to signal %d",
pid, WSTOPSIG(status));
}
else if(WIFCONTINUED(status)) {
nd_log(NDLS_DAEMON, NDLP_INFO,
"DAEMON: child with pid %d continued due to signal %d",
pid, SIGCONT);
}
else {
nd_log(NDLS_COLLECTORS, NDLP_INFO,
"DAEMON: child with pid %d reports unhandled status",
pid);
}
}
}
@ -183,6 +167,7 @@ void signals_handle(void) {
// is delivered that either terminates the process or causes the invocation
// of a signal-catching function.
if(pause() == -1 && errno == EINTR) {
errno_clear();
// loop once, but keep looping while signals are coming in
// this is needed because a few operations may take some time
@ -227,7 +212,7 @@ void signals_handle(void) {
break;
case NETDATA_SIGNAL_CHILD:
reap_children();
sigchild_handle();
break;
default:

View file

@ -30,11 +30,7 @@ const struct netdata_static_thread static_threads_common[] = {
.name = "HEALTH",
.config_section = NULL,
.config_name = NULL,
#ifdef OS_WINDOWS
.enabled = 0,
#else
.enabled = 1,
#endif
.thread = NULL,
.init_routine = NULL,
.start_routine = health_main
@ -74,11 +70,7 @@ const struct netdata_static_thread static_threads_common[] = {
.name = "PLUGINSD",
.config_section = NULL,
.config_name = NULL,
#ifdef OS_WINDOWS
.enabled = 0,
#else
.enabled = 1,
#endif
.thread = NULL,
.init_routine = NULL,
.start_routine = pluginsd_main
@ -101,8 +93,6 @@ const struct netdata_static_thread static_threads_common[] = {
.init_routine = NULL,
.start_routine = statsd_main
},
#ifndef OS_WINDOWS
// this crashes the debugger under windows
{
.name = "EXPORTING",
.config_section = NULL,
@ -112,7 +102,6 @@ const struct netdata_static_thread static_threads_common[] = {
.init_routine = NULL,
.start_routine = exporting_main
},
#endif
{
.name = "SNDR[localhost]",
.config_section = NULL,

View file

@ -151,6 +151,8 @@ void watcher_thread_start() {
"remove pid file";
watcher_steps[WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES].msg =
"free openssl structures";
watcher_steps[WATCHER_STEP_ID_DESTROY_MAIN_SPAWN_SERVER].msg =
"destroy main spawn server";
watcher_steps[WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE].msg =
"remove incomplete shutdown file";

View file

@ -30,6 +30,7 @@ typedef enum {
WATCHER_STEP_ID_CLOSE_SQL_DATABASES,
WATCHER_STEP_ID_REMOVE_PID_FILE,
WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES,
WATCHER_STEP_ID_DESTROY_MAIN_SPAWN_SERVER,
WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE,
// Always keep this as the last enum value

View file

@ -172,7 +172,7 @@ static DWORD netdata_windows_get_current_build()
cBuild, 63, HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion", "CurrentBuild"))
return 0;
errno = 0;
errno_clear();
DWORD version = strtol(cBuild, NULL, 10);
if (errno == ERANGE)

View file

@ -219,7 +219,7 @@ static bool update_path() {
int main(int argc, char *argv[])
{
bool tty = isatty(fileno(stdout)) == 1;
bool tty = isatty(fileno(stdin)) == 1;
if (!update_path()) {
return 1;

View file

@ -1517,7 +1517,7 @@ static void *journal_v2_indexing_tp_worker(struct rrdengine_instance *ctx __mayb
break;
}
errno = 0;
errno_clear();
if(count)
nd_log(NDLS_DAEMON, NDLP_DEBUG,
"DBENGINE: journal indexing done; %u files processed",

View file

@ -1043,7 +1043,6 @@ struct alarm_entry {
STRING *recipient;
time_t exec_run_timestamp;
int exec_code;
uint64_t exec_spawn_serial;
STRING *source;
STRING *units;
@ -1069,6 +1068,8 @@ struct alarm_entry {
time_t last_repeat;
POPEN_INSTANCE *popen_instance;
struct alarm_entry *next;
struct alarm_entry *next_in_progress;
struct alarm_entry *prev_in_progress;

View file

@ -1494,18 +1494,16 @@ static void rrdhost_load_kubernetes_labels(void) {
return;
}
pid_t pid;
FILE *fp_child_input;
FILE *fp_child_output = netdata_popen(label_script, &pid, &fp_child_input);
if(!fp_child_output) return;
POPEN_INSTANCE *instance = spawn_popen_run(label_script);
if(!instance) return;
char buffer[1000 + 1];
while (fgets(buffer, 1000, fp_child_output) != NULL)
while (fgets(buffer, 1000, instance->child_stdout_fp) != NULL)
rrdlabels_add_pair(localhost->rrdlabels, buffer, RRDLABEL_SRC_AUTO|RRDLABEL_SRC_K8S);
// Non-zero exit code means that all the script output is error messages. We've shown already any message that didn't include a ':'
// Here we'll inform with an ERROR that the script failed, show whatever (if anything) was added to the list of labels, free the memory and set the return to null
int rc = netdata_pclose(fp_child_input, fp_child_output, pid);
int rc = spawn_popen_wait(instance);
if(rc)
nd_log(NDLS_DAEMON, NDLP_ERR,
"%s exited abnormally. Failed to get kubernetes labels.",

View file

@ -43,7 +43,7 @@ int sql_init_context_database(int memory)
return 1;
}
errno = 0;
errno_clear();
netdata_log_info("SQLite database %s initialization", sqlite_database);
char buf[1024 + 1] = "";

View file

@ -518,7 +518,7 @@ static int migrate_database(sqlite3 *database, int target_version, char *db_name
}
if (likely(user_version == target_version)) {
errno = 0;
errno_clear();
netdata_log_info("%s database version is %d (no migration needed)", db_name, target_version);
return target_version;
}

View file

@ -461,7 +461,7 @@ void sql_alert_cleanup(bool cli)
{
UNUSED(cli);
errno = 0;
errno_clear();
if (sql_init_meta_database(DB_CHECK_NONE, 0)) {
netdata_log_error("Failed to open database");
return;

View file

@ -711,7 +711,7 @@ int sql_init_meta_database(db_check_action_type_t rebuild, int memory)
}
if (rebuild & DB_CHECK_ANALYZE) {
errno = 0;
errno_clear();
netdata_log_info("Running ANALYZE on %s", sqlite_database);
rc = sqlite3_exec_monitored(db_meta, "ANALYZE", 0, 0, &err_msg);
if (rc != SQLITE_OK) {
@ -725,7 +725,7 @@ int sql_init_meta_database(db_check_action_type_t rebuild, int memory)
return 1;
}
errno = 0;
errno_clear();
netdata_log_info("SQLite database %s initialization", sqlite_database);
rc = sqlite3_create_function(db_meta, "u2h", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, 0, sqlite_uuid_parse, 0, 0);

View file

@ -77,7 +77,7 @@ void simple_connector_receive_response(int *sock, struct instance *instance)
ERR_clear_error();
#endif
errno = 0;
errno_clear();
// loop through to collect all data
while (*sock != -1 && errno != EWOULDBLOCK) {

View file

@ -43,7 +43,7 @@ void health_log_alert_transition_with_trace(RRDHOST *host, ALARM_ENTRY *ae, int
};
ND_LOG_STACK_PUSH(lgs);
errno = 0;
errno_clear();
ND_LOG_FIELD_PRIORITY priority = NDLP_INFO;

View file

@ -23,7 +23,13 @@ void health_alarm_wait_for_execution(ALARM_ENTRY *ae) {
if (!(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS))
return;
spawn_wait_cmd(ae->exec_spawn_serial, &ae->exec_code, &ae->exec_run_timestamp);
if(!ae->popen_instance) {
// nd_log(NDLS_DAEMON, NDLP_ERR, "attempted to wait for the execution of alert that has not spawn a notification");
return;
}
ae->exec_code = spawn_popen_wait(ae->popen_instance);
netdata_log_debug(D_HEALTH, "done executing command - returned with code %d", ae->exec_code);
ae->flags &= ~HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS;
@ -75,7 +81,6 @@ static inline void enqueue_alarm_notify_in_progress(ALARM_ENTRY *ae)
alarm_notifications_in_progress.head = ae;
}
alarm_notifications_in_progress.tail = ae;
}
static bool prepare_command(BUFFER *wb,
@ -462,7 +467,7 @@ void health_send_notification(RRDHOST *host, ALARM_ENTRY *ae, struct health_rais
netdata_log_debug(D_HEALTH, "executing command '%s'", command_to_run);
ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS;
ae->exec_spawn_serial = spawn_enq_cmd(command_to_run);
ae->popen_instance = spawn_popen_run(command_to_run);
enqueue_alarm_notify_in_progress(ae);
health_alarm_log_save(host, ae);
} else {

View file

@ -343,7 +343,7 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) {
}
if(unlikely(now < next)) {
errno = 0;
errno_clear();
nd_log_limit_static_global_var(erl, 10, 0);
nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
"heartbeat clock: woke up %"PRIu64" microseconds earlier than expected "
@ -351,7 +351,7 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) {
next - now);
}
else if(unlikely(now - next > tick / 2)) {
errno = 0;
errno_clear();
nd_log_limit_static_global_var(erl, 10, 0);
nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
"heartbeat clock: woke up %"PRIu64" microseconds later than expected "

View file

@ -1248,7 +1248,7 @@ cleanup:
close(fd);
}
if(mem == MAP_FAILED) return NULL;
errno = 0;
errno_clear();
return mem;
}
@ -1364,7 +1364,7 @@ int verify_netdata_host_prefix(bool log_msg) {
char buffer[FILENAME_MAX + 1];
char *path = netdata_configured_host_prefix;
char *reason = "unknown reason";
errno = 0;
errno_clear();
struct stat sb;
if (stat(path, &sb) == -1) {
@ -1679,19 +1679,17 @@ char *find_and_replace(const char *src, const char *find, const char *replace, c
return value;
}
BUFFER *run_command_and_get_output_to_buffer(const char *command, int max_line_length) {
BUFFER *wb = buffer_create(0, NULL);
pid_t pid;
FILE *fp = netdata_popen(command, &pid, NULL);
if(fp) {
POPEN_INSTANCE *pi = spawn_popen_run(command);
if(pi) {
char buffer[max_line_length + 1];
while (fgets(buffer, max_line_length, fp)) {
while (fgets(buffer, max_line_length, pi->child_stdout_fp)) {
buffer[max_line_length] = '\0';
buffer_strcat(wb, buffer);
}
spawn_popen_kill(pi);
}
else {
buffer_free(wb);
@ -1699,103 +1697,27 @@ BUFFER *run_command_and_get_output_to_buffer(const char *command, int max_line_l
return NULL;
}
netdata_pclose(NULL, fp, pid);
return wb;
}
bool run_command_and_copy_output_to_stdout(const char *command, int max_line_length) {
pid_t pid;
FILE *fp = netdata_popen(command, &pid, NULL);
if(fp) {
POPEN_INSTANCE *pi = spawn_popen_run(command);
if(pi) {
char buffer[max_line_length + 1];
while (fgets(buffer, max_line_length, fp))
while (fgets(buffer, max_line_length, pi->child_stdout_fp))
fprintf(stdout, "%s", buffer);
spawn_popen_kill(pi);
}
else {
netdata_log_error("Failed to execute command '%s'.", command);
return false;
}
netdata_pclose(NULL, fp, pid);
return true;
}
// Returns 1 when `fd` refers to an open descriptor, 0 otherwise.
// A failing fcntl() only counts as "not open" when the reason is EBADF.
static int fd_is_valid(int fd) {
    if(fcntl(fd, F_GETFD) != -1)
        return 1;

    return errno != EBADF;
}
// Apply `action` to one descriptor. Shared by both enumeration strategies of for_each_open_fd().
static void for_each_open_fd_apply(OPEN_FD_ACTION action, int fd) {
    switch(action){
        case OPEN_FD_ACTION_CLOSE:
            if(fd_is_valid(fd)) (void)close(fd);
            break;

        case OPEN_FD_ACTION_FD_CLOEXEC:
            (void)fcntl(fd, F_SETFD, FD_CLOEXEC);
            break;

        default:
            break; // do nothing
    }
}

/*
 * Close, or mark close-on-exec, the open file descriptors of the calling process.
 *
 * action        OPEN_FD_ACTION_CLOSE closes the descriptors,
 *               OPEN_FD_ACTION_FD_CLOEXEC sets FD_CLOEXEC on them.
 * excluded_fds  bitmask of OPEN_FD_EXCLUDE_* flags; stdin/stdout/stderr are
 *               processed only when their flag is NOT set.
 *
 * Descriptors above stderr are handled with close_range() when it is available
 * at build time and succeeds. Otherwise they are enumerated through
 * /proc/self/fd, and if that directory cannot be opened every number up to the
 * RLIMIT_NOFILE / _SC_OPEN_MAX limit (65535 as last resort) is tried.
 */
void for_each_open_fd(OPEN_FD_ACTION action, OPEN_FD_EXCLUDE excluded_fds){
    switch(action){
        case OPEN_FD_ACTION_CLOSE:
            if(!(excluded_fds & OPEN_FD_EXCLUDE_STDIN)) (void)close(STDIN_FILENO);
            if(!(excluded_fds & OPEN_FD_EXCLUDE_STDOUT)) (void)close(STDOUT_FILENO);
            if(!(excluded_fds & OPEN_FD_EXCLUDE_STDERR)) (void)close(STDERR_FILENO);
#if defined(HAVE_CLOSE_RANGE)
            if(close_range(STDERR_FILENO + 1, ~0U, 0) == 0) return;
            nd_log(NDLS_DAEMON, NDLP_DEBUG, "close_range() failed, will try to close fds one by one");
#endif
            break;

        case OPEN_FD_ACTION_FD_CLOEXEC:
            if(!(excluded_fds & OPEN_FD_EXCLUDE_STDIN)) (void)fcntl(STDIN_FILENO, F_SETFD, FD_CLOEXEC);
            if(!(excluded_fds & OPEN_FD_EXCLUDE_STDOUT)) (void)fcntl(STDOUT_FILENO, F_SETFD, FD_CLOEXEC);
            if(!(excluded_fds & OPEN_FD_EXCLUDE_STDERR)) (void)fcntl(STDERR_FILENO, F_SETFD, FD_CLOEXEC);
#if defined(HAVE_CLOSE_RANGE) && defined(CLOSE_RANGE_CLOEXEC) // Linux >= 5.11, FreeBSD >= 13.1
            if(close_range(STDERR_FILENO + 1, ~0U, CLOSE_RANGE_CLOEXEC) == 0) return;
            nd_log(NDLS_DAEMON, NDLP_DEBUG, "close_range() failed, will try to mark fds for closing one by one");
#endif
            break;

        default:
            break; // do nothing
    }

    DIR *dir = opendir("/proc/self/fd");
    if (dir == NULL) {
        // no /proc/self/fd: brute-force every possible descriptor number
        struct rlimit rl;
        int open_max = -1;

        if(getrlimit(RLIMIT_NOFILE, &rl) == 0 && rl.rlim_max != RLIM_INFINITY) open_max = rl.rlim_max;
#ifdef _SC_OPEN_MAX
        else open_max = sysconf(_SC_OPEN_MAX);
#endif

        if (open_max == -1) open_max = 65535; // 65535 arbitrary default if everything else fails

        for (int fd = STDERR_FILENO + 1; fd < open_max; fd++)
            for_each_open_fd_apply(action, fd);

        return;
    }

    // The directory stream itself is backed by a descriptor that is listed in
    // /proc/self/fd. Closing it while iterating would break readdir() and make
    // closedir() operate on a dead descriptor, so it must be skipped.
    const int dir_fd = dirfd(dir);

    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL) {
        int fd = str2i(entry->d_name);

        // the standard descriptors were already handled above
        if(unlikely((fd == STDIN_FILENO ) || (fd == STDOUT_FILENO) || (fd == STDERR_FILENO) )) continue;
        if(unlikely(fd == dir_fd)) continue;

        for_each_open_fd_apply(action, fd);
    }

    closedir(dir);
}
struct timing_steps {
const char *name;
usec_t time;

View file

@ -326,6 +326,9 @@ size_t judy_aral_structures(void);
#define GUID_LEN 36
#define PIPE_READ 0
#define PIPE_WRITE 1
#include "linked-lists.h"
#include "storage-point.h"
@ -425,7 +428,7 @@ char *find_and_replace(const char *src, const char *find, const char *replace, c
#define UNUSED_FUNCTION(x) UNUSED_##x
#endif
#define error_report(x, args...) do { errno = 0; netdata_log_error(x, ##args); } while(0)
#define error_report(x, args...) do { errno_clear(); netdata_log_error(x, ##args); } while(0)
// Taken from linux kernel
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@ -440,17 +443,6 @@ char *find_and_replace(const char *src, const char *find, const char *replace, c
bool run_command_and_copy_output_to_stdout(const char *command, int max_line_length);
struct web_buffer *run_command_and_get_output_to_buffer(const char *command, int max_line_length);
// What for_each_open_fd() does to each descriptor it visits.
typedef enum {
OPEN_FD_ACTION_CLOSE,
OPEN_FD_ACTION_FD_CLOEXEC
} OPEN_FD_ACTION;
// Bit flags naming the standard descriptors for_each_open_fd() must leave alone; may be OR-ed together.
typedef enum {
OPEN_FD_EXCLUDE_STDIN = 0x01,
OPEN_FD_EXCLUDE_STDOUT = 0x02,
OPEN_FD_EXCLUDE_STDERR = 0x04
} OPEN_FD_EXCLUDE;
// Applies `action` to the process' open descriptors, honouring the `excluded_fds` bitmask.
void for_each_open_fd(OPEN_FD_ACTION action, OPEN_FD_EXCLUDE excluded_fds);
#ifdef OS_WINDOWS
void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data);
#else
@ -483,7 +475,9 @@ extern char *netdata_configured_host_prefix;
#include "datetime/rfc3339.h"
#include "datetime/rfc7231.h"
#include "completion/completion.h"
#include "popen/popen.h"
#include "log/log.h"
#include "spawn_server/spawn_server.h"
#include "spawn_server/spawn_popen.h"
#include "simple_pattern/simple_pattern.h"
#ifdef ENABLE_HTTPS
# include "socket/security.h"
@ -491,7 +485,6 @@ extern char *netdata_configured_host_prefix;
#include "socket/socket.h"
#include "config/appconfig.h"
#include "log/journal.h"
#include "log/log.h"
#include "buffered_reader/buffered_reader.h"
#include "procfile/procfile.h"
#include "string/string.h"

View file

@ -6,6 +6,10 @@
#include "../libnetdata.h"
#if defined(OS_WINDOWS)
#include <windows.h>
#endif
#ifdef __FreeBSD__
#include <sys/endian.h>
#endif
@ -35,6 +39,16 @@ int aclklog_enabled = 0;
struct nd_log_source;
static bool nd_log_limit_reached(struct nd_log_source *source);
// ----------------------------------------------------------------------------
// Reset the thread's pending error state: errno everywhere and,
// on Windows builds, the value reported by GetLastError() as well.
void errno_clear(void) {
    errno = 0;

#ifdef OS_WINDOWS
    SetLastError(ERROR_SUCCESS);
#endif
}
// ----------------------------------------------------------------------------
// logging method
@ -514,6 +528,13 @@ int nd_log_health_fd(void) {
return STDERR_FILENO;
}
// Returns the descriptor of the collectors log when that source logs to a file
// that is currently open; in every other case falls back to STDERR_FILENO.
int nd_log_collectors_fd(void) {
    if(nd_log.sources[NDLS_COLLECTORS].method != NDLM_FILE || nd_log.sources[NDLS_COLLECTORS].fd == -1)
        return STDERR_FILENO;

    return nd_log.sources[NDLS_COLLECTORS].fd;
}
void nd_log_set_user_settings(ND_LOG_SOURCES source, const char *setting) {
char buf[FILENAME_MAX + 100];
if(setting && *setting)
@ -1011,6 +1032,10 @@ static void errno_annotator(BUFFER *wb, const char *key, struct log_field *lf);
static void priority_annotator(BUFFER *wb, const char *key, struct log_field *lf);
static void timestamp_usec_annotator(BUFFER *wb, const char *key, struct log_field *lf);
#if defined(OS_WINDOWS)
static void winerror_annotator(BUFFER *wb, const char *key, struct log_field *lf);
#endif
// ----------------------------------------------------------------------------
typedef void (*annotator_t)(BUFFER *wb, const char *key, struct log_field *lf);
@ -1058,6 +1083,13 @@ static __thread struct log_field thread_log_fields[_NDF_MAX] = {
.logfmt = "errno",
.logfmt_annotator = errno_annotator,
},
#if defined(OS_WINDOWS)
[NDF_WINERROR] = {
.journal = "WINERROR",
.logfmt = "winerror",
.logfmt_annotator = winerror_annotator,
},
#endif
[NDF_INVOCATION_ID] = {
.journal = "INVOCATION_ID", // standard journald field
.logfmt = NULL,
@ -1563,6 +1595,45 @@ static void errno_annotator(BUFFER *wb, const char *key, struct log_field *lf) {
buffer_fast_strcat(wb, "\"", 1);
}
#if defined(OS_WINDOWS)
// logfmt annotator for the Windows last-error field.
// Appends `key="<code>, <system message>"` to wb; emits nothing when the code is 0.
static void winerror_annotator(BUFFER *wb, const char *key, struct log_field *lf) {
    DWORD code = log_field_to_uint64(lf);
    if(!code)
        return;

    char text[1024];
    DWORD len = FormatMessageA(
        FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
        NULL,
        code,
        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        text,
        (DWORD)(sizeof(text) - 1),
        NULL
    );

    if(len == 0) {
        // the system has no text for this code
        len = snprintf(text, sizeof(text) - 1, "unknown error code");
    }
    else {
        // remove \r\n at the end
        for(; len > 0; len--) {
            char last = text[len - 1];
            if(last != '\r' && last != '\n')
                break;

            text[len - 1] = '\0';
        }
    }
    text[len] = '\0';

    // separate from whatever was already written on this line
    if(buffer_strlen(wb))
        buffer_fast_strcat(wb, " ", 1);

    buffer_strcat(wb, key);
    buffer_fast_strcat(wb, "=\"", 2);
    buffer_print_int64(wb, code);
    buffer_fast_strcat(wb, ", ", 2);
    buffer_json_strcat(wb, text);
    buffer_fast_strcat(wb, "\"", 1);
}
#endif
static void priority_annotator(BUFFER *wb, const char *key, struct log_field *lf) {
uint64_t pri = log_field_to_uint64(lf);
@ -2099,8 +2170,8 @@ static void nd_logger_merge_log_stack_to_thread_fields(void) {
}
static void nd_logger(const char *file, const char *function, const unsigned long line,
ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, bool limit, int saved_errno,
const char *fmt, va_list ap) {
ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, bool limit,
int saved_errno, size_t saved_winerror __maybe_unused, const char *fmt, va_list ap) {
SPINLOCK *spinlock;
FILE *fp;
@ -2168,6 +2239,11 @@ static void nd_logger(const char *file, const char *function, const unsigned lon
if(saved_errno != 0 && !thread_log_fields[NDF_ERRNO].entry.set)
thread_log_fields[NDF_ERRNO].entry = ND_LOG_FIELD_I64(NDF_ERRNO, saved_errno);
#if defined(OS_WINDOWS)
if(saved_winerror != 0 && !thread_log_fields[NDF_WINERROR].entry.set)
thread_log_fields[NDF_WINERROR].entry = ND_LOG_FIELD_U64(NDF_WINERROR, saved_winerror);
#endif
CLEAN_BUFFER *wb = NULL;
if(fmt && !thread_log_fields[NDF_MESSAGE].entry.set) {
wb = buffer_create(1024, NULL);
@ -2215,7 +2291,7 @@ static void nd_logger(const char *file, const char *function, const unsigned lon
nd_log.sources[source].pending_msg = NULL;
}
errno = 0;
errno_clear();
}
static ND_LOG_SOURCES nd_log_validate_source(ND_LOG_SOURCES source) {
@ -2234,6 +2310,12 @@ static ND_LOG_SOURCES nd_log_validate_source(ND_LOG_SOURCES source) {
void netdata_logger(ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const char *file, const char *function, unsigned long line, const char *fmt, ... )
{
int saved_errno = errno;
size_t saved_winerror = 0;
#if defined(OS_WINDOWS)
saved_winerror = GetLastError();
#endif
source = nd_log_validate_source(source);
if (source != NDLS_DEBUG && priority > nd_log.sources[source].min_priority)
@ -2243,12 +2325,18 @@ void netdata_logger(ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const
va_start(args, fmt);
nd_logger(file, function, line, source, priority,
source == NDLS_DAEMON || source == NDLS_COLLECTORS,
saved_errno, fmt, args);
saved_errno, saved_winerror, fmt, args);
va_end(args);
}
void netdata_logger_with_limit(ERROR_LIMIT *erl, ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) {
int saved_errno = errno;
size_t saved_winerror = 0;
#if defined(OS_WINDOWS)
saved_winerror = GetLastError();
#endif
source = nd_log_validate_source(source);
if (source != NDLS_DEBUG && priority > nd_log.sources[source].min_priority)
@ -2272,7 +2360,7 @@ void netdata_logger_with_limit(ERROR_LIMIT *erl, ND_LOG_SOURCES source, ND_LOG_F
va_start(args, fmt);
nd_logger(file, function, line, source, priority,
source == NDLS_DAEMON || source == NDLS_COLLECTORS,
saved_errno, fmt, args);
saved_errno, saved_winerror, fmt, args);
va_end(args);
erl->last_logged = now;
erl->count = 0;
@ -2280,12 +2368,18 @@ void netdata_logger_with_limit(ERROR_LIMIT *erl, ND_LOG_SOURCES source, ND_LOG_F
void netdata_logger_fatal( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) {
int saved_errno = errno;
size_t saved_winerror = 0;
#if defined(OS_WINDOWS)
saved_winerror = GetLastError();
#endif
ND_LOG_SOURCES source = NDLS_DAEMON;
source = nd_log_validate_source(source);
va_list args;
va_start(args, fmt);
nd_logger(file, function, line, source, NDLP_ALERT, true, saved_errno, fmt, args);
nd_logger(file, function, line, source, NDLP_ALERT, true, saved_errno, saved_winerror, fmt, args);
va_end(args);
char date[LOG_DATE_LENGTH];

View file

@ -46,6 +46,9 @@ typedef enum __attribute__((__packed__)) {
NDF_LOG_SOURCE, // DAEMON, COLLECTORS, HEALTH, ACCESS, ACLK - set at the log call
NDF_PRIORITY, // the syslog priority (severity) - set at the log call
NDF_ERRNO, // the ERRNO at the time of the log call - added automatically
#if defined(OS_WINDOWS)
NDF_WINERROR, // Windows GetLastError()
#endif
NDF_INVOCATION_ID, // the INVOCATION_ID of Netdata - added automatically
NDF_LINE, // the source code file line number - added automatically
NDF_FILE, // the source code filename - added automatically
@ -141,6 +144,7 @@ typedef enum __attribute__((__packed__)) {
NDFT_CALLBACK,
} ND_LOG_STACK_FIELD_TYPE;
void errno_clear(void);
void nd_log_set_user_settings(ND_LOG_SOURCES source, const char *setting);
void nd_log_set_facility(const char *facility);
void nd_log_set_priority_level(const char *setting);
@ -157,6 +161,7 @@ const char *nd_log_id2priority(ND_LOG_FIELD_PRIORITY priority);
const char *nd_log_method_for_external_plugins(const char *s);
int nd_log_health_fd(void);
int nd_log_collectors_fd(void);
typedef bool (*log_formatter_callback_t)(BUFFER *wb, void *data);
struct log_stack_entry {

View file

@ -5,10 +5,8 @@
#include "libnetdata/libnetdata.h"
// disable libmnl for the moment
#undef HAVE_LIBMNL
#ifdef HAVE_LIBMNL
#include <linux/rtnetlink.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
#include <linux/unix_diag.h>
@ -67,30 +65,41 @@ struct local_port;
struct local_socket_state;
typedef void (*local_sockets_cb_t)(struct local_socket_state *state, struct local_socket *n, void *data);
// Settings for a local-sockets scan. Embedded as the `config` member of
// struct local_socket_state.
struct local_sockets_config {
// presumably select which socket directions are collected — TODO confirm against the scanner
bool listening;
bool inbound;
bool outbound;
bool local;
// presumably select which protocol / address-family combinations are scanned — TODO confirm
bool tcp4;
bool tcp6;
bool udp4;
bool udp6;
// presumably select which per-socket details are resolved — TODO confirm
bool pid;
bool cmdline;
bool comm;
bool uid;
bool namespaces;
bool tcp_info;
// local_sockets_log() stops logging once this many errors have been counted
size_t max_errors;
size_t max_concurrent_namespaces;
// callback of type local_sockets_cb_t (state, socket, data); `data` is presumably the pointer handed back to it
local_sockets_cb_t cb;
void *data;
const char *host_prefix;
// internal use
uint64_t net_ns_inode;
};
typedef struct local_socket_state {
struct {
bool listening;
bool inbound;
bool outbound;
bool local;
bool tcp4;
bool tcp6;
bool udp4;
bool udp6;
bool pid;
bool cmdline;
bool comm;
bool uid;
bool namespaces;
size_t max_errors;
local_sockets_cb_t cb;
void *data;
const char *host_prefix;
} config;
struct local_sockets_config config;
struct {
size_t mnl_sends;
size_t namespaces_found;
size_t tcp_info_received;
size_t pid_fds_processed;
size_t pid_fds_opendir_failed;
size_t pid_fds_readlink_failed;
@ -98,6 +107,9 @@ typedef struct local_socket_state {
size_t errors_encountered;
} stats;
bool spawn_server_is_mine;
SPAWN_SERVER *spawn_server;
#ifdef HAVE_LIBMNL
bool use_nl;
struct mnl_socket *nl;
@ -106,6 +118,7 @@ typedef struct local_socket_state {
ARAL *local_socket_aral;
ARAL *pid_socket_aral;
SPINLOCK spinlock; // for namespaces
uint64_t proc_self_net_ns_inode;
@ -181,12 +194,21 @@ typedef struct local_socket {
SOCKET_DIRECTION direction;
uint8_t timer;
uint8_t retransmits;
uint8_t retransmits; // the # of packets currently queued for retransmission (not yet acknowledged)
uint32_t expires;
uint32_t rqueue;
uint32_t wqueue;
uid_t uid;
struct {
bool checked;
bool ipv46;
} ipv6ony;
union {
struct tcp_info tcp;
} info;
char comm[TASK_COMM_LEN];
STRING *cmdline;
@ -201,16 +223,18 @@ typedef struct local_socket {
#endif
} LOCAL_SOCKET;
static inline void local_sockets_spawn_server_callback(SPAWN_REQUEST *request);
// --------------------------------------------------------------------------------------------------------------------
static inline void local_sockets_log(LS_STATE *ls, const char *format, ...) PRINTFLIKE(2, 3);
static inline void local_sockets_log(LS_STATE *ls, const char *format, ...) {
if(++ls->stats.errors_encountered == ls->config.max_errors) {
if(ls && ++ls->stats.errors_encountered == ls->config.max_errors) {
nd_log(NDLS_COLLECTORS, NDLP_ERR, "LOCAL-SOCKETS: max number of logs reached. Not logging anymore");
return;
}
if(ls->stats.errors_encountered > ls->config.max_errors)
if(ls && ls->stats.errors_encountered > ls->config.max_errors)
return;
char buf[16384];
@ -224,6 +248,133 @@ static inline void local_sockets_log(LS_STATE *ls, const char *format, ...) {
// --------------------------------------------------------------------------------------------------------------------
// True when addr has the ::ffff:a.b.c.d layout:
// bytes 0-9 are zero and bytes 10-11 are 0xFF (80 zero bits followed by 16 one bits).
static bool local_sockets_is_ipv4_mapped_ipv6_address(const struct in6_addr *addr) {
    for(size_t i = 0; i < 10; i++) {
        if(addr->s6_addr[i] != 0)
            return false;
    }

    return addr->s6_addr[10] == 0xFF && addr->s6_addr[11] == 0xFF;
}
/*
 * True when the endpoint address is a loopback address:
 *  - AF_INET:  any address in 127.0.0.0/8
 *  - AF_INET6: an IPv4-mapped address whose embedded IPv4 is in 127.0.0.0/8, or ::1
 * Any other address family yields false.
 */
static bool local_sockets_is_loopback_address(struct socket_endpoint *se) {
    if (se->family == AF_INET) {
        // the first byte (in host order: the top 8 bits) must be 127
        return (ntohl(se->ip.ipv4) >> 24) == 127;
    }

    if (se->family == AF_INET6) {
        if (local_sockets_is_ipv4_mapped_ipv6_address(&se->ip.ipv6)) {
            // The embedded IPv4 address is the last 4 of the 16 bytes. Copy it out
            // instead of dereferencing a casted pointer, so the read does not
            // depend on the alignment of the endpoint or on aliasing rules.
            const uint8_t *ip6 = (const uint8_t *)&se->ip.ipv6;
            uint32_t ipv4_addr;
            memcpy(&ipv4_addr, ip6 + 12, sizeof(ipv4_addr));
            return (ntohl(ipv4_addr) >> 24) == 127;
        }

        // plain IPv6: the only loopback address is ::1
        return memcmp(&se->ip.ipv6, &in6addr_loopback, sizeof(se->ip.ipv6)) == 0;
    }

    return false;
}
/*
 * True when `ip` (an IPv4 address in network byte order) belongs to one of the
 * non-public ranges this file treats as reserved:
 * 10/8, 172.16/12, 192.168/16, 127/8, 0/8, 169.254/16 and 192.0/16.
 */
static inline bool local_sockets_is_ipv4_reserved_address(uint32_t ip) {
    // work in host byte order so the prefixes can be compared as plain numbers
    ip = ntohl(ip);

    return (
        ((ip & 0xFF000000u) == 0x0A000000u) || // Private IP range (A class), 10.0.0.0/8
        ((ip & 0xFFF00000u) == 0xAC100000u) || // Private IP range (B class), 172.16.0.0/12
        ((ip & 0xFFFF0000u) == 0xC0A80000u) || // Private IP range (C class), 192.168.0.0/16
        ((ip & 0xFF000000u) == 0x7F000000u) || // Loopback, 127.0.0.0/8
        ((ip & 0xFF000000u) == 0x00000000u) || // Reserved, 0.0.0.0/8
        // Link-local, 169.254.0.0/16. Both octets must be compared together:
        // testing (ip >> 16) against 254 alone can never match.
        ((ip & 0xFFFF0000u) == 0xA9FE0000u) ||
        ((ip & 0xFFFF0000u) == 0xC0000000u)    // Test-Net, matched as 192.0.0.0/16
    );
}
/*
 * True when the endpoint address is not a public address:
 *  - AF_INET:  decided by local_sockets_is_ipv4_reserved_address()
 *  - AF_INET6: IPv4-mapped addresses are judged by their embedded IPv4;
 *              otherwise fe80::/10, fc00::/7, ff00::/8, :: and ::1 match.
 * Any other address family yields false.
 */
static inline bool local_sockets_is_private_address(struct socket_endpoint *se) {
    if (se->family == AF_INET)
        return local_sockets_is_ipv4_reserved_address(se->ip.ipv4);

    if (se->family == AF_INET6) {
        const uint8_t *ip6 = (const uint8_t *)&se->ip.ipv6;

        if (local_sockets_is_ipv4_mapped_ipv6_address(&se->ip.ipv6)) {
            // The embedded IPv4 address is the last 4 bytes. Copy it out instead of
            // dereferencing a casted pointer (alignment / aliasing safe), then apply
            // the IPv4 reserved-range rules to it.
            uint32_t ipv4_addr;
            memcpy(&ipv4_addr, ip6 + 12, sizeof(ipv4_addr));
            return local_sockets_is_ipv4_reserved_address(ipv4_addr);
        }

        // Check for link-local addresses (fe80::/10)
        if ((ip6[0] == 0xFE) && ((ip6[1] & 0xC0) == 0x80))
            return true;

        // Check for Unique Local Addresses (ULA) (fc00::/7)
        if ((ip6[0] & 0xFE) == 0xFC)
            return true;

        // Check for multicast addresses (ff00::/8)
        if (ip6[0] == 0xFF)
            return true;

        // finally, the unspecified address (::) and the loopback address (::1)
        return memcmp(&se->ip.ipv6, &in6addr_any, sizeof(se->ip.ipv6)) == 0 ||
               memcmp(&se->ip.ipv6, &in6addr_loopback, sizeof(se->ip.ipv6)) == 0;
    }

    return false;
}
/*
 * True when the endpoint address is a multicast address:
 *  - AF_INET:  224.0.0.0/4
 *  - AF_INET6: ff00::/8
 * Any other address family yields false.
 */
static bool local_sockets_is_multicast_address(struct socket_endpoint *se) {
    if (se->family == AF_INET) {
        // The range below is expressed in host byte order, so this is a
        // network-to-host conversion (the sibling checks in this file read
        // se->ip.ipv4 through ntohl() as well).
        uint32_t ip = ntohl(se->ip.ipv4);
        return (ip >= 0xE0000000 && ip <= 0xEFFFFFFF); // Multicast address range (224.0.0.0/4)
    }
    else if (se->family == AF_INET6) {
        // For IPv6, check if the address is ff00::/8
        const uint8_t *ip6 = (const uint8_t *)&se->ip.ipv6;
        return ip6[0] == 0xff;
    }

    return false;
}
// True when the endpoint holds the all-zeros address of its family
// (0.0.0.0 for AF_INET, :: for AF_INET6); false for any other family.
static bool local_sockets_is_zero_address(struct socket_endpoint *se) {
    switch (se->family) {
        case AF_INET:
            return se->ip.ipv4 == 0;

        case AF_INET6:
            return memcmp(&se->ip.ipv6, &in6addr_any, sizeof(se->ip.ipv6)) == 0;

        default:
            return false;
    }
}
// Classify an endpoint address as "zero", "loopback", "multicast", "private" or "public".
// The checks run in that order and the first one that matches wins.
static inline const char *local_sockets_address_space(struct socket_endpoint *se) {
    if(local_sockets_is_zero_address(se))
        return "zero";

    if(local_sockets_is_loopback_address(se))
        return "loopback";

    if(local_sockets_is_multicast_address(se))
        return "multicast";

    if(local_sockets_is_private_address(se))
        return "private";

    return "public";
}
// --------------------------------------------------------------------------------------------------------------------
// True for an IPv6 listening socket bound to the all-zeros address whose
// ipv6ony state has been checked and flags it as serving both IPv4 and IPv6.
static inline bool is_local_socket_ipv46(LOCAL_SOCKET *n) {
    if(n->local.family != AF_INET6)
        return false;

    if(n->direction != SOCKET_DIRECTION_LISTEN)
        return false;

    if(!local_sockets_is_zero_address(&n->local))
        return false;

    return n->ipv6ony.checked && n->ipv6ony.ipv46;
}
// --------------------------------------------------------------------------------------------------------------------
static void local_sockets_foreach_local_socket_call_cb(LS_STATE *ls) {
for(SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_first_read_only_LOCAL_SOCKET(&ls->sockets_hashtable);
sl;
@ -425,123 +576,6 @@ static inline bool local_sockets_find_all_sockets_in_proc(LS_STATE *ls, const ch
// --------------------------------------------------------------------------------------------------------------------
// True when addr has the ::ffff:a.b.c.d layout:
// bytes 0-9 are zero and bytes 10-11 are 0xFF (80 zero bits followed by 16 one bits).
static bool local_sockets_is_ipv4_mapped_ipv6_address(const struct in6_addr *addr) {
    for(size_t i = 0; i < 10; i++) {
        if(addr->s6_addr[i] != 0)
            return false;
    }

    return addr->s6_addr[10] == 0xFF && addr->s6_addr[11] == 0xFF;
}
/*
 * True when the endpoint address is a loopback address:
 *  - AF_INET:  any address in 127.0.0.0/8
 *  - AF_INET6: an IPv4-mapped address whose embedded IPv4 is in 127.0.0.0/8, or ::1
 * Any other address family yields false.
 */
static bool local_sockets_is_loopback_address(struct socket_endpoint *se) {
    if (se->family == AF_INET) {
        // the first byte (in host order: the top 8 bits) must be 127
        return (ntohl(se->ip.ipv4) >> 24) == 127;
    }

    if (se->family == AF_INET6) {
        if (local_sockets_is_ipv4_mapped_ipv6_address(&se->ip.ipv6)) {
            // The embedded IPv4 address is the last 4 of the 16 bytes. Copy it out
            // instead of dereferencing a casted pointer, so the read does not
            // depend on the alignment of the endpoint or on aliasing rules.
            const uint8_t *ip6 = (const uint8_t *)&se->ip.ipv6;
            uint32_t ipv4_addr;
            memcpy(&ipv4_addr, ip6 + 12, sizeof(ipv4_addr));
            return (ntohl(ipv4_addr) >> 24) == 127;
        }

        // plain IPv6: the only loopback address is ::1
        return memcmp(&se->ip.ipv6, &in6addr_loopback, sizeof(se->ip.ipv6)) == 0;
    }

    return false;
}
/*
 * True when `ip` (an IPv4 address in network byte order) belongs to one of the
 * non-public ranges this file treats as reserved:
 * 10/8, 172.16/12, 192.168/16, 127/8, 0/8, 169.254/16 and 192.0/16.
 */
static inline bool local_sockets_is_ipv4_reserved_address(uint32_t ip) {
    // work in host byte order so the prefixes can be compared as plain numbers
    ip = ntohl(ip);

    return (
        ((ip & 0xFF000000u) == 0x0A000000u) || // Private IP range (A class), 10.0.0.0/8
        ((ip & 0xFFF00000u) == 0xAC100000u) || // Private IP range (B class), 172.16.0.0/12
        ((ip & 0xFFFF0000u) == 0xC0A80000u) || // Private IP range (C class), 192.168.0.0/16
        ((ip & 0xFF000000u) == 0x7F000000u) || // Loopback, 127.0.0.0/8
        ((ip & 0xFF000000u) == 0x00000000u) || // Reserved, 0.0.0.0/8
        // Link-local, 169.254.0.0/16. Both octets must be compared together:
        // testing (ip >> 16) against 254 alone can never match.
        ((ip & 0xFFFF0000u) == 0xA9FE0000u) ||
        ((ip & 0xFFFF0000u) == 0xC0000000u)    // Test-Net, matched as 192.0.0.0/16
    );
}
/*
 * True when the endpoint address is not a public address:
 *  - AF_INET:  decided by local_sockets_is_ipv4_reserved_address()
 *  - AF_INET6: IPv4-mapped addresses are judged by their embedded IPv4;
 *              otherwise fe80::/10, fc00::/7, ff00::/8, :: and ::1 match.
 * Any other address family yields false.
 */
static inline bool local_sockets_is_private_address(struct socket_endpoint *se) {
    if (se->family == AF_INET)
        return local_sockets_is_ipv4_reserved_address(se->ip.ipv4);

    if (se->family == AF_INET6) {
        const uint8_t *ip6 = (const uint8_t *)&se->ip.ipv6;

        if (local_sockets_is_ipv4_mapped_ipv6_address(&se->ip.ipv6)) {
            // The embedded IPv4 address is the last 4 bytes. Copy it out instead of
            // dereferencing a casted pointer (alignment / aliasing safe), then apply
            // the IPv4 reserved-range rules to it.
            uint32_t ipv4_addr;
            memcpy(&ipv4_addr, ip6 + 12, sizeof(ipv4_addr));
            return local_sockets_is_ipv4_reserved_address(ipv4_addr);
        }

        // Check for link-local addresses (fe80::/10)
        if ((ip6[0] == 0xFE) && ((ip6[1] & 0xC0) == 0x80))
            return true;

        // Check for Unique Local Addresses (ULA) (fc00::/7)
        if ((ip6[0] & 0xFE) == 0xFC)
            return true;

        // Check for multicast addresses (ff00::/8)
        if (ip6[0] == 0xFF)
            return true;

        // finally, the unspecified address (::) and the loopback address (::1)
        return memcmp(&se->ip.ipv6, &in6addr_any, sizeof(se->ip.ipv6)) == 0 ||
               memcmp(&se->ip.ipv6, &in6addr_loopback, sizeof(se->ip.ipv6)) == 0;
    }

    return false;
}
/*
 * True when the endpoint address is a multicast address:
 *  - AF_INET:  224.0.0.0/4
 *  - AF_INET6: ff00::/8
 * Any other address family yields false.
 */
static bool local_sockets_is_multicast_address(struct socket_endpoint *se) {
    if (se->family == AF_INET) {
        // The range below is expressed in host byte order, so this is a
        // network-to-host conversion (the sibling checks in this file read
        // se->ip.ipv4 through ntohl() as well).
        uint32_t ip = ntohl(se->ip.ipv4);
        return (ip >= 0xE0000000 && ip <= 0xEFFFFFFF); // Multicast address range (224.0.0.0/4)
    }
    else if (se->family == AF_INET6) {
        // For IPv6, check if the address is ff00::/8
        const uint8_t *ip6 = (const uint8_t *)&se->ip.ipv6;
        return ip6[0] == 0xff;
    }

    return false;
}
// True when the endpoint holds the all-zeros address of its family
// (0.0.0.0 for AF_INET, :: for AF_INET6); false for any other family.
static bool local_sockets_is_zero_address(struct socket_endpoint *se) {
    switch (se->family) {
        case AF_INET:
            return se->ip.ipv4 == 0;

        case AF_INET6:
            return memcmp(&se->ip.ipv6, &in6addr_any, sizeof(se->ip.ipv6)) == 0;

        default:
            return false;
    }
}
// Classify an endpoint address as "zero", "loopback", "multicast", "private" or "public".
// The checks run in that order and the first one that matches wins.
static inline const char *local_sockets_address_space(struct socket_endpoint *se) {
    if(local_sockets_is_zero_address(se))
        return "zero";

    if(local_sockets_is_loopback_address(se))
        return "loopback";

    if(local_sockets_is_multicast_address(se))
        return "multicast";

    if(local_sockets_is_private_address(se))
        return "private";

    return "public";
}
// --------------------------------------------------------------------------------------------------------------------
static inline void local_sockets_index_listening_port(LS_STATE *ls, LOCAL_SOCKET *n) {
if(n->direction & SOCKET_DIRECTION_LISTEN) {
// for the listening sockets, keep a hashtable with all the local ports
@ -636,28 +670,31 @@ static inline bool local_sockets_add_socket(LS_STATE *ls, LOCAL_SOCKET *tmp) {
#ifdef HAVE_LIBMNL
static inline void local_sockets_netlink_init(LS_STATE *ls) {
ls->use_nl = true;
static inline void local_sockets_libmnl_init(LS_STATE *ls) {
ls->nl = mnl_socket_open(NETLINK_INET_DIAG);
if (!ls->nl) {
local_sockets_log(ls, "cannot open netlink socket");
if (ls->nl == NULL) {
local_sockets_log(ls, "cannot open libmnl netlink socket");
ls->use_nl = false;
}
if (mnl_socket_bind(ls->nl, 0, MNL_SOCKET_AUTOPID) < 0) {
local_sockets_log(ls, "cannot bind netlink socket");
else if (mnl_socket_bind(ls->nl, 0, MNL_SOCKET_AUTOPID) < 0) {
local_sockets_log(ls, "cannot bind libmnl netlink socket");
mnl_socket_close(ls->nl);
ls->nl = NULL;
ls->use_nl = false;
}
else
ls->use_nl = true;
}
static inline void local_sockets_netlink_cleanup(LS_STATE *ls) {
static inline void local_sockets_libmnl_cleanup(LS_STATE *ls) {
if(ls->nl) {
mnl_socket_close(ls->nl);
ls->nl = NULL;
ls->use_nl = false;
}
}
static inline int local_sockets_netlink_cb_data(const struct nlmsghdr *nlh, void *data) {
static inline int local_sockets_libmnl_cb_data(const struct nlmsghdr *nlh, void *data) {
LS_STATE *ls = data;
struct inet_diag_msg *diag_msg = mnl_nlmsg_get_payload(nlh);
@ -666,15 +703,19 @@ static inline int local_sockets_netlink_cb_data(const struct nlmsghdr *nlh, void
.inode = diag_msg->idiag_inode,
.direction = SOCKET_DIRECTION_NONE,
.state = diag_msg->idiag_state,
.ipv6ony = {
.checked = false,
.ipv46 = false,
},
.local = {
.protocol = ls->tmp_protocol,
.family = diag_msg->idiag_family,
.port = diag_msg->id.idiag_sport,
.port = ntohs(diag_msg->id.idiag_sport),
},
.remote = {
.protocol = ls->tmp_protocol,
.family = diag_msg->idiag_family,
.port = diag_msg->id.idiag_dport,
.port = ntohs(diag_msg->id.idiag_dport),
},
.timer = diag_msg->idiag_timer,
.retransmits = diag_msg->idiag_retrans,
@ -693,12 +734,37 @@ static inline int local_sockets_netlink_cb_data(const struct nlmsghdr *nlh, void
memcpy(&n.remote.ip.ipv6, diag_msg->id.idiag_dst, sizeof(n.remote.ip.ipv6));
}
struct rtattr *attr = (struct rtattr *)(diag_msg + 1);
int rtattrlen = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*diag_msg));
for (; !n.ipv6ony.checked && RTA_OK(attr, rtattrlen); attr = RTA_NEXT(attr, rtattrlen)) {
switch (attr->rta_type) {
case INET_DIAG_INFO: {
if(ls->tmp_protocol == IPPROTO_TCP) {
struct tcp_info *info = (struct tcp_info *)RTA_DATA(attr);
n.info.tcp = *info;
ls->stats.tcp_info_received++;
}
}
break;
case INET_DIAG_SKV6ONLY: {
n.ipv6ony.checked = true;
int ipv6only = *(int *)RTA_DATA(attr);
n.ipv6ony.ipv46 = !ipv6only;
}
break;
default:
break;
}
}
local_sockets_add_socket(ls, &n);
return MNL_CB_OK;
}
static inline bool local_sockets_netlink_get_sockets(LS_STATE *ls, uint16_t family, uint16_t protocol) {
static inline bool local_sockets_libmnl_get_sockets(LS_STATE *ls, uint16_t family, uint16_t protocol) {
ls->tmp_protocol = protocol;
char buf[MNL_SOCKET_BUFFER_SIZE];
@ -710,14 +776,22 @@ static inline bool local_sockets_netlink_get_sockets(LS_STATE *ls, uint16_t fami
req.sdiag_family = family;
req.sdiag_protocol = protocol;
req.idiag_states = -1;
req.idiag_ext = 0;
if(family == AF_INET6)
req.idiag_ext |= 1 << (INET_DIAG_SKV6ONLY - 1);
if(protocol == IPPROTO_TCP && ls->config.tcp_info)
req.idiag_ext |= 1 << (INET_DIAG_INFO - 1);
nlh = mnl_nlmsg_put_header(buf);
nlh->nlmsg_type = SOCK_DIAG_BY_FAMILY;
nlh->nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST;
nlh->nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
nlh->nlmsg_seq = seq = time(NULL);
mnl_nlmsg_put_extra_header(nlh, sizeof(req));
memcpy(mnl_nlmsg_get_payload(nlh), &req, sizeof(req));
ls->stats.mnl_sends++;
if (mnl_socket_sendto(ls->nl, nlh, nlh->nlmsg_len) < 0) {
local_sockets_log(ls, "mnl_socket_send failed");
return false;
@ -725,7 +799,7 @@ static inline bool local_sockets_netlink_get_sockets(LS_STATE *ls, uint16_t fami
ssize_t ret;
while ((ret = mnl_socket_recvfrom(ls->nl, buf, sizeof(buf))) > 0) {
ret = mnl_cb_run(buf, ret, seq, portid, local_sockets_netlink_cb_data, ls);
ret = mnl_cb_run(buf, ret, seq, portid, local_sockets_libmnl_cb_data, ls);
if (ret <= MNL_CB_STOP)
break;
}
@ -774,6 +848,10 @@ static inline bool local_sockets_read_proc_net_x(LS_STATE *ls, const char *filen
LOCAL_SOCKET n = {
.direction = SOCKET_DIRECTION_NONE,
.ipv6ony = {
.checked = false,
.ipv46 = false,
},
.local = {
.family = family,
.protocol = protocol,
@ -904,6 +982,10 @@ static inline void local_sockets_detect_directions(LS_STATE *ls) {
// --------------------------------------------------------------------------------------------------------------------
static inline void local_sockets_init(LS_STATE *ls) {
ls->config.host_prefix = netdata_configured_host_prefix;
spinlock_init(&ls->spinlock);
simple_hashtable_init_NET_NS(&ls->ns_hashtable, 1024);
simple_hashtable_init_PID_SOCKET(&ls->pid_sockets_hashtable, 65535);
simple_hashtable_init_LOCAL_SOCKET(&ls->sockets_hashtable, 65535);
@ -923,9 +1005,36 @@ static inline void local_sockets_init(LS_STATE *ls) {
65536,
65536,
NULL, NULL, NULL, false, true);
memset(&ls->stats, 0, sizeof(ls->stats));
#ifdef HAVE_LIBMNL
ls->use_nl = false;
ls->nl = NULL;
ls->tmp_protocol = 0;
local_sockets_libmnl_init(ls);
#endif
if(ls->config.namespaces && ls->spawn_server == NULL) {
ls->spawn_server = spawn_server_create(NULL, local_sockets_spawn_server_callback, 0, NULL);
ls->spawn_server_is_mine = true;
}
else
ls->spawn_server_is_mine = false;
}
static inline void local_sockets_cleanup(LS_STATE *ls) {
if(ls->spawn_server_is_mine) {
spawn_server_destroy(ls->spawn_server);
ls->spawn_server = NULL;
ls->spawn_server_is_mine = false;
}
#ifdef HAVE_LIBMNL
local_sockets_libmnl_cleanup(ls);
#endif
// free the sockets hashtable data
for(SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_first_read_only_LOCAL_SOCKET(&ls->sockets_hashtable);
sl;
@ -963,8 +1072,8 @@ static inline void local_sockets_cleanup(LS_STATE *ls) {
static inline void local_sockets_do_family_protocol(LS_STATE *ls, const char *filename, uint16_t family, uint16_t protocol) {
#ifdef HAVE_LIBMNL
if(ls->use_nl) {
ls->use_nl = local_sockets_netlink_get_sockets(ls, family, protocol);
if(ls->nl && ls->use_nl) {
ls->use_nl = local_sockets_libmnl_get_sockets(ls, family, protocol);
if(ls->use_nl)
return;
@ -974,7 +1083,7 @@ static inline void local_sockets_do_family_protocol(LS_STATE *ls, const char *fi
local_sockets_read_proc_net_x(ls, filename, family, protocol);
}
static inline void local_sockets_read_sockets_from_proc(LS_STATE *ls) {
static inline void local_sockets_read_all_system_sockets(LS_STATE *ls) {
char path[FILENAME_MAX + 1];
if(ls->config.namespaces) {
@ -1036,7 +1145,52 @@ static inline void local_sockets_send_to_parent(struct local_socket_state *ls __
local_sockets_log(ls, "failed to write cmdline to pipe");
}
static inline bool local_sockets_get_namespace_sockets(LS_STATE *ls, struct pid_socket *ps, pid_t *pid) {
// Spawn-server callback: collects the sockets of one network namespace and
// streams them back to the requester.
//
// request->data carries a struct local_sockets_config (copied below),
// request->fds[1] is used as the output descriptor and request->fds[3] is the
// descriptor handed to setns(). This function never returns: it always ends
// with exit().
static inline void local_sockets_spawn_server_callback(SPAWN_REQUEST *request) {
LS_STATE ls = { 0 };
ls.config = *((struct local_sockets_config *)request->data);
// we don't need these inside namespaces
ls.config.cmdline = false;
ls.config.comm = false;
ls.config.pid = false;
ls.config.namespaces = false;
// initialize local sockets
local_sockets_init(&ls);
// must come after local_sockets_init(), which overwrites config.host_prefix
ls.config.host_prefix = ""; // we need the /proc of the container
// NOTE(review): cw.net_ns_inode is captured here, before proc_self_net_ns_inode
// is replaced with the requested namespace inode a few lines below - confirm
// this is the intended value.
struct local_sockets_child_work cw = {
.net_ns_inode = ls.proc_self_net_ns_inode,
.fd = request->fds[1], // stdout
};
// every socket found is forwarded through local_sockets_send_to_parent()
ls.config.cb = local_sockets_send_to_parent;
ls.config.data = &cw;
ls.proc_self_net_ns_inode = ls.config.net_ns_inode;
// switch namespace using the custom fd passed via the spawn server
if (setns(request->fds[3], CLONE_NEWNET) == -1) {
local_sockets_log(&ls, "failed to switch network namespace at child process using fd %d", request->fds[3]);
exit(EXIT_FAILURE);
}
// read all sockets from /proc
local_sockets_read_all_system_sockets(&ls);
// send all sockets to parent
local_sockets_foreach_local_socket_call_cb(&ls);
// send the terminating socket
// (all fields zero except the namespace inode)
struct local_socket zero = {
.net_ns_inode = ls.config.net_ns_inode,
};
local_sockets_send_to_parent(&ls, &zero, &cw);
exit(EXIT_SUCCESS);
}
static inline bool local_sockets_get_namespace_sockets_with_pid(LS_STATE *ls, struct pid_socket *ps) {
char filename[1024];
snprintfz(filename, sizeof(filename), "%s/proc/%d/ns/net", ls->config.host_prefix, ps->pid);
@ -1060,80 +1214,32 @@ static inline bool local_sockets_get_namespace_sockets(LS_STATE *ls, struct pid_
return false;
}
int pipefd[2];
if (pipe(pipefd) != 0) {
local_sockets_log(ls, "cannot create pipe");
if(ls->spawn_server == NULL) {
close(fd);
local_sockets_log(ls, "spawn server is not available");
return false;
}
*pid = fork();
if (*pid == 0) {
// Child process
close(pipefd[0]);
struct local_sockets_config config = ls->config;
config.net_ns_inode = ps->net_ns_inode;
SPAWN_INSTANCE *si = spawn_server_exec(ls->spawn_server, STDERR_FILENO, fd, NULL, &config, sizeof(config), SPAWN_INSTANCE_TYPE_CALLBACK);
close(fd); fd = -1;
// local_sockets_log(ls, "child is here for inode %"PRIu64" and namespace %"PRIu64, ps->inode, ps->net_ns_inode);
struct local_sockets_child_work cw = {
.net_ns_inode = ps->net_ns_inode,
.fd = pipefd[1],
};
ls->config.host_prefix = ""; // we need the /proc of the container
ls->config.cb = local_sockets_send_to_parent;
ls->config.data = &cw;
ls->config.cmdline = false; // we have these already
ls->config.comm = false; // we have these already
ls->config.pid = false; // we have these already
ls->config.namespaces = false;
ls->proc_self_net_ns_inode = ps->net_ns_inode;
// switch namespace
if (setns(fd, CLONE_NEWNET) == -1) {
local_sockets_log(ls, "failed to switch network namespace at child process");
exit(EXIT_FAILURE);
}
#ifdef HAVE_LIBMNL
local_sockets_netlink_cleanup(ls);
local_sockets_netlink_init(ls);
#endif
// read all sockets from /proc
local_sockets_read_sockets_from_proc(ls);
// send all sockets to parent
local_sockets_foreach_local_socket_call_cb(ls);
// send the terminating socket
struct local_socket zero = {
.net_ns_inode = ps->net_ns_inode,
};
local_sockets_send_to_parent(ls, &zero, &cw);
#ifdef HAVE_LIBMNL
local_sockets_netlink_cleanup(ls);
#endif
close(pipefd[1]); // Close write end of pipe
exit(EXIT_SUCCESS);
if(si == NULL) {
local_sockets_log(ls, "cannot create spawn instance");
return false;
}
// parent
close(fd);
close(pipefd[1]);
size_t received = 0;
struct local_socket buf;
while(read(pipefd[0], &buf, sizeof(buf)) == sizeof(buf)) {
while(read(spawn_server_instance_read_fd(si), &buf, sizeof(buf)) == sizeof(buf)) {
size_t len = 0;
if(read(pipefd[0], &len, sizeof(len)) != sizeof(len))
if(read(spawn_server_instance_read_fd(si), &len, sizeof(len)) != sizeof(len))
local_sockets_log(ls, "failed to read cmdline length from pipe");
if(len) {
char cmdline[len + 1];
if(read(pipefd[0], cmdline, len) != (ssize_t)len)
if(read(spawn_server_instance_read_fd(si), cmdline, len) != (ssize_t)len)
local_sockets_log(ls, "failed to read cmdline from pipe");
else {
cmdline[len] = '\0';
@ -1153,15 +1259,15 @@ static inline bool local_sockets_get_namespace_sockets(LS_STATE *ls, struct pid_
break;
}
spinlock_lock(&ls->spinlock);
SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_get_slot_LOCAL_SOCKET(&ls->sockets_hashtable, buf.inode, &buf, true);
LOCAL_SOCKET *n = SIMPLE_HASHTABLE_SLOT_DATA(sl);
if(n) {
string_freez(buf.cmdline);
// local_sockets_log(ls,
// "ns inode %" PRIu64" (comm: '%s', pid: %u, ns: %"PRIu64") already exists in hashtable (comm: '%s', pid: %u, ns: %"PRIu64") - ignoring duplicate",
// buf.inode, buf.comm, buf.pid, buf.net_ns_inode, n->comm, n->pid, n->net_ns_inode);
continue;
}
else {
n = aral_mallocz(ls->local_socket_aral);
@ -1170,75 +1276,109 @@ static inline bool local_sockets_get_namespace_sockets(LS_STATE *ls, struct pid_
local_sockets_index_listening_port(ls, n);
}
spinlock_unlock(&ls->spinlock);
}
close(pipefd[0]);
spawn_server_exec_kill(ls->spawn_server, si);
return received > 0;
}
static inline void local_socket_waitpid(LS_STATE *ls, pid_t pid) {
if(!pid) return;
struct local_sockets_namespace_worker {
LS_STATE *ls;
uint64_t inode;
};
int status;
waitpid(pid, &status, 0);
static inline void *local_sockets_get_namespace_sockets(void *arg) {
struct local_sockets_namespace_worker *data = arg;
LS_STATE *ls = data->ls;
const uint64_t inode = data->inode;
if (WIFEXITED(status) && WEXITSTATUS(status) != 0)
local_sockets_log(ls, "Child exited with status %d", WEXITSTATUS(status));
else if (WIFSIGNALED(status))
local_sockets_log(ls, "Child terminated by signal %d", WTERMSIG(status));
spinlock_lock(&ls->spinlock);
// find a pid_socket that has this namespace
for(SIMPLE_HASHTABLE_SLOT_PID_SOCKET *sl_pid = simple_hashtable_first_read_only_PID_SOCKET(&ls->pid_sockets_hashtable) ;
sl_pid ;
sl_pid = simple_hashtable_next_read_only_PID_SOCKET(&ls->pid_sockets_hashtable, sl_pid)) {
struct pid_socket *ps = SIMPLE_HASHTABLE_SLOT_DATA(sl_pid);
if(!ps || ps->net_ns_inode != inode) continue;
// now we have a pid that has the same namespace inode
spinlock_unlock(&ls->spinlock);
const bool worked = local_sockets_get_namespace_sockets_with_pid(ls, ps);
spinlock_lock(&ls->spinlock);
if(worked)
break;
}
spinlock_unlock(&ls->spinlock);
return NULL;
}
static inline void local_sockets_namespaces(LS_STATE *ls) {
pid_t children[5] = { 0 };
size_t last_child = 0;
size_t threads = ls->config.max_concurrent_namespaces;
if(threads == 0) threads = 5;
if(threads > 100) threads = 100;
size_t last_thread = 0;
ND_THREAD *workers[threads];
struct local_sockets_namespace_worker workers_data[threads];
memset(workers, 0, sizeof(workers));
memset(workers_data, 0, sizeof(workers_data));
spinlock_lock(&ls->spinlock);
for(SIMPLE_HASHTABLE_SLOT_NET_NS *sl = simple_hashtable_first_read_only_NET_NS(&ls->ns_hashtable);
sl;
sl = simple_hashtable_next_read_only_NET_NS(&ls->ns_hashtable, sl)) {
uint64_t inode = (uint64_t)SIMPLE_HASHTABLE_SLOT_DATA(sl);
const uint64_t inode = (uint64_t)SIMPLE_HASHTABLE_SLOT_DATA(sl);
if(inode == ls->proc_self_net_ns_inode)
continue;
// find a pid_socket that has this namespace
for(SIMPLE_HASHTABLE_SLOT_PID_SOCKET *sl_pid = simple_hashtable_first_read_only_PID_SOCKET(&ls->pid_sockets_hashtable) ;
sl_pid ;
sl_pid = simple_hashtable_next_read_only_PID_SOCKET(&ls->pid_sockets_hashtable, sl_pid)) {
struct pid_socket *ps = SIMPLE_HASHTABLE_SLOT_DATA(sl_pid);
if(!ps || ps->net_ns_inode != inode) continue;
spinlock_unlock(&ls->spinlock);
if(++last_child >= 5)
last_child = 0;
ls->stats.namespaces_found++;
local_socket_waitpid(ls, children[last_child]);
children[last_child] = 0;
if(workers[last_thread] != NULL) {
if(++last_thread >= threads)
last_thread = 0;
// now we have a pid that has the same namespace inode
if(local_sockets_get_namespace_sockets(ls, ps, &children[last_child]))
break;
if(workers[last_thread]) {
nd_thread_join(workers[last_thread]);
workers[last_thread] = NULL;
}
}
workers_data[last_thread].ls = ls;
workers_data[last_thread].inode = inode;
workers[last_thread] = nd_thread_create(
"local-sockets-worker", NETDATA_THREAD_OPTION_JOINABLE,
local_sockets_get_namespace_sockets, &workers_data[last_thread]);
spinlock_lock(&ls->spinlock);
}
for(size_t i = 0; i < 5 ;i++)
local_socket_waitpid(ls, children[i]);
spinlock_unlock(&ls->spinlock);
// wait all the threads running
for(size_t i = 0; i < threads ;i++) {
if(workers[i])
nd_thread_join(workers[i]);
}
}
// --------------------------------------------------------------------------------------------------------------------
static inline void local_sockets_process(LS_STATE *ls) {
#ifdef HAVE_LIBMNL
local_sockets_netlink_init(ls);
#endif
ls->config.host_prefix = netdata_configured_host_prefix;
// initialize our hashtables
local_sockets_init(ls);
// read all sockets from /proc
local_sockets_read_sockets_from_proc(ls);
local_sockets_read_all_system_sockets(ls);
// check all socket namespaces
if(ls->config.namespaces)
@ -1253,10 +1393,6 @@ static inline void local_sockets_process(LS_STATE *ls) {
// free all memory
local_sockets_cleanup(ls);
#ifdef HAVE_LIBMNL
local_sockets_netlink_cleanup(ls);
#endif
}
static inline void ipv6_address_to_txt(struct in6_addr *in6_addr, char *dst) {

View file

@ -0,0 +1,87 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "../libnetdata.h"
// Returns non-zero when fd refers to an open file descriptor.
// A descriptor is treated as open unless fcntl(F_GETFD) fails with EBADF.
static int fd_is_valid(int fd) {
    errno_clear();

    if(fcntl(fd, F_GETFD) != -1)
        return 1;

    return errno != EBADF;
}
// Returns an upper bound for file descriptor numbers of this process.
// The value is resolved once and cached in a function-level static.
// Sources, in order of preference:
//   1. the hard limit of RLIMIT_NOFILE (unless it is RLIM_INFINITY)
//   2. sysconf(_SC_OPEN_MAX), where available
//   3. an arbitrary default of 65535
int os_get_fd_open_max(void) {
    static int fd_open_max = CLOSE_RANGE_FD_MAX;

    // CLOSE_RANGE_FD_MAX doubles as the "not resolved yet" marker
    if(fd_open_max != CLOSE_RANGE_FD_MAX)
        return fd_open_max;

    struct rlimit nofile;
    if(getrlimit(RLIMIT_NOFILE, &nofile) == 0 && nofile.rlim_max != RLIM_INFINITY)
        fd_open_max = nofile.rlim_max;

#ifdef _SC_OPEN_MAX
    if(fd_open_max == CLOSE_RANGE_FD_MAX || fd_open_max == -1)
        fd_open_max = sysconf(_SC_OPEN_MAX);
#endif

    // Arbitrary default if everything else fails
    if(fd_open_max == CLOSE_RANGE_FD_MAX || fd_open_max == -1)
        fd_open_max = 65535;

    return fd_open_max;
}
// Close every open file descriptor in the inclusive range [first, last].
// Passing CLOSE_RANGE_FD_MAX as `last` means "no upper bound".
//
// Strategies, tried in order:
//   1. close_range(), when the platform provides it
//   2. on Linux, walking /proc/self/fd
//   3. probing every descriptor number up to os_get_fd_open_max()
void os_close_range(int first, int last) {
#if defined(HAVE_CLOSE_RANGE)
    if(close_range(first, last, 0) == 0) return;
#endif

#if defined(OS_LINUX)
    DIR *dir = opendir("/proc/self/fd");
    if (dir != NULL) {
        // The directory stream has its own descriptor, which is listed in
        // /proc/self/fd too. Closing it while iterating would break readdir()
        // and closedir(), so leave it alone: closedir() releases it at the end.
        const int dir_fd = dirfd(dir);

        struct dirent *entry;
        while ((entry = readdir(dir)) != NULL) {
            int fd = str2i(entry->d_name);
            if (fd == dir_fd)
                continue;

            if (fd >= first && (last == CLOSE_RANGE_FD_MAX || fd <= last) && fd_is_valid(fd))
                (void)close(fd);
        }
        closedir(dir);
        return;
    }
#endif

    // Fallback to looping through all file descriptors if necessary
    if (last == CLOSE_RANGE_FD_MAX)
        last = os_get_fd_open_max();

    for (int fd = first; fd <= last; fd++) {
        if (fd_is_valid(fd)) (void)close(fd);
    }
}
// qsort() comparator for arrays of int, ascending order.
// Returns -1, 0 or 1 without subtracting, so it cannot overflow.
static int compare_ints(const void *a, const void *b) {
    const int lhs = *(const int *)a;
    const int rhs = *(const int *)b;

    if(lhs < rhs)
        return -1;

    if(lhs > rhs)
        return 1;

    return 0;
}
// Close every open file descriptor above stderr, except the ones listed.
//
// fds     - descriptors to keep open; the array is sorted in place
// fds_num - number of entries in fds
//
// With no exceptions (fds == NULL or fds_num == 0) everything above
// STDERR_FILENO is closed. stdin, stdout and stderr are never closed here.
void os_close_all_non_std_open_fds_except(int fds[], size_t fds_num) {
    if (fds_num == 0 || fds == NULL) {
        os_close_range(STDERR_FILENO + 1, CLOSE_RANGE_FD_MAX);
        return;
    }

    qsort(fds, fds_num, sizeof(int), compare_ints);

    int start = STDERR_FILENO + 1;
    for (size_t i = 0; i < fds_num; i++) {
        // Entries below `start` are standard descriptors, negative
        // placeholders or duplicates. They must not move `start` backwards,
        // otherwise the next range would include stdin/stdout/stderr.
        if (fds[i] < start)
            continue;

        // close the gap between the previous kept descriptor and this one
        if (fds[i] > start)
            os_close_range(start, fds[i] - 1);

        start = fds[i] + 1;
    }

    // close everything after the last kept descriptor
    os_close_range(start, CLOSE_RANGE_FD_MAX);
}

View file

@ -0,0 +1,12 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef CLOSE_RANGE_H
#define CLOSE_RANGE_H
// Sentinel for the `last` argument of os_close_range(): no upper bound.
// os_get_fd_open_max() also uses it internally as its "not resolved yet" marker.
#define CLOSE_RANGE_FD_MAX (int)(~0U)
// Returns a cached upper bound for the file descriptor numbers of the process.
int os_get_fd_open_max(void);
// Closes all open file descriptors in the inclusive range [first, last].
void os_close_range(int first, int last);
// Closes all open file descriptors above stderr, except the fds_num descriptors
// listed in fds. The fds array is sorted in place.
void os_close_all_non_std_open_fds_except(int fds[], size_t fds_num);
#endif //CLOSE_RANGE_H

View file

@ -2,13 +2,27 @@
#include "../libnetdata.h"
pid_t pid_max = 32768;
pid_t os_get_system_pid_max(void) {
#if defined(OS_MACOS)
pid_t pid_max = 4194304;
pid_t os_get_system_pid_max(void) {
static bool read = false;
if(read) return pid_max;
read = true;
#if defined(OS_MACOS)
int mib[2];
int maxproc;
size_t len = sizeof(maxproc);
mib[0] = CTL_KERN;
mib[1] = KERN_MAXPROC;
if (sysctl(mib, 2, &maxproc, &len, NULL, 0) == -1) {
pid_max = 99999; // Fallback value
nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot find system max pid. Assuming %d.", pid_max);
}
else pid_max = (pid_t)maxproc;
// As we currently do not know a solution to query pid_max from the os
// we use the number defined in bsd/sys/proc_internal.h in XNU sources
pid_max = 99999;
return pid_max;
#elif defined(OS_FREEBSD)
@ -17,41 +31,40 @@ pid_t os_get_system_pid_max(void) {
if (unlikely(GETSYSCTL_BY_NAME("kern.pid_max", tmp_pid_max))) {
pid_max = 99999;
netdata_log_error("Assuming system's maximum pid is %d.", pid_max);
} else {
pid_max = tmp_pid_max;
nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot get system max pid. Assuming %d.", pid_max);
}
else
pid_max = tmp_pid_max;
return pid_max;
#elif defined(OS_LINUX)
static char read = 0;
if(unlikely(read)) return pid_max;
read = 1;
char filename[FILENAME_MAX + 1];
snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", netdata_configured_host_prefix?netdata_configured_host_prefix:"");
unsigned long long max = 0;
if(read_single_number_file(filename, &max) != 0) {
netdata_log_error("Cannot open file '%s'. Assuming system supports %d pids.", filename, pid_max);
nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot open file '%s'. Assuming system supports %d pids.", filename, pid_max);
return pid_max;
}
if(!max) {
netdata_log_error("Cannot parse file '%s'. Assuming system supports %d pids.", filename, pid_max);
nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot parse file '%s'. Assuming system supports %d pids.", filename, pid_max);
return pid_max;
}
pid_max = (pid_t) max;
return pid_max;
#elif defined(OS_WINDOWS)
pid_max = (pid_t)0x7FFFFFFF;
return pid_max;
#else
// just a big default
pid_max = 4194304;
// return the default
return pid_max;
#endif

View file

@ -7,12 +7,13 @@
#include <sys/syscall.h>
#endif
#include "setproctitle.h"
#include "close_range.h"
#include "setresuid.h"
#include "setresgid.h"
#include "getgrouplist.h"
#include "adjtimex.h"
#include "gettid.h"
#include "waitid.h"
#include "get_pid_max.h"
#include "get_system_cpus.h"
#include "tinysleep.h"

View file

@ -0,0 +1,31 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "../libnetdata.h"
#include "setproctitle.h"
// Rename the running process.
//
// new_name - the new process name
// argc     - number of entries in argv (0 to leave argv untouched)
// argv     - the original argument vector of the process, modified in place
//            (NULL to leave it untouched)
//
// Besides the platform calls, the strings argv points to are overwritten:
// argv[1..argc-1] are blanked with spaces and argv[0] receives new_name,
// truncated or space-padded to the original length of argv[0].
void os_setproctitle(const char *new_name, const int argc, const char **argv) {
#ifdef HAVE_SYS_PRCTL_H
    // Set the process name (comm)
    prctl(PR_SET_NAME, new_name, 0, 0, 0);
#endif

#ifdef __FreeBSD__
    // Set the process name on FreeBSD
    setproctitle("%s", new_name);
#endif

    if(!argc || !argv)
        return;

    // blank all parameters (except argv[0]), keeping their length
    for(int i = 1; i < argc ;i++) {
        char *param = (char *)argv[i];
        memset(param, ' ', strlen(param));
    }

    // overwrite argv[0], never writing past its original terminator
    char *title = (char *)argv[0];
    const size_t name_len = strlen(new_name);
    const size_t title_len = strlen(title);

    strncpyz(title, new_name, MIN(name_len, title_len));

    // pad the remainder with spaces when the new name is shorter
    for(size_t pos = name_len; pos < title_len; pos++)
        title[pos] = ' ';
}

View file

@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef SETPROCTITLE_H
#define SETPROCTITLE_H
// Renames the running process to new_name.
// When argc and argv are given, the strings argv points to are overwritten in
// place: argv[0] receives new_name and the remaining arguments are blanked.
void os_setproctitle(const char *new_name, int argc, const char **argv);
#endif //SETPROCTITLE_H

View file

@ -1,72 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "../libnetdata.h"
// waitid() wrapper. Uses the native call when HAVE_WAITID is defined,
// otherwise emulates it with waitpid().
//
// The emulation supports only P_ALL and P_PID (anything else fails with
// ENOSYS) and fills only si_pid, si_code and si_status of infop.
// Returns 0 on success (including "no child changed state") and -1 on error.
int os_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options) {
#if defined(HAVE_WAITID)
    return waitid(idtype, id, infop, options);
#else
    // A per-thread, single entry cache for WNOWAIT: the result of a P_ALL
    // call made with WNOWAIT is remembered, and handed out to the next P_PID
    // call for the same pid instead of calling waitpid() again.
    static const struct pid_status no_child = { 0, 0 };
    static __thread struct pid_status cached = { 0, 0 };
    struct pid_status result = { 0, 0 };

    // start from a clean infop, only 3 of its fields are set below
    memset(infop, 0, sizeof(*infop));

    if(idtype == P_ALL) {
        result.pid = waitpid((pid_t)-1, &result.status, options);

        if(options & WNOWAIT)
            cached = result;
        else
            cached = no_child;
    }
    else if(idtype == P_PID) {
        if(cached.pid == (pid_t)id) {
            // consume the cached entry
            result = cached;
            cached = no_child;
        }
        else
            result.pid = waitpid((pid_t)id, &result.status, options);
    }
    else {
        errno = ENOSYS;
        return -1;
    }

    if(result.pid < 0)
        return -1;

    if(result.pid == 0) {
        // No change in state, depends on WNOHANG
        return 0;
    }

    // translate the waitpid() status into the siginfo_t fields we support
    if(WIFEXITED(result.status)) {
        infop->si_code = CLD_EXITED;
        infop->si_status = WEXITSTATUS(result.status);
    }
    else if(WIFSIGNALED(result.status)) {
        infop->si_code = WTERMSIG(result.status) == SIGABRT ? CLD_DUMPED : CLD_KILLED;
        infop->si_status = WTERMSIG(result.status);
    }
    else if(WIFSTOPPED(result.status)) {
        infop->si_code = CLD_STOPPED;
        infop->si_status = WSTOPSIG(result.status);
    }
    else if(WIFCONTINUED(result.status)) {
        infop->si_code = CLD_CONTINUED;
        infop->si_status = SIGCONT;
    }

    infop->si_pid = result.pid;
    return 0;
#endif
}

View file

@ -1,48 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef NETDATA_WAITID_H
#define NETDATA_WAITID_H
#include "config.h"
#include <sys/types.h>
#include <signal.h>
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
// Fallback values for the wait flags, for platforms whose headers do not
// define them.
#ifndef WNOWAIT
#define WNOWAIT 0x01000000
#endif
#ifndef WEXITED
#define WEXITED 4
#endif
// Types needed by the waitpid() based emulation in os_waitid(),
// declared only when the platform has no waitid().
#if !defined(HAVE_WAITID)
typedef enum
{
P_ALL, /* Wait for any child. */
P_PID, /* Wait for specified process. */
P_PGID, /* Wait for members of process group. */
P_PIDFD, /* Wait for the child referred by the PID file descriptor. */
} idtype_t;
// A pid together with the status waitpid() reported for it.
struct pid_status {
pid_t pid;
int status;
};
// Native Windows builds (not Cygwin) get minimal id_t and siginfo_t types.
#if defined(OS_WINDOWS) && !defined(__CYGWIN__)
typedef uint32_t id_t;
typedef struct {
int si_code; /* Signal code. */
int si_status; /* Exit value or signal. */
pid_t si_pid; /* Sending process ID. */
} siginfo_t;
#endif
#endif
// waitid() when available, otherwise an emulation based on waitpid().
int os_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
#endif //NETDATA_WAITID_H

View file

@ -1,15 +0,0 @@
<!--
title: "popen"
custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/popen/README.md
sidebar_label: "popen"
learn_status: "Published"
learn_topic_type: "Tasks"
learn_rel_path: "Developers/libnetdata"
-->
# popen
Process management library

View file

@ -1,446 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "../libnetdata.h"
// ----------------------------------------------------------------------------
// popen with tracking
// Protects netdata_popen_root and the entries linked from it.
static pthread_mutex_t netdata_popen_tracking_mutex = NETDATA_MUTEX_INITIALIZER;
// One tracked child process, kept in a double linked list.
struct netdata_popen {
// pid of the spawned child
pid_t pid;
// set by netdata_waitid() once it has waited for this pid
bool reaped;
// siginfo stored by netdata_waitid(), returned to later callers
siginfo_t infop;
// return value of that wait, returned to later callers
int waitid_ret;
struct netdata_popen *next;
struct netdata_popen *prev;
};
// Head of the list of tracked children.
static struct netdata_popen *netdata_popen_root = NULL;
// Acquire the mutex protecting the list of tracked child processes.
static void netdata_popen_tracking_lock(void)
{
    netdata_mutex_lock(&netdata_popen_tracking_mutex);
}
// Release the mutex protecting the list of tracked child processes.
static void netdata_popen_tracking_unlock(void)
{
    netdata_mutex_unlock(&netdata_popen_tracking_mutex);
}
// Start tracking a child pid, by prepending a zeroed entry to the list.
// "unsafe": this function does not take the tracking lock itself, the caller
// must already hold it.
static void netdata_popen_tracking_add_pid_unsafe(pid_t pid) {
    struct netdata_popen *entry = callocz(1, sizeof(*entry));
    entry->pid = pid;

    DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(netdata_popen_root, entry, prev, next);
}
// myp_del deletes pid if we're tracking.
static void netdata_popen_tracking_del_pid(pid_t pid) {
struct netdata_popen *mp;
netdata_popen_tracking_lock();
DOUBLE_LINKED_LIST_FOREACH_FORWARD(netdata_popen_root, mp, prev, next) {
if(unlikely(mp->pid == pid))
break;
}
if(mp) {
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(netdata_popen_root, mp, prev, next);
freez(mp);
}
else
netdata_log_error("POPEN: Cannot find pid %d.", pid);
netdata_popen_tracking_unlock();
}
// myp_free cleans up any resources allocated for process
// tracking.
void netdata_popen_tracking_cleanup(void) {
netdata_popen_tracking_lock();
while(netdata_popen_root) {
struct netdata_popen *mp = netdata_popen_root;
DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(netdata_popen_root, mp, prev, next);
freez(mp);
}
netdata_popen_tracking_unlock();
}
// waitid() that is aware of the children tracked by this file.
//
// For a tracked pid (idtype == P_PID, id != 0, pid found in the list) the
// first wait is performed with os_waitid() and its outcome (return value and
// siginfo) is stored in the list entry; any later call for the same pid gets
// the stored outcome back. The tracking lock is held for the whole call in
// this case, including the os_waitid() call.
//
// Any other request is passed straight to os_waitid().
int netdata_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options) {
    struct netdata_popen *tracked = NULL;

    if(idtype == P_PID && id != 0) {
        // the caller is asking to waitid() for a specific child pid
        netdata_popen_tracking_lock();

        DOUBLE_LINKED_LIST_FOREACH_FORWARD(netdata_popen_root, tracked, prev, next) {
            if(unlikely(tracked->pid == (pid_t)id))
                break;
        }

        // not one of ours, no need to keep the lock
        if(!tracked)
            netdata_popen_tracking_unlock();
    }

    if(!tracked)
        return os_waitid(idtype, id, infop, options);

    int rc;
    if(tracked->reaped) {
        // we have already reaped this child
        rc = tracked->waitid_ret;
        *infop = tracked->infop;
    }
    else {
        // we haven't reaped this child yet
        rc = os_waitid(idtype, id, infop, options);

        tracked->reaped = true;
        tracked->infop = *infop;
        tracked->waitid_ret = rc;
    }

    netdata_popen_tracking_unlock();
    return rc;
}
// ----------------------------------------------------------------------------
// helpers
// Render a NULL terminated argv array as one printable string, for logging.
//
// dst        - destination buffer
// size       - size of dst in bytes
// spawn_argv - NULL terminated array of arguments
//
// The first argument is copied as is, every following one is appended as
// " 'arg'". dst is always left holding a valid string, even when the array
// has no arguments at all.
static inline void convert_argv_to_string(char *dst, size_t size, const char *spawn_argv[]) {
    if(!dst || !size)
        return;

    // an empty argv must not leave dst uninitialized, the caller logs it
    dst[0] = '\0';

    if(!spawn_argv)
        return;

    for(int i = 0; spawn_argv[i] ;i++) {
        if(i == 0) snprintfz(dst, size, "%s", spawn_argv[i]);
        else {
            size_t len = strlen(dst);
            snprintfz(&dst[len], size - len, " '%s'", spawn_argv[i]);
        }
    }
}
// ----------------------------------------------------------------------------
// the core of netdata popen
/*
 * Spawn `command` with posix_spawn() and start tracking its pid.
 *
 * pidptr           - receives the pid of the child, on success only
 * env              - environment of the child
 * flags            - POPEN_FLAG_CLOSE_FD marks the open file descriptors of
 *                    this process close-on-exec before spawning
 * fpp_child_stdin  - when not NULL, a pipe is connected to the stdin of the
 *                    child and its write end is returned here as a FILE;
 *                    when NULL, the child gets /dev/null as stdin
 * fpp_child_stdout - when not NULL, a pipe is connected to the stdout of the
 *                    child and its read end is returned here as a FILE;
 *                    when NULL, the child gets /dev/null as stdout
 * command          - path of the executable
 * spawn_argv       - NULL terminated argv of the child
 *
 * Returns -1 on failure, 0 on success.
 * On failure, the FILE pointers that were requested are set to NULL.
 */
#define PIPE_READ 0
#define PIPE_WRITE 1
static int popene_internal(volatile pid_t *pidptr, char **env, uint8_t flags, FILE **fpp_child_stdin, FILE **fpp_child_stdout, const char *command, const char *spawn_argv[]) {
    // create a string to be logged about the command we are running
    char command_to_be_logged[2048];
    convert_argv_to_string(command_to_be_logged, sizeof(command_to_be_logged), spawn_argv);

    int ret = 0; // success by default
    int attr_rc = 1; // failure by default

    FILE *fp_child_stdin = NULL, *fp_child_stdout = NULL;
    int pipefd_stdin[2] = { -1, -1 };
    int pipefd_stdout[2] = { -1, -1 };

    pid_t pid;
    posix_spawnattr_t attr;
    posix_spawn_file_actions_t fa;

    unsigned int fds_to_exclude_from_closing = OPEN_FD_EXCLUDE_STDERR;

    if(posix_spawn_file_actions_init(&fa)) {
        netdata_log_error("POPEN: posix_spawn_file_actions_init() failed.");
        ret = -1;
        goto set_return_values_and_return;
    }

    // ------------------------------------------------------------------------
    // stdin of the child

    if(fpp_child_stdin) {
        if (pipe(pipefd_stdin) == -1) {
            netdata_log_error("POPEN: stdin pipe() failed");
            ret = -1;
            goto cleanup_and_return;
        }

        if ((fp_child_stdin = fdopen(pipefd_stdin[PIPE_WRITE], "w")) == NULL) {
            netdata_log_error("POPEN: fdopen() stdin failed");
            ret = -1;
            goto cleanup_and_return;
        }

        if(posix_spawn_file_actions_adddup2(&fa, pipefd_stdin[PIPE_READ], STDIN_FILENO)) {
            netdata_log_error("POPEN: posix_spawn_file_actions_adddup2() on stdin failed.");
            ret = -1;
            goto cleanup_and_return;
        }
    }
    else {
        if (posix_spawn_file_actions_addopen(&fa, STDIN_FILENO, "/dev/null", O_RDONLY, 0)) {
            netdata_log_error("POPEN: posix_spawn_file_actions_addopen() on stdin to /dev/null failed.");
            // this is not a fatal error
            fds_to_exclude_from_closing |= OPEN_FD_EXCLUDE_STDIN;
        }
    }

    // ------------------------------------------------------------------------
    // stdout of the child

    if (fpp_child_stdout) {
        if (pipe(pipefd_stdout) == -1) {
            netdata_log_error("POPEN: stdout pipe() failed");
            ret = -1;
            goto cleanup_and_return;
        }

        if ((fp_child_stdout = fdopen(pipefd_stdout[PIPE_READ], "r")) == NULL) {
            netdata_log_error("POPEN: fdopen() stdout failed");
            ret = -1;
            goto cleanup_and_return;
        }

        if(posix_spawn_file_actions_adddup2(&fa, pipefd_stdout[PIPE_WRITE], STDOUT_FILENO)) {
            netdata_log_error("POPEN: posix_spawn_file_actions_adddup2() on stdout failed.");
            ret = -1;
            goto cleanup_and_return;
        }
    }
    else {
        if (posix_spawn_file_actions_addopen(&fa, STDOUT_FILENO, "/dev/null", O_WRONLY, 0)) {
            netdata_log_error("POPEN: posix_spawn_file_actions_addopen() on stdout to /dev/null failed.");
            // this is not a fatal error
            fds_to_exclude_from_closing |= OPEN_FD_EXCLUDE_STDOUT;
        }
    }

    if(flags & POPEN_FLAG_CLOSE_FD) {
        // Mark all files to be closed by the exec() stage of posix_spawn()
        for_each_open_fd(OPEN_FD_ACTION_FD_CLOEXEC, fds_to_exclude_from_closing);
    }

    // ------------------------------------------------------------------------
    // spawn attributes

    attr_rc = posix_spawnattr_init(&attr);
    if(attr_rc) {
        // failed - this is not fatal, the child is spawned with default attributes
        netdata_log_error("POPEN: posix_spawnattr_init() failed.");
    }
    else {
        // success
        // reset all signals in the child

        if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF))
            netdata_log_error("POPEN: posix_spawnattr_setflags() failed.");

        sigset_t mask;
        sigemptyset(&mask);

        if (posix_spawnattr_setsigmask(&attr, &mask))
            netdata_log_error("POPEN: posix_spawnattr_setsigmask() failed.");
    }

    // ------------------------------------------------------------------------
    // spawn

    // Take the lock while we fork to ensure we don't race with SIGCHLD
    // delivery on a process which exits quickly.
    netdata_popen_tracking_lock();

    // attr is usable only when posix_spawnattr_init() succeeded;
    // a NULL attributes pointer makes posix_spawn() use the defaults
    if (!posix_spawn(&pid, command, &fa, attr_rc ? NULL : &attr, (char * const*)spawn_argv, env)) {
        // success
        *pidptr = pid;
        netdata_popen_tracking_add_pid_unsafe(pid);
        netdata_popen_tracking_unlock();
    }
    else {
        // failure
        netdata_popen_tracking_unlock();
        netdata_log_error("POPEN: failed to spawn command: \"%s\" from parent pid %d.", command_to_be_logged, getpid());
        ret = -1;
        goto cleanup_and_return;
    }

    // the normal cleanup will run
    // but ret == 0 at this point

cleanup_and_return:
    if(!attr_rc) {
        // posix_spawnattr_init() succeeded
        if (posix_spawnattr_destroy(&attr))
            netdata_log_error("POPEN: posix_spawnattr_destroy() failed");
    }

    if (posix_spawn_file_actions_destroy(&fa))
        netdata_log_error("POPEN: posix_spawn_file_actions_destroy() failed");

    // the child end - close it
    if(pipefd_stdin[PIPE_READ] != -1)
        close(pipefd_stdin[PIPE_READ]);

    // our end
    if(ret == -1 || !fpp_child_stdin) {
        // fclose() also closes the underlying descriptor,
        // so the raw fd is closed only when there is no FILE for it
        if (fp_child_stdin)
            fclose(fp_child_stdin);
        else if (pipefd_stdin[PIPE_WRITE] != -1)
            close(pipefd_stdin[PIPE_WRITE]);

        fp_child_stdin = NULL;
    }

    // the child end - close it
    if (pipefd_stdout[PIPE_WRITE] != -1)
        close(pipefd_stdout[PIPE_WRITE]);

    // our end
    if (ret == -1 || !fpp_child_stdout) {
        if (fp_child_stdout)
            fclose(fp_child_stdout);
        else if (pipefd_stdout[PIPE_READ] != -1)
            close(pipefd_stdout[PIPE_READ]);

        fp_child_stdout = NULL;
    }

set_return_values_and_return:
    if(fpp_child_stdin)
        *fpp_child_stdin = fp_child_stdin;

    if(fpp_child_stdout)
        *fpp_child_stdout = fp_child_stdout;

    return ret;
}
// Variadic front-end of popene_internal().
//
// The arguments following `command` are the argv of the child: all of them
// are expected to be strings, and the list must be terminated with NULL.
// Returns what popene_internal() returns.
int netdata_popene_variadic_internal_dont_use_directly(volatile pid_t *pidptr, char **env, uint8_t flags, FILE **fpp_child_input, FILE **fpp_child_output, const char *command, ...) {
    va_list ap;

    // first pass: count the arguments, up to the terminating NULL
    int argv_count = 0;
    va_start(ap, command);
    while(va_arg(ap, const char *) != NULL)
        argv_count++;
    va_end(ap);

    // second pass: collect them into a NULL terminated array on the stack,
    // as needed by posix_spawn()
    const char *spawn_argv[argv_count + 1];
    va_start(ap, command);
    for(int idx = 0; idx < argv_count; idx++)
        spawn_argv[idx] = va_arg(ap, const char *);
    va_end(ap);
    spawn_argv[argv_count] = NULL;

    return popene_internal(pidptr, env, flags, fpp_child_input, fpp_child_output, command, spawn_argv);
}
// See man environ
extern char **environ;
// Runs 'command' via "/bin/sh -c", using the environment of the current process.
// Returns the stream popene_internal() provides for the child's output (NULL when
// it provides none). The return code of popene_internal() is intentionally dropped.
FILE *netdata_popen(const char *command, volatile pid_t *pidptr, FILE **fpp_child_input) {
    const char *shell_argv[4];
    shell_argv[0] = "sh";
    shell_argv[1] = "-c";
    shell_argv[2] = command;
    shell_argv[3] = NULL;

    FILE *child_output = NULL;
    (void)popene_internal(pidptr, environ, POPEN_FLAG_CLOSE_FD, fpp_child_input, &child_output, "/bin/sh", shell_argv);

    return child_output;
}
// Same as netdata_popen(), but the child gets the caller supplied environment 'env'.
// Returns the stream popene_internal() provides for the child's output (NULL when
// it provides none). The return code of popene_internal() is intentionally dropped.
FILE *netdata_popene(const char *command, volatile pid_t *pidptr, char **env, FILE **fpp_child_input) {
    const char *shell_argv[4];
    shell_argv[0] = "sh";
    shell_argv[1] = "-c";
    shell_argv[2] = command;
    shell_argv[3] = NULL;

    FILE *child_output = NULL;
    (void)popene_internal(pidptr, env, POPEN_FLAG_CLOSE_FD, fpp_child_input, &child_output, "/bin/sh", shell_argv);

    return child_output;
}
// Runs 'command' via "/bin/sh -c" with the environment of the current process,
// without asking for any stream to or from the child (both FILE ** are NULL)
// and with POPEN_FLAG_NONE.
// returns 0 on success, -1 on failure
int netdata_spawn(const char *command, volatile pid_t *pidptr) {
    const char *shell_argv[4];
    shell_argv[0] = "sh";
    shell_argv[1] = "-c";
    shell_argv[2] = command;
    shell_argv[3] = NULL;

    return popene_internal(pidptr, environ, POPEN_FLAG_NONE, NULL, NULL, "/bin/sh", shell_argv);
}
// Closes the streams of a child (the ones that are not NULL), waits for the
// child with netdata_waitid() and removes its pid from the popen tracking.
//
// Returns:
//   the exit code of the child, when it exited
//    0  when it was killed by SIGTERM or SIGPIPE, when it was stopped or
//       continued, and also when netdata_waitid() failed
//   -1  when it was killed by any other signal
//   -2  when it dumped core
//   -4  when it was trapped
//   -5  for any other si_code
int netdata_pclose(FILE *fp_child_input, FILE *fp_child_output, pid_t pid) {
    netdata_log_debug(D_EXIT, "Request to netdata_pclose() on pid %d", pid);

    if (fp_child_input)
        fclose(fp_child_input);

    if (fp_child_output)
        fclose(fp_child_output);

    siginfo_t info;
    errno = 0;
    int wait_rc = netdata_waitid(P_PID, (id_t) pid, &info, WEXITED);
    netdata_popen_tracking_del_pid(pid);

    if (wait_rc == -1) {
        netdata_log_error("Cannot waitid() for pid %d", pid);
        return 0;
    }

    if (info.si_code == CLD_EXITED) {
        if (info.si_status)
            netdata_log_error("child pid %d exited with code %d.", info.si_pid, info.si_status);
        return info.si_status;
    }

    if (info.si_code == CLD_KILLED) {
        // SIGTERM and SIGPIPE are not reported as failures
        if (info.si_status == SIGTERM) {
            netdata_log_info("child pid %d killed by SIGTERM", info.si_pid);
            return 0;
        }
        if (info.si_status == SIGPIPE) {
            netdata_log_info("child pid %d killed by SIGPIPE.", info.si_pid);
            return 0;
        }
        netdata_log_error("child pid %d killed by signal %d.", info.si_pid, info.si_status);
        return -1;
    }

    if (info.si_code == CLD_DUMPED) {
        netdata_log_error("child pid %d core dumped by signal %d.", info.si_pid, info.si_status);
        return -2;
    }

    if (info.si_code == CLD_STOPPED) {
        netdata_log_error("child pid %d stopped by signal %d.", info.si_pid, info.si_status);
        return 0;
    }

    if (info.si_code == CLD_TRAPPED) {
        netdata_log_error("child pid %d trapped by signal %d.", info.si_pid, info.si_status);
        return -4;
    }

    if (info.si_code == CLD_CONTINUED) {
        netdata_log_error("child pid %d continued by signal %d.", info.si_pid, info.si_status);
        return 0;
    }

    netdata_log_error("child pid %d gave us a SIGCHLD with code %d and status %d.", info.si_pid, info.si_code, info.si_status);
    return -5;
}

View file

@ -1,35 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef NETDATA_POPEN_H
#define NETDATA_POPEN_H 1
#include "../os/waitid.h"
int netdata_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
#include "../libnetdata.h"
// indexes of the two ends of a pipe, as used with the pipefd_stdin[] / pipefd_stdout[] arrays
#define PIPE_READ 0
#define PIPE_WRITE 1
/* netdata_popene_variadic_internal_dont_use_directly() flag definitions */
#define POPEN_FLAG_NONE 0
#define POPEN_FLAG_CLOSE_FD (1 << 0) // Close all file descriptors other than STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO
// the flags to be used by default
#define POPEN_FLAGS_DEFAULT (POPEN_FLAG_CLOSE_FD)
// The netdata_popen_raw*() macros are the interface to use instead of calling
// netdata_popene_variadic_internal_dont_use_directly() directly.
// They add the terminating NULL to the arguments list.
// They pass the parameter 'command' twice - this is because the underlying call needs
// the command to execute and the argv[0] to pass to it.
//
// netdata_popen_raw_default_flags_and_environment(): POPEN_FLAGS_DEFAULT and 'environ'
// netdata_popen_raw_default_flags():                 POPEN_FLAGS_DEFAULT and the given 'env'
// netdata_popen_raw():                               the given 'flags' and the given 'env'
#define netdata_popen_raw_default_flags_and_environment(pidptr, fpp_child_input, fpp_child_output, command, args...) netdata_popene_variadic_internal_dont_use_directly(pidptr, environ, POPEN_FLAGS_DEFAULT, fpp_child_input, fpp_child_output, command, command, ##args, NULL)
#define netdata_popen_raw_default_flags(pidptr, env, fpp_child_input, fpp_child_output, command, args...) netdata_popene_variadic_internal_dont_use_directly(pidptr, env, POPEN_FLAGS_DEFAULT, fpp_child_input, fpp_child_output, command, command, ##args, NULL)
#define netdata_popen_raw(pidptr, env, flags, fpp_child_input, fpp_child_output, command, args...) netdata_popene_variadic_internal_dont_use_directly(pidptr, env, flags, fpp_child_input, fpp_child_output, command, command, ##args, NULL)
FILE *netdata_popen(const char *command, volatile pid_t *pidptr, FILE **fp_child_input);
FILE *netdata_popene(const char *command, volatile pid_t *pidptr, char **env, FILE **fp_child_input);
int netdata_popene_variadic_internal_dont_use_directly(volatile pid_t *pidptr, char **env, uint8_t flags, FILE **fpp_child_input, FILE **fpp_child_output, const char *command, ...);
int netdata_pclose(FILE *fp_child_input, FILE *fp_child_output, pid_t pid);
int netdata_spawn(const char *command, volatile pid_t *pidptr);
#endif /* NETDATA_POPEN_H */

View file

@ -336,7 +336,7 @@ __attribute__((constructor)) void procfile_initialize_default_separators(void) {
if(unlikely(i == '\n' || i == '\r'))
procfile_default_separators[i] = PF_CHAR_IS_NEWLINE;
else if(unlikely(isspace(i) || !isprint(i)))
else if(unlikely(isspace(i) || (!isprint(i) && !IS_UTF8_BYTE(i))))
procfile_default_separators[i] = PF_CHAR_IS_SEPARATOR;
else

View file

@ -194,11 +194,9 @@ int sock_setreuse(int fd, int reuse) {
void sock_setcloexec(int fd)
{
UNUSED(fd);
#ifndef SOCK_CLOEXEC
int flags = fcntl(fd, F_GETFD);
if (flags != -1)
(void) fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
#endif
}
int sock_setreuse_port(int fd __maybe_unused, int reuse __maybe_unused) {
@ -290,7 +288,7 @@ int create_listen_socket_unix(const char *path, int listen_backlog) {
name.sun_family = AF_UNIX;
strncpy(name.sun_path, path, sizeof(name.sun_path)-1);
errno = 0;
errno_clear();
if (unlink(path) == -1 && errno != ENOENT)
nd_log(NDLS_DAEMON, NDLP_ERR,
"LISTENER: failed to remove existing (probably obsolete or left-over) file on UNIX socket path '%s'.",
@ -918,7 +916,7 @@ int connect_to_this_ip46(int protocol, int socktype, const char *host, uint32_t
}
sock_setcloexec(fd);
errno = 0;
errno_clear();
if(connect(fd, ai->ai_addr, ai->ai_addrlen) < 0) {
if(errno == EALREADY || errno == EINPROGRESS) {
nd_log(NDLS_DAEMON, NDLP_DEBUG,
@ -1200,7 +1198,7 @@ inline int wait_on_socket_or_cancel_with_timeout(
const int wait_ms = (timeout_ms >= ND_CHECK_CANCELLABILITY_WHILE_WAITING_EVERY_MS || forever) ?
ND_CHECK_CANCELLABILITY_WHILE_WAITING_EVERY_MS : timeout_ms;
errno = 0;
errno_clear();
// check every wait_ms
const int ret = poll(&pfd, 1, wait_ms);
@ -1482,7 +1480,7 @@ int accept_socket(int fd, int flags, char *client_ip, size_t ipsize, char *clien
break;
}
if (!connection_allowed(nfd, client_ip, client_host, hostsize, access_list, "connection", allow_dns)) {
errno = 0;
errno_clear();
nd_log(NDLS_DAEMON, NDLP_WARNING,
"Permission denied for client '%s', port '%s'",
client_ip, client_port);

View file

@ -0,0 +1,138 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "spawn_popen.h"
SPAWN_SERVER *netdata_main_spawn_server = NULL;
// Creates the process-wide spawn server (netdata_main_spawn_server) on first use.
// The creation is serialized with a function-local spinlock and the pointer is
// checked again once the lock is held, so spawn_server_create() is called only
// while the pointer is still NULL.
// Returns true when the server exists after the call, false otherwise.
bool netdata_main_spawn_server_init(const char *name, int argc, const char **argv) {
    static SPINLOCK init_lock = NETDATA_SPINLOCK_INITIALIZER;

    // already created - nothing to do
    if(netdata_main_spawn_server != NULL)
        return true;

    spinlock_lock(&init_lock);

    if(!netdata_main_spawn_server)
        netdata_main_spawn_server = spawn_server_create(name, NULL, argc, argv);

    spinlock_unlock(&init_lock);

    return netdata_main_spawn_server != NULL;
}
// Destroys the process-wide spawn server, if there is one, and resets the
// global pointer to NULL.
void netdata_main_spawn_server_cleanup(void) {
    if(netdata_main_spawn_server == NULL)
        return;

    spawn_server_destroy(netdata_main_spawn_server);
    netdata_main_spawn_server = NULL;
}
// Executes argv[] through the main spawn server (creating the server on first
// use) and wraps the write and read file descriptors of the new instance into
// stdio streams.
//
// Returns a newly allocated POPEN_INSTANCE, to be released with
// spawn_popen_wait() or spawn_popen_kill(), or NULL on failure. On failure the
// spawned instance (if any) is killed and nothing is left allocated.
POPEN_INSTANCE *spawn_popen_run_argv(const char **argv) {
    // without a spawn server there is nobody to hand the request to
    if(!netdata_main_spawn_server_init(NULL, 0, NULL))
        return NULL;

    SPAWN_INSTANCE *si = spawn_server_exec(netdata_main_spawn_server, nd_log_collectors_fd(),
                                           0, argv, NULL, 0, SPAWN_INSTANCE_TYPE_EXEC);

    if(si == NULL) return NULL;

    POPEN_INSTANCE *pi = mallocz(sizeof(*pi));
    pi->si = si;
    pi->child_stdin_fp = fdopen(spawn_server_instance_write_fd(si), "w");
    pi->child_stdout_fp = fdopen(spawn_server_instance_read_fd(si), "r");

    if(!pi->child_stdin_fp) {
        nd_log(NDLS_COLLECTORS, NDLP_ERR, "Cannot open FILE on child's stdin on fd %d.", spawn_server_instance_write_fd(si));
        goto cleanup;
    }

    if(!pi->child_stdout_fp) {
        nd_log(NDLS_COLLECTORS, NDLP_ERR, "Cannot open FILE on child's stdout on fd %d.", spawn_server_instance_read_fd(si));
        goto cleanup;
    }

    return pi;

cleanup:
    // fclose() also closes the file descriptor the stream was opened on, so the
    // instance has to forget that descriptor before it is handed to
    // spawn_server_exec_kill() (which presumably closes the descriptors the
    // instance still holds - a stale one would then be closed twice).
    if(pi->child_stdin_fp) { fclose(pi->child_stdin_fp); spawn_server_instance_write_fd_unset(si); }
    if(pi->child_stdout_fp) { fclose(pi->child_stdout_fp); spawn_server_instance_read_fd_unset(si); }
    spawn_server_exec_kill(netdata_main_spawn_server, si);
    freez(pi);
    return NULL;
}
// Variadic front-end of spawn_popen_run_argv().
// 'cmd' becomes argv[0] and the strings following it (the list must end with a
// NULL) become argv[1..]; the array passed on is NULL terminated.
POPEN_INSTANCE *spawn_popen_run_variadic(const char *cmd, ...) {
    va_list ap;

    // first pass: count the strings that follow cmd, up to the terminating NULL
    int extra = 0;
    va_start(ap, cmd);
    while (va_arg(ap, char *) != NULL)
        extra++;
    va_end(ap);

    // cmd + the counted strings + the NULL terminator
    const char *argv[extra + 2];
    argv[0] = cmd;
    argv[extra + 1] = NULL;

    // second pass: collect the strings
    va_start(ap, cmd);
    for (int pos = 1; pos <= extra; pos++)
        argv[pos] = va_arg(ap, const char *);
    va_end(ap);

    return spawn_popen_run_argv(argv);
}
// Runs 'cmd' as "/bin/sh -c cmd" via spawn_popen_run_argv().
// Returns NULL, without spawning anything, when cmd is NULL or an empty string.
POPEN_INSTANCE *spawn_popen_run(const char *cmd) {
    if(cmd == NULL || cmd[0] == '\0')
        return NULL;

    const char *shell_argv[4];
    shell_argv[0] = "/bin/sh";
    shell_argv[1] = "-c";
    shell_argv[2] = cmd;
    shell_argv[3] = NULL;

    return spawn_popen_run_argv(shell_argv);
}
// Converts a wait status into the return code of spawn_popen_wait() / spawn_popen_kill():
//  - the exit code, when the child exited
//  - 0 when it was terminated by SIGTERM or SIGPIPE
//  - -1 in every other case
static int spawn_popen_status_rc(int status) {
    if(WIFEXITED(status))
        return WEXITSTATUS(status);

    if(!WIFSIGNALED(status))
        return -1;

    const int signo = WTERMSIG(status);
    return (signo == SIGTERM || signo == SIGPIPE) ? 0 : -1;
}
// Closes the streams of the instance that are still open, waits for the child
// to finish via spawn_server_exec_wait() and frees the POPEN_INSTANCE.
// Returns -1 when pi is NULL, otherwise the status of the child as converted by
// spawn_popen_status_rc().
int spawn_popen_wait(POPEN_INSTANCE *pi) {
    if(!pi) return -1;

    // a stream may be NULL here; calling fclose(NULL) is undefined behavior,
    // so each stream is closed (and its fd unset on the instance) only when it is set
    if(pi->child_stdin_fp) {
        fclose(pi->child_stdin_fp);
        pi->child_stdin_fp = NULL;
        spawn_server_instance_write_fd_unset(pi->si);
    }

    if(pi->child_stdout_fp) {
        fclose(pi->child_stdout_fp);
        pi->child_stdout_fp = NULL;
        spawn_server_instance_read_fd_unset(pi->si);
    }

    int status = spawn_server_exec_wait(netdata_main_spawn_server, pi->si);
    freez(pi);
    return spawn_popen_status_rc(status);
}
// Closes the streams of the instance that are still open, kills the child via
// spawn_server_exec_kill() and frees the POPEN_INSTANCE.
// Returns -1 when pi is NULL, otherwise the status of the child as converted by
// spawn_popen_status_rc().
int spawn_popen_kill(POPEN_INSTANCE *pi) {
    if(!pi) return -1;

    // a stream may be NULL here; calling fclose(NULL) is undefined behavior,
    // so each stream is closed (and its fd unset on the instance) only when it is set
    if(pi->child_stdin_fp) {
        fclose(pi->child_stdin_fp);
        pi->child_stdin_fp = NULL;
        spawn_server_instance_write_fd_unset(pi->si);
    }

    if(pi->child_stdout_fp) {
        fclose(pi->child_stdout_fp);
        pi->child_stdout_fp = NULL;
        spawn_server_instance_read_fd_unset(pi->si);
    }

    int status = spawn_server_exec_kill(netdata_main_spawn_server, pi->si);
    freez(pi);
    return spawn_popen_status_rc(status);
}

View file

@ -0,0 +1,24 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef SPAWN_POPEN_H
#define SPAWN_POPEN_H
#include "../libnetdata.h"
extern SPAWN_SERVER *netdata_main_spawn_server;
bool netdata_main_spawn_server_init(const char *name, int argc, const char **argv);
void netdata_main_spawn_server_cleanup(void);
// A running command, as returned by the spawn_popen_run*() functions.
// It is released by spawn_popen_wait() or spawn_popen_kill().
typedef struct {
SPAWN_INSTANCE *si; // the spawn server instance returned by spawn_server_exec()
FILE *child_stdin_fp; // stream opened for writing ("w") on the write fd of the instance
FILE *child_stdout_fp; // stream opened for reading ("r") on the read fd of the instance
} POPEN_INSTANCE;
POPEN_INSTANCE *spawn_popen_run(const char *cmd);
POPEN_INSTANCE *spawn_popen_run_argv(const char **argv);
POPEN_INSTANCE *spawn_popen_run_variadic(const char *cmd, ...);
int spawn_popen_wait(POPEN_INSTANCE *pi);
int spawn_popen_kill(POPEN_INSTANCE *pi);
#endif //SPAWN_POPEN_H

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,48 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef SPAWN_SERVER_H
#define SPAWN_SERVER_H
#define SPAWN_SERVER_TRANSFER_FDS 4
// The kind of request given to spawn_server_exec().
// SPAWN_INSTANCE_TYPE_CALLBACK is not defined when OS_WINDOWS is defined.
typedef enum {
SPAWN_INSTANCE_TYPE_EXEC = 0, // the type spawn_popen_run_argv() uses to execute argv[]
#if !defined(OS_WINDOWS)
SPAWN_INSTANCE_TYPE_CALLBACK = 1 // presumably runs the child_callback given to spawn_server_create() - TODO confirm
#endif
} SPAWN_INSTANCE_TYPE;
// this is only used publicly for SPAWN_INSTANCE_TYPE_CALLBACK
// which is not available in Windows
// A pointer to this structure is the only parameter of spawn_request_callback_t.
typedef struct spawn_request {
size_t request_id;
pid_t pid;
int socket;
int fds[SPAWN_SERVER_TRANSFER_FDS]; // 0 = stdin, 1 = stdout, 2 = stderr, 3 = custom
const char **environment;
const char **argv;
const void *data; // NOTE(review): presumably the 'data' given to spawn_server_exec() - confirm
size_t data_size; // NOTE(review): presumably the size of 'data' in bytes - confirm
SPAWN_INSTANCE_TYPE type;
struct spawn_request *prev, *next; // links to other requests (doubly linked list)
} SPAWN_REQUEST;
typedef void (*spawn_request_callback_t)(SPAWN_REQUEST *request);
typedef struct spawm_instance SPAWN_INSTANCE;
typedef struct spawn_server SPAWN_SERVER;
SPAWN_SERVER* spawn_server_create(const char *name, spawn_request_callback_t child_callback, int argc, const char **argv);
void spawn_server_destroy(SPAWN_SERVER *server);
SPAWN_INSTANCE* spawn_server_exec(SPAWN_SERVER *server, int stderr_fd, int custom_fd, const char **argv, const void *data, size_t data_size, SPAWN_INSTANCE_TYPE type);
int spawn_server_exec_kill(SPAWN_SERVER *server, SPAWN_INSTANCE *instance);
int spawn_server_exec_wait(SPAWN_SERVER *server, SPAWN_INSTANCE *instance);
int spawn_server_instance_read_fd(SPAWN_INSTANCE *si);
int spawn_server_instance_write_fd(SPAWN_INSTANCE *si);
pid_t spawn_server_instance_pid(SPAWN_INSTANCE *si);
void spawn_server_instance_read_fd_unset(SPAWN_INSTANCE *si);
void spawn_server_instance_write_fd_unset(SPAWN_INSTANCE *si);
#endif //SPAWN_SERVER_H

View file

@ -95,7 +95,7 @@ static inline str2xx_errno str2int(int *out, char *s, int base) {
// m_assert(0, "str2int error: STR2XX_INCONVERTIBLE");
return STR2XX_INCONVERTIBLE;
}
errno = 0;
errno_clear();
long l = strtol(s, &end, base);
/* Both checks are needed because INT_MAX == LONG_MAX is possible. */
if (unlikely(l > INT_MAX || (errno == ERANGE && l == LONG_MAX))){
@ -124,7 +124,7 @@ static inline str2xx_errno str2float(float *out, char *s) {
// m_assert(0, "str2float error: STR2XX_INCONVERTIBLE");
return STR2XX_INCONVERTIBLE;
}
errno = 0;
errno_clear();
float f = strtof(s, &end);
/* Both checks are needed because INT_MAX == LONG_MAX is possible. */
if (unlikely((errno == ERANGE && f == HUGE_VALF))){

View file

@ -162,7 +162,7 @@ int registry_db_save(void) {
fclose(fp);
errno = 0;
errno_clear();
// remove the .old db
netdata_log_debug(D_REGISTRY, "REGISTRY: Removing old db '%s'", old_filename);

View file

View file

@ -1,288 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "spawn.h"
static uv_thread_t thread;
int spawn_thread_error;
int spawn_thread_shutdown;
struct spawn_queue spawn_cmd_queue;
/*
 * Allocates a command descriptor for command_to_run (the string is duplicated).
 * serial, exit_status and pid start with their "invalid" values and no flag is set.
 * The result is released with destroy_spawn_cmd().
 */
static struct spawn_cmd_info *create_spawn_cmd(const char *command_to_run)
{
    struct spawn_cmd_info *cmd = mallocz(sizeof(*cmd));

    fatal_assert(0 == uv_cond_init(&cmd->cond));
    fatal_assert(0 == uv_mutex_init(&cmd->mutex));

    cmd->command_to_run = strdupz(command_to_run);
    cmd->flags = 0;

    /* invalid values, until the real ones are known */
    cmd->serial = 0;
    cmd->pid = -1;
    cmd->exit_status = -1;

    return cmd;
}
/*
 * Releases a command descriptor: its copy of the command string, its
 * condition variable, its mutex and finally the descriptor itself.
 */
void destroy_spawn_cmd(struct spawn_cmd_info *cmdinfo)
{
    freez(cmdinfo->command_to_run);

    uv_cond_destroy(&cmdinfo->cond);
    uv_mutex_destroy(&cmdinfo->mutex);

    freez(cmdinfo);
}
int spawn_cmd_compare(void *a, void *b)
{
struct spawn_cmd_info *cmda = a, *cmdb = b;
/* No need for mutex, serial will never change and the entries cannot be deallocated yet */
if (cmda->serial < cmdb->serial) return -1;
if (cmda->serial > cmdb->serial) return 1;
return 0;
}
/*
 * Resets the global command queue: empty tree ordered by spawn_cmd_compare(),
 * zero size, zero latest serial, freshly initialized condition variable and mutex.
 */
static void init_spawn_cmd_queue(void)
{
    spawn_cmd_queue.latest_serial = 0;
    spawn_cmd_queue.size = 0;

    spawn_cmd_queue.cmd_tree.compar = spawn_cmd_compare;
    spawn_cmd_queue.cmd_tree.root = NULL;

    fatal_assert(0 == uv_cond_init(&spawn_cmd_queue.cond));
    fatal_assert(0 == uv_mutex_init(&spawn_cmd_queue.mutex));
}
/*
* Returns serial number of the enqueued command
*/
uint64_t spawn_enq_cmd(const char *command_to_run)
{
unsigned queue_size;
uint64_t serial;
avl_t *avl_ret;
struct spawn_cmd_info *cmdinfo;
cmdinfo = create_spawn_cmd(command_to_run);
/* wait for free space in queue */
uv_mutex_lock(&spawn_cmd_queue.mutex);
while ((queue_size = spawn_cmd_queue.size) == SPAWN_MAX_OUTSTANDING) {
uv_cond_wait(&spawn_cmd_queue.cond, &spawn_cmd_queue.mutex);
}
fatal_assert(queue_size < SPAWN_MAX_OUTSTANDING);
spawn_cmd_queue.size = queue_size + 1;
serial = ++spawn_cmd_queue.latest_serial; /* 0 is invalid */
cmdinfo->serial = serial; /* No need to take the cmd mutex since it is unreachable at the moment */
/* enqueue command */
avl_ret = avl_insert(&spawn_cmd_queue.cmd_tree, (avl_t *)cmdinfo);
fatal_assert(avl_ret == (avl_t *)cmdinfo);
uv_mutex_unlock(&spawn_cmd_queue.mutex);
/* wake up event loop */
fatal_assert(0 == uv_async_send(&spawn_async));
return serial;
}
/*
 * Blocks until command with serial finishes running. Only one thread is allowed to wait per command.
 *
 * On return *exit_status and *exec_run_timestamp hold the values recorded for the command,
 * and the command has been removed from the queue and destroyed - the serial cannot be waited on again.
 */
void spawn_wait_cmd(uint64_t serial, int *exit_status, time_t *exec_run_timestamp)
{
avl_t *avl_ret;
struct spawn_cmd_info tmp, *cmdinfo;
/* tmp is only a search key: spawn_cmd_compare() looks at nothing but the serial */
tmp.serial = serial;
uv_mutex_lock(&spawn_cmd_queue.mutex);
avl_ret = avl_search(&spawn_cmd_queue.cmd_tree, (avl_t *)&tmp);
uv_mutex_unlock(&spawn_cmd_queue.mutex);
fatal_assert(avl_ret); /* Could be NULL if more than 1 threads wait for the command */
cmdinfo = (struct spawn_cmd_info *)avl_ret;
uv_mutex_lock(&cmdinfo->mutex);
while (!(cmdinfo->flags & SPAWN_CMD_DONE)) {
/* Only 1 thread is allowed to wait for this command to finish */
uv_cond_wait(&cmdinfo->cond, &cmdinfo->mutex);
}
uv_mutex_unlock(&cmdinfo->mutex);
/* remove it from the queue first, then copy the results out, and only then free it */
spawn_deq_cmd(cmdinfo);
*exit_status = cmdinfo->exit_status;
*exec_run_timestamp = cmdinfo->exec_run_timestamp;
destroy_spawn_cmd(cmdinfo);
}
void spawn_deq_cmd(struct spawn_cmd_info *cmdinfo)
{
unsigned queue_size;
avl_t *avl_ret;
uv_mutex_lock(&spawn_cmd_queue.mutex);
queue_size = spawn_cmd_queue.size;
fatal_assert(queue_size);
/* dequeue command */
avl_ret = avl_remove(&spawn_cmd_queue.cmd_tree, (avl_t *)cmdinfo);
fatal_assert(avl_ret);
spawn_cmd_queue.size = queue_size - 1;
/* wake up callers */
uv_cond_signal(&spawn_cmd_queue.cond);
uv_mutex_unlock(&spawn_cmd_queue.mutex);
}
/*
 * Tree traversal callback: stops at the first entry that does not have
 * SPAWN_CMD_PROCESSED set and stores it in *data (a struct spawn_cmd_info **).
 *
 * Must be called from the spawn client event loop context. This way no mutex is needed because the event loop is the
 * only writer as far as struct spawn_cmd_info entries are concerned.
 */
static int find_unprocessed_spawn_cmd_cb(void *entry, void *data)
{
    struct spawn_cmd_info *candidate = entry;

    if (candidate->flags & SPAWN_CMD_PROCESSED)
        return 0; /* continue traversing */

    struct spawn_cmd_info **result = data;
    *result = candidate;
    return -1; /* break tree traversal */
}
/*
 * Returns a queued command that has not been processed yet (the one the tree
 * traversal stops at), or NULL when the queue is empty or every queued
 * command has already been processed.
 */
struct spawn_cmd_info *spawn_get_unprocessed_cmd(void)
{
    struct spawn_cmd_info *found = NULL;

    uv_mutex_lock(&spawn_cmd_queue.mutex);

    if (spawn_cmd_queue.size != 0) {
        /* find command: the traversal returns -1 only when the callback stopped it */
        int rc = avl_traverse(&spawn_cmd_queue.cmd_tree, find_unprocessed_spawn_cmd_cb, (void *)&found);
        if (-1 != rc)
            found = NULL; /* no commands available for processing */
    }

    uv_mutex_unlock(&spawn_cmd_queue.mutex);

    return found;
}
/**
 * Starts the spawn server: the netdata executable is run again, with
 * SPAWN_SERVER_COMMAND_LINE_ARGUMENT as its only argument, sharing a libuv IPC pipe with the caller.
 * The spawn server process will close all open file descriptors except for the pipe, UV_STDOUT_FD, and UV_STDERR_FD.
 * The caller has to be the netdata user as configured.
 *
 * The child gets the pipe as fd 0, inherits fd 1 as it is, and gets the health log fd as fd 2.
 * A failure to spawn is fatal.
 *
 * @param loop the libuv loop of the caller context
 * @param spawn_channel the bidirectional libuv IPC pipe that the server and the caller will share
 * @param process the spawn server libuv process context
 * @return 0 on success or the libuv error code
 */
int create_spawn_server(uv_loop_t *loop, uv_pipe_t *spawn_channel, uv_process_t *process)
{
#define SPAWN_SERVER_DESCRIPTORS (3)
    uv_stdio_container_t stdio[SPAWN_SERVER_DESCRIPTORS];

    /* the user name is needed only for the error message */
    struct passwd *pw = getpwuid(getuid());
    char *user = (pw && pw->pw_name) ? pw->pw_name : "";

    char *args[3];
    args[0] = netdata_exe_file;
    args[1] = SPAWN_SERVER_COMMAND_LINE_ARGUMENT;
    args[2] = NULL;

    /* fd 0: bidirectional libuv pipe */
    stdio[0].flags = UV_CREATE_PIPE | UV_READABLE_PIPE | UV_WRITABLE_PIPE;
    stdio[0].data.stream = (uv_stream_t *)spawn_channel;

    /* fd 1: UV_STDOUT_FD */
    stdio[1].flags = UV_INHERIT_FD;
    stdio[1].data.fd = 1;

    /* fd 2: UV_STDERR_FD */
    stdio[2].flags = UV_INHERIT_FD;
    stdio[2].data.fd = nd_log_health_fd();

    uv_process_options_t options;
    memset(&options, 0, sizeof(options));
    options.file = netdata_exe_file;
    options.args = args;
    options.exit_cb = NULL; //exit_cb;
    options.stdio = stdio;
    options.stdio_count = SPAWN_SERVER_DESCRIPTORS;

    /* execute the netdata binary again as the netdata user */
    int rc = uv_spawn(loop, process, &options);
    if (0 != rc) {
        netdata_log_error("uv_spawn (process: \"%s\") (user: %s) failed (%s).", netdata_exe_file, user, uv_strerror(rc));
        fatal("Cannot start netdata without the spawn server.");
    }

    return rc;
}
#define CONCURRENT_SPAWNS 16
#define SPAWN_ITERATIONS 10000
#undef CONCURRENT_STRESS_TEST
/*
 * Initializes the spawn client: sets up the command queue, starts the
 * spawn_client() thread and waits until that thread reports that its
 * initialization finished.
 *
 * Failures are only logged; when the service cannot be initialized the
 * alarm notifications will not be spawned.
 */
void spawn_init(void)
{
    struct completion completion;
    int error;

    netdata_log_info("Initializing spawn client.");

    init_spawn_cmd_queue();

    completion_init(&completion);
    error = uv_thread_create(&thread, spawn_client, &completion);
    if (error) {
        netdata_log_error("uv_thread_create(): %s", uv_strerror(error));
        /* no thread has been started, so nobody else refers to the completion */
        completion_destroy(&completion);
        goto after_error;
    }

    /* wait for spawn client thread to initialize */
    completion_wait_for(&completion);
    completion_destroy(&completion);

    if (spawn_thread_error) {
        /* the thread has failed to initialize and has exited; reap it */
        error = uv_thread_join(&thread);
        if (error) {
            netdata_log_error("uv_thread_join(): %s", uv_strerror(error));
        }
        goto after_error;
    }

#ifdef CONCURRENT_STRESS_TEST
    signals_reset();
    signals_unblock();

    sleep(60);
    uint64_t serial[CONCURRENT_SPAWNS];
    for (int j = 0 ; j < SPAWN_ITERATIONS ; ++j) {
        for (int i = 0; i < CONCURRENT_SPAWNS; ++i) {
            char cmd[64];
            snprintf(cmd, sizeof(cmd), "echo CONCURRENT_STRESS_TEST %d 1>&2", j * CONCURRENT_SPAWNS + i + 1);
            serial[i] = spawn_enq_cmd(cmd);
            netdata_log_info("Queued command %s for spawning.", cmd);
        }

        /* initialized, because they are logged before the first wait fills them in */
        int exit_status = -1;
        time_t exec_run_timestamp = 0;
        for (int i = 0; i < CONCURRENT_SPAWNS; ++i) {
            netdata_log_info("Started waiting for serial %llu exit status %d run timestamp %llu.",
                             (unsigned long long)serial[i], exit_status,
                             (unsigned long long)exec_run_timestamp);
            spawn_wait_cmd(serial[i], &exit_status, &exec_run_timestamp);
            netdata_log_info("Finished waiting for serial %llu exit status %d run timestamp %llu.",
                             (unsigned long long)serial[i], exit_status,
                             (unsigned long long)exec_run_timestamp);
        }
    }
    exit(0);
#endif
    return;

after_error:
    netdata_log_error("Failed to initialize spawn service. The alarms notifications will not be spawned.");
}

View file

@ -1,109 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef NETDATA_SPAWN_H
#define NETDATA_SPAWN_H 1
#include "daemon/common.h"
#define SPAWN_SERVER_COMMAND_LINE_ARGUMENT "--special-spawn-server"
/* Opcodes of the messages exchanged over the spawn pipe; carried in struct spawn_prot_header. */
typedef enum spawn_protocol {
SPAWN_PROT_EXEC_CMD = 0, /* written by the client (spawn_process_cmd()): a command to run */
SPAWN_PROT_SPAWN_RESULT, /* parsed by the client: followed by struct spawn_prot_spawn_result */
SPAWN_PROT_CMD_EXIT_STATUS /* parsed by the client: followed by struct spawn_prot_cmd_exit_status */
} spawn_prot_t;
/* Body of SPAWN_PROT_EXEC_CMD: the length of the command, followed by the command itself. */
/* spawn_process_cmd() sets command_length to strlen() of the command and writes that many bytes (no terminator). */
struct spawn_prot_exec_cmd {
uint16_t command_length;
char command_to_run[];
};
/* Body of SPAWN_PROT_SPAWN_RESULT. */
struct spawn_prot_spawn_result {
pid_t exec_pid; /* 0 if failed to spawn */
time_t exec_run_timestamp; /* time of successfully spawning the command */
};
/* Body of SPAWN_PROT_CMD_EXIT_STATUS. */
struct spawn_prot_cmd_exit_status {
int exec_exit_status;
};
/* Every message starts with this header. */
struct spawn_prot_header {
spawn_prot_t opcode;
void *handle; /* the client sets this to its struct spawn_cmd_info pointer and casts it back when parsing replies */
};
#undef SPAWN_DEBUG /* define to enable debug prints */
/* the maximum number of commands in the queue; spawn_enq_cmd() blocks when it is reached */
#define SPAWN_MAX_OUTSTANDING (32768)
/* bits of spawn_cmd_info.flags */
#define SPAWN_CMD_PROCESSED 0x00000001 /* set by spawn_process_cmd() when it writes the command to the pipe */
#define SPAWN_CMD_IN_PROGRESS 0x00000002 /* set when a spawn result with a non-zero pid is received */
#define SPAWN_CMD_FAILED_TO_SPAWN 0x00000004 /* set (together with DONE) when a spawn result with pid 0 is received */
#define SPAWN_CMD_DONE 0x00000008 /* set when the exit status is received; spawn_wait_cmd() waits for it */
/* One queued command. It is cast to and from avl_t *, so avl must remain the first member. */
struct spawn_cmd_info {
avl_t avl;
/* concurrency control per command */
uv_mutex_t mutex;
uv_cond_t cond; /* users block here until command has finished */
uint64_t serial; /* key in the command tree; 0 is invalid */
char *command_to_run; /* owned copy, freed by destroy_spawn_cmd() */
int exit_status; /* -1 until the exit status is received */
pid_t pid; /* -1 until the spawn result is received */
unsigned long flags; /* SPAWN_CMD_* bits */
time_t exec_run_timestamp; /* time of successfully spawning the command */
};
/* spawn command queue */
struct spawn_queue {
avl_tree_type cmd_tree; /* the queued commands, ordered by serial (spawn_cmd_compare()) */
/* concurrency control of command queue */
uv_mutex_t mutex;
uv_cond_t cond; /* spawn_enq_cmd() waits here while the queue is full; signalled by spawn_deq_cmd() */
volatile unsigned size; /* number of commands currently in the tree */
uint64_t latest_serial; /* the last serial given out; incremented before use, so 0 is never given */
};
/* Storage of one outgoing message, together with the libuv write request that sends it. */
/* spawn_process_cmd() sends 'header' and 'payload' as separate buffers, followed by the command string. */
struct write_context {
uv_write_t write_req;
struct spawn_prot_header header;
struct spawn_prot_cmd_exit_status exit_status;
struct spawn_prot_spawn_result spawn_result;
struct spawn_prot_exec_cmd payload;
};
extern int spawn_thread_error;
extern int spawn_thread_shutdown;
extern uv_async_t spawn_async;
void spawn_init(void);
void spawn_server(void);
void spawn_client(void *arg);
void destroy_spawn_cmd(struct spawn_cmd_info *cmdinfo);
uint64_t spawn_enq_cmd(const char *command_to_run);
void spawn_wait_cmd(uint64_t serial, int *exit_status, time_t *exec_run_timestamp);
void spawn_deq_cmd(struct spawn_cmd_info *cmdinfo);
struct spawn_cmd_info *spawn_get_unprocessed_cmd(void);
int create_spawn_server(uv_loop_t *loop, uv_pipe_t *spawn_channel, uv_process_t *process);
/*
 * Appends up to max_to_copy bytes of the source buffer to the protocol buffer
 * (never more than the source currently has). The protocol buffer length grows
 * by the amount copied, the source pointer advances by it and the source
 * length shrinks by it.
 */
static inline void copy_to_prot_buffer(char *prot_buffer, unsigned *prot_buffer_len, unsigned max_to_copy,
                                       char **source, unsigned *source_len)
{
    const unsigned available = *source_len;
    const unsigned chunk = (max_to_copy < available) ? max_to_copy : available;

    memcpy(prot_buffer + *prot_buffer_len, *source, chunk);

    *prot_buffer_len += chunk;
    *source += chunk;
    *source_len -= chunk;
}
#endif //NETDATA_SPAWN_H

View file

@ -1,250 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "spawn.h"
static uv_process_t process;
static uv_pipe_t spawn_channel;
static uv_loop_t *loop;
uv_async_t spawn_async;
static char prot_buffer[MAX_COMMAND_LENGTH];
static unsigned prot_buffer_len = 0;
/*
 * Callback of spawn_async: stops the loop the handle belongs to, so that
 * uv_run() returns in spawn_client().
 */
static void async_cb(uv_async_t *handle)
{
    uv_loop_t *owner = handle->loop;

    uv_stop(owner);
}
/*
 * Completion callback of the writes started by spawn_process_cmd().
 * req->data is an array of two pointers; both of them and the array itself
 * are freed here. The write status is not examined.
 */
static void after_pipe_write(uv_write_t* req, int status)
{
    (void)status;
#ifdef SPAWN_DEBUG
    netdata_log_info("CLIENT %s called status=%d", __func__, status);
#endif

    void **owned = req->data;
    void *first = owned[0];
    void *second = owned[1];

    freez(first);
    freez(second);
    freez(owned);
}
/*
 * Parses the bytes received from the spawn pipe.
 *
 * The bytes of the message being parsed are accumulated in the file-level prot_buffer
 * (prot_buffer_len bytes of it are valid). When the source runs out before a message is
 * complete, the function returns and keeps what it has, so the next call continues the
 * same message. When a message is complete, it is applied to the struct spawn_cmd_info
 * its header points to and prot_buffer_len is reset to 0.
 *
 * source_len: the number of bytes available at source
 * source:     the bytes received
 */
static void client_parse_spawn_protocol(unsigned source_len, char *source)
{
unsigned required_len;
struct spawn_prot_header *header;
struct spawn_prot_spawn_result *spawn_result;
struct spawn_prot_cmd_exit_status *exit_status;
struct spawn_cmd_info *cmdinfo;
while (source_len) {
/* step 1: a complete header is needed */
required_len = sizeof(*header);
if (prot_buffer_len < required_len)
copy_to_prot_buffer(prot_buffer, &prot_buffer_len, required_len - prot_buffer_len, &source, &source_len);
if (prot_buffer_len < required_len)
return; /* Source buffer ran out */
header = (struct spawn_prot_header *)prot_buffer;
/* the handle is the pointer spawn_process_cmd() has put in the header of the request */
cmdinfo = (struct spawn_cmd_info *)header->handle;
fatal_assert(NULL != cmdinfo);
/* step 2: the body, which immediately follows the header and depends on the opcode */
switch(header->opcode) {
case SPAWN_PROT_SPAWN_RESULT:
required_len += sizeof(*spawn_result);
if (prot_buffer_len < required_len)
copy_to_prot_buffer(prot_buffer, &prot_buffer_len, required_len - prot_buffer_len, &source, &source_len);
if (prot_buffer_len < required_len)
return; /* Source buffer ran out */
spawn_result = (struct spawn_prot_spawn_result *)(header + 1);
uv_mutex_lock(&cmdinfo->mutex);
cmdinfo->pid = spawn_result->exec_pid;
if (0 == cmdinfo->pid) { /* Failed to spawn */
#ifdef SPAWN_DEBUG
netdata_log_info("CLIENT %s SPAWN_PROT_SPAWN_RESULT failed to spawn.", __func__);
#endif
/* the command is marked done, and its waiter is signalled */
cmdinfo->flags |= SPAWN_CMD_FAILED_TO_SPAWN | SPAWN_CMD_DONE;
uv_cond_signal(&cmdinfo->cond);
} else {
cmdinfo->exec_run_timestamp = spawn_result->exec_run_timestamp;
cmdinfo->flags |= SPAWN_CMD_IN_PROGRESS;
#ifdef SPAWN_DEBUG
netdata_log_info("CLIENT %s SPAWN_PROT_SPAWN_RESULT in progress.", __func__);
#endif
}
uv_mutex_unlock(&cmdinfo->mutex);
prot_buffer_len = 0; /* message consumed */
break;
case SPAWN_PROT_CMD_EXIT_STATUS:
required_len += sizeof(*exit_status);
if (prot_buffer_len < required_len)
copy_to_prot_buffer(prot_buffer, &prot_buffer_len, required_len - prot_buffer_len, &source, &source_len);
if (prot_buffer_len < required_len)
return; /* Source buffer ran out */
exit_status = (struct spawn_prot_cmd_exit_status *)(header + 1);
uv_mutex_lock(&cmdinfo->mutex);
cmdinfo->exit_status = exit_status->exec_exit_status;
#ifdef SPAWN_DEBUG
netdata_log_info("CLIENT %s SPAWN_PROT_CMD_EXIT_STATUS %d.", __func__, exit_status->exec_exit_status);
#endif
/* the command has finished: wake up the thread blocked in spawn_wait_cmd() */
cmdinfo->flags |= SPAWN_CMD_DONE;
uv_cond_signal(&cmdinfo->cond);
uv_mutex_unlock(&cmdinfo->mutex);
prot_buffer_len = 0; /* message consumed */
break;
default:
/* any other opcode is not expected on the client side */
fatal_assert(0);
break;
}
}
}
/*
 * Read callback of the spawn pipe.
 *  - nread > 0: the bytes are given to client_parse_spawn_protocol()
 *  - nread == 0: only logged
 *  - nread < 0 (EOF or error): logged, reading is stopped and the pipe is closed
 * In every case the buffer is freed, when it has a length.
 */
static void on_pipe_read(uv_stream_t* pipe, ssize_t nread, const uv_buf_t* buf)
{
    const int failed = (nread < 0);

    if (failed) {
        if (UV_EOF == nread)
            netdata_log_info("EOF found in spawn pipe.");
        else
            netdata_log_error("%s: %s", __func__, uv_strerror(nread));

        /* stop stream due to EOF or error */
        (void)uv_read_stop((uv_stream_t *)pipe);
    }
    else if (0 == nread) {
        netdata_log_info("%s: Zero bytes read from spawn pipe.", __func__);
    }
    else {
#ifdef SPAWN_DEBUG
        netdata_log_info("CLIENT %s read %u", __func__, (unsigned)nread);
#endif
        client_parse_spawn_protocol(nread, buf->base);
    }

    if (buf && buf->len) {
        freez(buf->base);
    }

    if (failed) {
        uv_close((uv_handle_t *)pipe, NULL);
    }
}
/*
 * Allocation callback of the spawn pipe reads: provides a buffer of exactly
 * the suggested size. The buffer is freed in on_pipe_read().
 */
static void on_read_alloc(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf)
{
    (void)handle;

    char *storage = mallocz(suggested_size);

    buf->base = storage;
    buf->len = suggested_size;
}
/*
 * Marks the command as processed and writes a SPAWN_PROT_EXEC_CMD request for
 * it to the spawn pipe, as three buffers: the header (with the command as its
 * handle), the payload (the command length) and the command string itself,
 * without its terminator.
 */
static void spawn_process_cmd(struct spawn_cmd_info *cmdinfo)
{
    /* all three allocations are freed by after_pipe_write(), which finds them via write_req.data */
    struct write_context *ctx = callocz(1, sizeof(*ctx));
    uv_buf_t *bufs = callocz(3, sizeof(uv_buf_t));
    void **owned = callocz(2, sizeof(void *));
    owned[0] = ctx;
    owned[1] = bufs;
    ctx->write_req.data = owned;

    uv_mutex_lock(&cmdinfo->mutex);
    cmdinfo->flags |= SPAWN_CMD_PROCESSED;
    uv_mutex_unlock(&cmdinfo->mutex);

    ctx->header.opcode = SPAWN_PROT_EXEC_CMD;
    ctx->header.handle = cmdinfo;
    ctx->payload.command_length = strlen(cmdinfo->command_to_run);

    bufs[0] = uv_buf_init((char *)&ctx->header, sizeof(ctx->header));
    bufs[1] = uv_buf_init((char *)&ctx->payload, sizeof(ctx->payload));
    bufs[2] = uv_buf_init((char *)cmdinfo->command_to_run, ctx->payload.command_length);

#ifdef SPAWN_DEBUG
    netdata_log_info("CLIENT %s SPAWN_PROT_EXEC_CMD %u", __func__, (unsigned)cmdinfo->serial);
#endif

    int rc = uv_write(&ctx->write_req, (uv_stream_t *)&spawn_channel, bufs, 3, after_pipe_write);
    fatal_assert(rc == 0);
}
/*
 * The spawn client thread (started by spawn_init()).
 *
 * It creates its own libuv loop, the async handle used to wake the loop up, the IPC pipe
 * and the spawn server process. It then marks the completion given in arg as complete and,
 * until spawn_thread_shutdown is set, alternates between running the loop and sending the
 * unprocessed commands of the queue to the spawn server.
 *
 * When the initialization fails, spawn_thread_error is set to the error code, whatever has
 * been initialized so far is released in reverse order, and the completion is marked as
 * complete too, so that the thread waiting on it is not blocked.
 *
 * arg: a struct completion *
 */
void spawn_client(void *arg)
{
uv_thread_set_name_np("DAEMON_SPAWN");
int ret;
struct completion *completion = (struct completion *)arg;
loop = mallocz(sizeof(uv_loop_t));
ret = uv_loop_init(loop);
if (ret) {
netdata_log_error("uv_loop_init(): %s", uv_strerror(ret));
spawn_thread_error = ret;
goto error_after_loop_init;
}
loop->data = NULL;
spawn_async.data = NULL;
ret = uv_async_init(loop, &spawn_async, async_cb);
if (ret) {
netdata_log_error("uv_async_init(): %s", uv_strerror(ret));
spawn_thread_error = ret;
goto error_after_async_init;
}
/* the last parameter (1) requests an IPC pipe - asserted right below */
ret = uv_pipe_init(loop, &spawn_channel, 1);
if (ret) {
netdata_log_error("uv_pipe_init(): %s", uv_strerror(ret));
spawn_thread_error = ret;
goto error_after_pipe_init;
}
fatal_assert(spawn_channel.ipc);
ret = create_spawn_server(loop, &spawn_channel, &process);
if (ret) {
netdata_log_error("Failed to fork spawn server process.");
spawn_thread_error = ret;
goto error_after_spawn_server;
}
spawn_thread_error = 0;
spawn_thread_shutdown = 0;
/* wake up initialization thread */
completion_mark_complete(completion);
prot_buffer_len = 0;
ret = uv_read_start((uv_stream_t *)&spawn_channel, on_read_alloc, on_pipe_read);
fatal_assert(ret == 0);
while (spawn_thread_shutdown == 0) {
struct spawn_cmd_info *cmdinfo;
/* uv_run() returns when async_cb() stops the loop (spawn_enq_cmd() sends the async) */
uv_run(loop, UV_RUN_DEFAULT);
/* send all the commands that have not been sent yet */
while (NULL != (cmdinfo = spawn_get_unprocessed_cmd())) {
spawn_process_cmd(cmdinfo);
}
}
/* cleanup operations of the event loop */
netdata_log_info("Shutting down spawn client event loop.");
uv_close((uv_handle_t *)&spawn_channel, NULL);
uv_close((uv_handle_t *)&spawn_async, NULL);
uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */
netdata_log_info("Shutting down spawn client loop complete.");
fatal_assert(0 == uv_loop_close(loop));
return;
/* error path: each label releases what had been initialized before the failing step */
error_after_spawn_server:
uv_close((uv_handle_t *)&spawn_channel, NULL);
error_after_pipe_init:
uv_close((uv_handle_t *)&spawn_async, NULL);
error_after_async_init:
uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */
fatal_assert(0 == uv_loop_close(loop));
error_after_loop_init:
freez(loop);
/* wake up initialization thread */
completion_mark_complete(completion);
}

View file

@ -1,386 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "spawn.h"
/* event loop of the spawn server; set to libuv's default loop in spawn_server() */
static uv_loop_t *loop;
/* libuv IPC pipe carrying the spawn protocol; opened on file descriptor 0 in spawn_server() */
static uv_pipe_t server_pipe;
/*
 * set to 1 by on_pipe_read() on EOF or read error; polled by spawn_server() and wait_children()
 * NOTE(review): written under wait_children_mutex but read without it by the waiter thread - confirm this is intended
 */
static int server_shutdown = 0;
/* the thread that runs wait_children() */
static uv_thread_t thread;
/* spawned processes whose exit has not been collected yet, ordered by pid (see spawn_exec_compare()) */
static avl_tree_lock spawn_outstanding_exec_tree;
/* reassembly buffer for one protocol message (header, payload, command text) that may arrive split across reads */
static char prot_buffer[MAX_COMMAND_LENGTH];
/* number of bytes currently stored in prot_buffer */
static unsigned prot_buffer_len = 0;
/* Bookkeeping for one spawned process, kept from the moment it is spawned until its exit status has been sent. */
struct spawn_execution_info {
/* tree linkage; the code casts this struct to avl_t *, so this member has to remain the first one */
avl_t avl;
/* opaque value taken from the request header and echoed back in the replies */
void *handle;
/* filled in by wait_children() from the si_status reported for the child */
int exit_status;
/* pid of the spawned process; the key the tree is ordered by */
pid_t pid;
/* link used while the entry sits in child_waited_list */
struct spawn_execution_info *next;
};
int spawn_exec_compare(void *a, void *b)
{
struct spawn_execution_info *spwna = a, *spwnb = b;
if (spwna->pid < spwnb->pid) return -1;
if (spwna->pid > spwnb->pid) return 1;
return 0;
}
/*
 * State shared between the event loop thread and the waiter thread (wait_children()).
 * wait_children_mutex protects spawned_processes and child_waited_list.
 */
static uv_mutex_t wait_children_mutex;
/* signalled after a process has been spawned and when the server shuts down */
static uv_cond_t wait_children_cond;
/* flag: set to 1 to wake the waiter thread, reset to 0 by the waiter before it starts reaping */
static uint8_t spawned_processes;
/* singly linked list (newest first) of reaped processes whose exit status has not been sent yet */
static struct spawn_execution_info *child_waited_list;
/* wakes up the event loop so that child_waited_async_cb() sends the pending exit statuses */
static uv_async_t child_waited_async;
/*
 * Detach and return the entry at the head of child_waited_list,
 * or NULL when the list is empty. Takes wait_children_mutex internally.
 */
static inline struct spawn_execution_info *dequeue_child_waited_list(void)
{
    uv_mutex_lock(&wait_children_mutex);
    struct spawn_execution_info *head = child_waited_list;
    if (head != NULL)
        child_waited_list = head->next;
    uv_mutex_unlock(&wait_children_mutex);

    return head;
}
/*
 * Push exec_info onto the head of child_waited_list.
 * Takes wait_children_mutex internally.
 */
static inline void enqueue_child_waited_list(struct spawn_execution_info *exec_info)
{
    uv_mutex_lock(&wait_children_mutex);
    struct spawn_execution_info *previous_head = child_waited_list;
    child_waited_list = exec_info;
    exec_info->next = previous_head;
    uv_mutex_unlock(&wait_children_mutex);
}
/*
 * uv_write() completion callback: releases everything allocated for the write.
 *
 * req->data points at a two-slot array: slot 0 is the write context (which
 * embeds the request itself), slot 1 is the uv_buf_t array. The write status
 * is only reported in SPAWN_DEBUG builds; it is otherwise ignored.
 */
static void after_pipe_write(uv_write_t *req, int status)
{
#ifdef SPAWN_DEBUG
    fprintf(stderr, "SERVER %s called status=%d\n", __func__, status);
#endif
    (void)status;

    /* fetch the slots before freeing: req lives inside the write context */
    void **slots = req->data;
    void *write_ctx = slots[0];
    void *buffers = slots[1];

    freez(write_ctx);
    freez(buffers);
    freez(slots);
}
/*
 * Async callback run on the event loop after the waiter thread has reaped
 * one or more children: sends one SPAWN_PROT_CMD_EXIT_STATUS message
 * (header + exit status) over server_pipe for every entry in
 * child_waited_list, then frees the entry.
 *
 * The write context and buffer array are released by after_pipe_write().
 */
static void child_waited_async_cb(uv_async_t *async_handle)
{
    (void)async_handle;

    for (struct spawn_execution_info *reaped = dequeue_child_waited_list();
         reaped != NULL;
         reaped = dequeue_child_waited_list()) {
        struct write_context *ctx = mallocz(sizeof(*ctx));
        void **slots = callocz(2, sizeof(void *));
        uv_buf_t *buffers = callocz(2, sizeof(uv_buf_t));

        /* remember both allocations so the completion callback can free them */
        slots[0] = ctx;
        slots[1] = buffers;
        ctx->write_req.data = slots;

        ctx->header.opcode = SPAWN_PROT_CMD_EXIT_STATUS;
        ctx->header.handle = reaped->handle;
        ctx->exit_status.exec_exit_status = reaped->exit_status;

        buffers[0] = uv_buf_init((char *) &ctx->header, sizeof(ctx->header));
        buffers[1] = uv_buf_init((char *) &ctx->exit_status, sizeof(ctx->exit_status));
#ifdef SPAWN_DEBUG
        fprintf(stderr, "SERVER %s SPAWN_PROT_CMD_EXIT_STATUS\n", __func__);
#endif
        int rc = uv_write(&ctx->write_req, (uv_stream_t *) &server_pipe, buffers, 2, after_pipe_write);
        fatal_assert(rc == 0);

        freez(reaped);
    }
}
static void wait_children(void *arg)
{
siginfo_t i;
struct spawn_execution_info tmp, *exec_info;
avl_t *ret_avl;
(void)arg;
while (!server_shutdown) {
uv_mutex_lock(&wait_children_mutex);
while (!spawned_processes) {
uv_cond_wait(&wait_children_cond, &wait_children_mutex);
}
spawned_processes = 0;
uv_mutex_unlock(&wait_children_mutex);
while (!server_shutdown) {
i.si_pid = 0;
if (os_waitid(P_ALL, (id_t) 0, &i, WEXITED) == -1) {
if (errno != ECHILD)
fprintf(stderr, "SPAWN: Failed to wait: %s\n", strerror(errno));
break;
}
if (i.si_pid == 0) {
fprintf(stderr, "SPAWN: No child exited.\n");
break;
}
#ifdef SPAWN_DEBUG
fprintf(stderr, "SPAWN: Successfully waited for pid:%d.\n", (int) i.si_pid);
#endif
fatal_assert(CLD_EXITED == i.si_code);
tmp.pid = (pid_t)i.si_pid;
while (NULL == (ret_avl = avl_remove_lock(&spawn_outstanding_exec_tree, (avl_t *)&tmp))) {
fprintf(stderr,
"SPAWN: race condition detected, waiting for child process %d to be indexed.\n",
(int)tmp.pid);
(void)sleep_usec(10000); /* 10 msec */
}
exec_info = (struct spawn_execution_info *)ret_avl;
exec_info->exit_status = i.si_status;
enqueue_child_waited_list(exec_info);
/* wake up event loop */
fatal_assert(0 == uv_async_send(&child_waited_async));
}
}
}
void spawn_protocol_execute_command(void *handle, char *command_to_run, uint16_t command_length)
{
uv_buf_t *writebuf;
int ret;
avl_t *avl_ret;
struct spawn_execution_info *exec_info;
struct write_context *write_ctx;
write_ctx = mallocz(sizeof(*write_ctx));
void **data = callocz(2, sizeof(void *));
writebuf = callocz(2, sizeof(uv_buf_t));
data[0] = write_ctx;
data[1] = writebuf;
write_ctx->write_req.data = data;
command_to_run[command_length] = '\0';
#ifdef SPAWN_DEBUG
fprintf(stderr, "SPAWN: executing command '%s'\n", command_to_run);
#endif
if (netdata_spawn(command_to_run, &write_ctx->spawn_result.exec_pid)) {
fprintf(stderr, "SPAWN: Cannot spawn(\"%s\", \"r\").\n", command_to_run);
write_ctx->spawn_result.exec_pid = 0;
} else { /* successfully spawned command */
write_ctx->spawn_result.exec_run_timestamp = now_realtime_sec();
/* record it for when the process finishes execution */
exec_info = mallocz(sizeof(*exec_info));
exec_info->handle = handle;
exec_info->pid = write_ctx->spawn_result.exec_pid;
avl_ret = avl_insert_lock(&spawn_outstanding_exec_tree, (avl_t *)exec_info);
fatal_assert(avl_ret == (avl_t *)exec_info);
/* wake up the thread that blocks waiting for processes to exit */
uv_mutex_lock(&wait_children_mutex);
spawned_processes = 1;
uv_cond_signal(&wait_children_cond);
uv_mutex_unlock(&wait_children_mutex);
}
write_ctx->header.opcode = SPAWN_PROT_SPAWN_RESULT;
write_ctx->header.handle = handle;
writebuf[0] = uv_buf_init((char *)&write_ctx->header, sizeof(write_ctx->header));
writebuf[1] = uv_buf_init((char *)&write_ctx->spawn_result, sizeof(write_ctx->spawn_result));
#ifdef SPAWN_DEBUG
fprintf(stderr, "SERVER %s SPAWN_PROT_SPAWN_RESULT\n", __func__);
#endif
ret = uv_write(&write_ctx->write_req, (uv_stream_t *)&server_pipe, writebuf, 2, after_pipe_write);
fatal_assert(ret == 0);
}
/*
 * Ask copy_to_prot_buffer() for the bytes prot_buffer still lacks to reach
 * `wanted` bytes, taking them from *source. Returns non-zero when
 * prot_buffer holds at least `wanted` bytes afterwards.
 */
static inline int server_prot_buffer_reach(unsigned wanted, char **source, unsigned *source_len)
{
    if (prot_buffer_len < wanted)
        copy_to_prot_buffer(prot_buffer, &prot_buffer_len, wanted - prot_buffer_len, source, source_len);

    return prot_buffer_len >= wanted;
}

/*
 * Consume `source_len` bytes read from the pipe.
 *
 * Messages are reassembled in prot_buffer in three steps - header, payload,
 * command text - and the function returns as soon as the input runs out;
 * the partial message stays in prot_buffer for the next call. Each complete
 * SPAWN_PROT_EXEC_CMD message is passed to spawn_protocol_execute_command().
 *
 * A message that would not fit in MAX_COMMAND_LENGTH - 1 bytes is logged and
 * its command is cut down to what fits.
 */
static void server_parse_spawn_protocol(unsigned source_len, char *source)
{
    struct spawn_prot_header *header;
    struct spawn_prot_exec_cmd *payload;

    while (source_len) {
        /* step 1: fixed-size header */
        unsigned required_len = sizeof(*header);
        if (!server_prot_buffer_reach(required_len, &source, &source_len))
            return; /* Source buffer ran out */

        header = (struct spawn_prot_header *)prot_buffer;
        fatal_assert(SPAWN_PROT_EXEC_CMD == header->opcode);
        fatal_assert(NULL != header->handle);

        /* step 2: fixed-size payload, located right after the header */
        required_len += sizeof(*payload);
        if (!server_prot_buffer_reach(required_len, &source, &source_len))
            return; /* Source buffer ran out */

        payload = (struct spawn_prot_exec_cmd *)(header + 1);
        uint16_t command_length = payload->command_length;

        /* step 3: the command text itself */
        required_len += command_length;
        if (unlikely(required_len > MAX_COMMAND_LENGTH - 1)) {
            fprintf(stderr, "SPAWN: Ran out of protocol buffer space.\n");
            command_length = (MAX_COMMAND_LENGTH - 1) - (sizeof(*header) + sizeof(*payload));
            required_len = MAX_COMMAND_LENGTH - 1;
        }
        if (!server_prot_buffer_reach(required_len, &source, &source_len))
            return; /* Source buffer ran out */

        spawn_protocol_execute_command(header->handle, payload->command_to_run, command_length);
        prot_buffer_len = 0; /* start over with the next message */
    }
}
/*
 * libuv read callback of the server pipe.
 *
 * nread > 0   the bytes in buf are fed to server_parse_spawn_protocol()
 * nread == 0  only logged
 * nread < 0   EOF or read error: the server shuts down. server_shutdown is
 *             set and the waiter thread is woken up, reading stops, the pipe
 *             is closed, the waiter thread is joined and finally the async
 *             handle is closed.
 *
 * The read buffer allocated by on_read_alloc() is freed here in every case.
 */
static void on_pipe_read(uv_stream_t *pipe, ssize_t nread, const uv_buf_t *buf)
{
    if (0 == nread) {
        fprintf(stderr, "SERVER %s: Zero bytes read from spawn pipe.\n", __func__);
    } else if (UV_EOF == nread) {
        fprintf(stderr, "EOF found in spawn pipe.\n");
    } else if (nread < 0) {
        fprintf(stderr, "%s: %s\n", __func__, uv_strerror(nread));
    }

    if (nread < 0) { /* stop spawn server due to EOF or error */
        int error;

        /* spawned_processes is raised as well so that the waiter thread leaves uv_cond_wait() */
        uv_mutex_lock(&wait_children_mutex);
        server_shutdown = 1;
        spawned_processes = 1;
        uv_cond_signal(&wait_children_cond);
        uv_mutex_unlock(&wait_children_mutex);

        fprintf(stderr, "Shutting down spawn server event loop.\n");
        /* cleanup operations of the event loop */
        (void)uv_read_stop((uv_stream_t *) pipe);
        uv_close((uv_handle_t *)&server_pipe, NULL);

        error = uv_thread_join(&thread);
        if (error) {
            /* name the call that actually failed and terminate the line */
            fprintf(stderr, "uv_thread_join(): %s\n", uv_strerror(error));
        }
        /* After joining it is safe to destroy child_waited_async */
        uv_close((uv_handle_t *)&child_waited_async, NULL);
    } else if (nread) {
#ifdef SPAWN_DEBUG
        fprintf(stderr, "SERVER %s nread %u\n", __func__, (unsigned)nread);
#endif
        server_parse_spawn_protocol(nread, buf->base);
    }

    if (buf && buf->len) {
        freez(buf->base);
    }
}
/*
 * libuv allocation callback: provides a read buffer of exactly the size
 * libuv suggests. The buffer is released by on_pipe_read().
 */
static void on_read_alloc(uv_handle_t *handle, size_t suggested_size, uv_buf_t* buf)
{
    (void)handle;

    char *chunk = mallocz(suggested_size);

    buf->len = suggested_size;
    buf->base = chunk;
}
/* Signal handler that deliberately does nothing; spawn_server() installs it for every signal it wants to ignore. */
static void ignore_signal_handler(int signo) {
/*
 * A do-nothing handler is installed instead of SIG_IGN on purpose: with a handler in place, spawned processes
 * can reset the signals to their default dispositions, whereas SIG_IGN would be inherited by the spawned
 * children, which is not desirable.
 */
(void)signo;
}
void spawn_server(void)
{
int error;
// initialize the system clocks
clocks_init();
// close all open file descriptors, except the standard ones
// the caller may have left open files (lxc-attach has this issue)
for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR);
// Have the libuv IPC pipe be closed when forking child processes
(void) fcntl(0, F_SETFD, FD_CLOEXEC);
fprintf(stderr, "Spawn server is up.\n");
// Define signals we want to ignore
struct sigaction sa;
int signals_to_ignore[] = {SIGPIPE, SIGINT, SIGQUIT, SIGTERM, SIGHUP, SIGUSR1, SIGUSR2, SIGBUS, SIGCHLD};
unsigned ignore_length = sizeof(signals_to_ignore) / sizeof(signals_to_ignore[0]);
unsigned i;
for (i = 0; i < ignore_length ; ++i) {
sa.sa_flags = 0;
sigemptyset(&sa.sa_mask);
sa.sa_handler = ignore_signal_handler;
if(sigaction(signals_to_ignore[i], &sa, NULL) == -1)
fprintf(stderr, "SPAWN: Failed to change signal handler for signal: %d.\n", signals_to_ignore[i]);
}
signals_unblock();
loop = uv_default_loop();
loop->data = NULL;
error = uv_pipe_init(loop, &server_pipe, 1);
if (error) {
fprintf(stderr, "uv_pipe_init(): %s\n", uv_strerror(error));
exit(error);
}
fatal_assert(server_pipe.ipc);
error = uv_pipe_open(&server_pipe, 0 /* UV_STDIN_FD */);
if (error) {
fprintf(stderr, "uv_pipe_open(): %s\n", uv_strerror(error));
exit(error);
}
avl_init_lock(&spawn_outstanding_exec_tree, spawn_exec_compare);
spawned_processes = 0;
fatal_assert(0 == uv_cond_init(&wait_children_cond));
fatal_assert(0 == uv_mutex_init(&wait_children_mutex));
child_waited_list = NULL;
error = uv_async_init(loop, &child_waited_async, child_waited_async_cb);
if (error) {
fprintf(stderr, "uv_async_init(): %s\n", uv_strerror(error));
exit(error);
}
error = uv_thread_create(&thread, wait_children, NULL);
if (error) {
fprintf(stderr, "uv_thread_create(): %s\n", uv_strerror(error));
exit(error);
}
prot_buffer_len = 0;
error = uv_read_start((uv_stream_t *)&server_pipe, on_read_alloc, on_pipe_read);
fatal_assert(error == 0);
while (!server_shutdown) {
uv_run(loop, UV_RUN_DEFAULT);
}
fprintf(stderr, "Shutting down spawn server loop complete.\n");
fatal_assert(0 == uv_loop_close(loop));
exit(0);
}

View file

@ -70,7 +70,7 @@ static inline int read_stream(struct receiver_state *r, char* buffer, size_t siz
ssize_t bytes_read;
do {
errno = 0;
errno_clear();
switch(wait_on_socket_or_cancel_with_timeout(
#ifdef ENABLE_HTTPS

View file

@ -54,7 +54,7 @@ char *netdata_ssl_ca_file = NULL;
#endif
static void load_stream_conf() {
errno = 0;
errno_clear();
char *filename = strdupz_path_subpath(netdata_configured_user_config_dir, "stream.conf");
if(!appconfig_load(&stream_config, filename, 0, NULL)) {
nd_log_daemon(NDLP_NOTICE, "CONFIG: cannot load user config '%s'. Will try stock config.", filename);

View file

@ -1894,7 +1894,7 @@ void *rrdpush_sender_thread(void *ptr) {
// protection from overflow
if(unlikely(s->flags & SENDER_FLAG_OVERFLOW)) {
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_OVERFLOW);
errno = 0;
errno_clear();
netdata_log_error("STREAM %s [send to %s]: buffer full (allocated %zu bytes) after sending %zu bytes. Restarting connection",
rrdhost_hostname(s->host), s->connected_to, s->buffer->size, s->sent_bytes_on_this_connection);
rrdpush_sender_thread_close_socket(s->host);

View file

@ -691,7 +691,7 @@ static void rrdr_set_grouping_function(RRDR *r, RRDR_TIME_GROUPING group_method)
}
}
if(!found) {
errno = 0;
errno_clear();
internal_error(true, "QUERY: grouping method %u not found. Using 'average'", (unsigned int)group_method);
r->time_grouping.create = tg_average_create;
r->time_grouping.reset = tg_average_reset;

View file

@ -1855,7 +1855,7 @@ ssize_t web_client_receive(struct web_client *w)
// do we have any space for more data?
buffer_need_bytes(w->response.data, NETDATA_WEB_REQUEST_INITIAL_SIZE);
errno = 0;
errno_clear();
#ifdef ENABLE_HTTPS
if ( (web_client_check_conn_tcp(w)) && (netdata_ssl_web_server_ctx) ) {