mirror of
https://github.com/netdata/netdata.git
synced 2025-04-16 02:24:15 +00:00

* cleanup of logging - wip
* first working iteration
* add errno annotator
* replace old logging functions with netdata_logger()
* cleanup
* update error_limit
* fix remaining error_limit references
* work on fatal()
* started working on structured logs
* full cleanup
* default logging to files; fix all plugins initialization
* fix formatting of numbers
* cleanup and reorg
* fix coverity issues
* cleanup obsolete code
* fix formatting of numbers
* fix log rotation
* fix for older systems
* add detection of systemd journal via stderr
* finished on access.log
* remove left-over transport
* do not add empty fields to the logs
* journal get compact uuids; X-Transaction-ID header is added in web responses
* allow compiling on systems without memfd sealing
* added libnetdata/uuid directory
* move datetime formatters to libnetdata
* add missing files
* link the makefiles in libnetdata
* added uuid_parse_flexi() to parse UUIDs with and without hyphens; the web server now read X-Transaction-ID and uses it for functions and web responses
* added stream receiver, sender, proc plugin and pluginsd log stack
* iso8601 advanced usage; line_splitter module in libnetdata; code cleanup
* add message ids to streaming inbound and outbound connections
* cleanup line_splitter between lines to avoid logging garbage; when killing children, kill them with SIGABRT if internal checks is enabled
* send SIGABRT to external plugins only if we are not shutting down
* fix cross cleanup in pluginsd parser
* fatal when there is a stack error in logs
* compile netdata with -fexceptions
* do not kill external plugins with SIGABRT
* metasync info logs to debug level
* added severity to logs
* added json output; added options per log output; added documentation; fixed issues mentioned
* allow memfd only on linux
* moved journal low level functions to journal.c/h
* move health logs to daemon.log with proper priorities
* fixed a couple of bugs; health log in journal
* updated docs
* systemd-cat-native command to push structured logs to journal from the command line
* fix makefiles
* restored NETDATA_LOG_SEVERITY_LEVEL
* fix makefiles
* systemd-cat-native can also work as the logger of Netdata scripts
* do not require a socket to systemd-journal to log-as-netdata
* alarm notify logs in native format
* properly compare log ids
* fatals log alerts; alarm-notify.sh working
* fix overflow warning
* alarm-notify.sh now logs the request (command line)
* annotate external plugins logs with the function cmd they run
* added context, component and type to alarm-notify.sh; shell sanitization removes control character and characters that may be expanded by bash
* reformatted alarm-notify logs
* unify cgroup-network-helper.sh
* added quotes around params
* charts.d.plugin switched logging to journal native
* quotes for logfmt
* unify the status codes of streaming receivers and senders
* alarm-notify: dont log anything, if there is nothing to do
* all external plugins log to stderr when running outside netdata; alarm-notify now shows an error when notification methods are needed but are not available
* migrate cgroup-name.sh to new logging
* systemd-cat-native now supports messages with newlines
* socket.c logs use priority
* cleanup log field types
* inherit the systemd set INVOCATION_ID if found
* allow systemd-cat-native to send messages to a systemd-journal-remote URL
* log2journal command that can convert structured logs to journal export format
* various fixes and documentation of log2journal
* updated log2journal docs
* updated log2journal docs
* updated documentation of fields
* allow compiling without libcurl
* do not use socket as format string
* added version information to newly added tools
* updated documentation and help messages
* fix the namespace socket path
* print errno with error
* do not timeout
* updated docs
* updated docs
* updated docs
* log2journal updated docs and params
* when talking to a remote journal, systemd-cat-native batches the messages
* enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote
* Revert "enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote"
This reverts commit b079d53c11.
* note about uncompressed traffic
* log2journal: code reorg and cleanup to make modular
* finished rewriting log2journal
* more comments
* rewriting rules support
* increased limits
* updated docs
* updated docs
* fix old log call
* use journal only when stderr is connected to journal
* update netdata.spec for libcurl, libpcre2 and log2journal
* pcre2-devel
* do not require pcre2 in centos < 8, amazonlinux < 2023, open suse
* log2journal only on systems pcre2 is available
* ignore log2journal in .gitignore
* avoid log2journal on centos 7, amazonlinux 2 and opensuse
* add pcre2-8 to static build
* undo last commit
* Bundle to static
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Add build deps for deb packages
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Add dependencies; build from source
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Test build for amazon linux and centos expect to fail for suse
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* fix minor oversight
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
* Reorg code
* Add the install from source (deps) as a TODO
* Not enable the build on suse ecosystem
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
---------
Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud>
178 lines
6.6 KiB
C
178 lines
6.6 KiB
C
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include "sqlite_functions.h"
|
|
#include "sqlite_aclk_node.h"
|
|
|
|
#include "../../aclk/aclk_contexts_api.h"
|
|
#include "../../aclk/aclk_capas.h"
|
|
|
|
#ifdef ENABLE_ACLK
|
|
|
|
/*
 * Add one entry to `dict` for every chart of `host` that is available for
 * viewers. Entries are keyed by "<plugin>:<module>" and carry a copy of a
 * struct collector_info holding the plugin and module names of the chart.
 *
 * Returns the same `dict` pointer that was passed in.
 */
DICTIONARY *collectors_from_charts(RRDHOST *host, DICTIONARY *dict) {
    char key[500];
    RRDSET *chart;

    rrdset_foreach_read(chart, host) {
        if (!rrdset_is_available_for_viewers(chart))
            continue;

        struct collector_info info = {
            .plugin = rrdset_plugin_name(chart),
            .module = rrdset_module_name(chart)
        };

        // The length argument is one less than the buffer size, as in the original call.
        snprintfz(key, sizeof(key) - 1, "%s:%s", info.plugin, info.module);
        dictionary_set(dict, key, &info, sizeof(info));
    }
    rrdset_foreach_done(chart);

    return dict;
}
|
|
|
|
/*
 * Build the collectors list of `host` from its charts and hand it to
 * aclk_update_node_collectors().
 *
 * The temporary dictionary and the claim id string obtained from
 * get_agent_claimid() are released before returning.
 */
static void build_node_collectors(RRDHOST *host)
{
    struct aclk_sync_cfg_t *cfg = host->aclk_config;
    DICTIONARY *collectors = dictionary_create(DICT_OPTION_SINGLE_THREADED);

    struct update_node_collectors update;
    update.node_id = cfg->node_id;
    update.claim_id = get_agent_claimid();
    // collectors_from_charts() fills and returns the dictionary it was given
    update.node_collectors = collectors_from_charts(host, collectors);

    aclk_update_node_collectors(&update);

    dictionary_destroy(collectors);
    freez(update.claim_id);

    nd_log(NDLS_ACCESS, NDLP_DEBUG, "ACLK RES [%s (%s)]: NODE COLLECTORS SENT", cfg->node_id, rrdhost_hostname(host));
}
|
|
|
|
/*
 * Collect identity, system information, capabilities and labels of `host`
 * and pass them to aclk_update_node_info().
 *
 * The snapshot is assembled and sent while holding the rrd read lock.
 * Before returning, wc->node_collectors_send is stamped with the current
 * time; aclk_check_node_info_and_collectors() reads that timestamp to decide
 * when to send the collectors of the host.
 */
static void build_node_info(RRDHOST *host)
{
    // Zero-initialize so that any member not explicitly assigned below has a
    // defined value when the struct is handed to aclk_update_node_info().
    struct update_node_info node_info = { 0 };

    struct aclk_sync_cfg_t *wc = host->aclk_config;

    rrd_rdlock();
    node_info.node_id = wc->node_id;
    node_info.claim_id = get_agent_claimid();               // freed below
    node_info.machine_guid = host->machine_guid;
    node_info.child = (wc->host != localhost);
    node_info.ml_info.ml_capable = ml_capable();
    node_info.ml_info.ml_enabled = ml_enabled(wc->host);

    node_info.node_instance_capabilities = aclk_get_node_instance_capas(wc->host);  // freed below

    now_realtime_timeval(&node_info.updated_at);

    // For hosts other than localhost, take a private copy of the version
    // string while holding the receiver lock; the copy is freed below.
    char *host_version = NULL;
    if (host != localhost) {
        netdata_mutex_lock(&host->receiver_lock);
        host_version = strdupz(host->receiver && host->receiver->program_version ? host->receiver->program_version : rrdhost_program_version(host));
        netdata_mutex_unlock(&host->receiver_lock);
    }

    node_info.data.name = rrdhost_hostname(host);
    node_info.data.os = rrdhost_os(host);
    node_info.data.os_name = host->system_info->host_os_name;
    node_info.data.os_version = host->system_info->host_os_version;
    node_info.data.kernel_name = host->system_info->kernel_name;
    node_info.data.kernel_version = host->system_info->kernel_version;
    node_info.data.architecture = host->system_info->architecture;
    node_info.data.cpus = host->system_info->host_cores ? str2uint32_t(host->system_info->host_cores, NULL) : 0;
    node_info.data.cpu_frequency = host->system_info->host_cpu_freq ? host->system_info->host_cpu_freq : "0";
    node_info.data.memory = host->system_info->host_ram_total ? host->system_info->host_ram_total : "0";
    node_info.data.disk_space = host->system_info->host_disk_space ? host->system_info->host_disk_space : "0";
    node_info.data.version = host_version ? host_version : VERSION;
    node_info.data.release_channel = get_release_channel();
    node_info.data.timezone = rrdhost_abbrev_timezone(host);
    node_info.data.virtualization_type = host->system_info->virtualization ? host->system_info->virtualization : "unknown";
    node_info.data.container_type = host->system_info->container ? host->system_info->container : "unknown";
    node_info.data.custom_info = config_get(CONFIG_SECTION_WEB, "custom dashboard_info.js", "");
    node_info.data.machine_guid = host->machine_guid;

    // Capability list; the entry with a NULL name terminates it.
    struct capability node_caps[] = {
        { .name = "ml", .version = host->system_info->ml_capable, .enabled = host->system_info->ml_enabled },
        { .name = "mc", .version = host->system_info->mc_version, .enabled = host->system_info->mc_version ? 1 : 0 },
        { .name = NULL, .version = 0, .enabled = 0 }
    };
    node_info.node_capabilities = node_caps;

    node_info.data.ml_info.ml_capable = host->system_info->ml_capable;
    node_info.data.ml_info.ml_enabled = host->system_info->ml_enabled;

    node_info.data.host_labels_ptr = host->rrdlabels;

    aclk_update_node_info(&node_info);
    nd_log(NDLS_ACCESS, NDLP_DEBUG, "ACLK RES [%s (%s)]: NODE INFO SENT for guid [%s] (%s)", wc->node_id, rrdhost_hostname(wc->host), host->machine_guid, wc->host == localhost ? "parent" : "child");

    rrd_unlock();

    // Release everything this function allocated (freez() is called with NULL
    // for host_version when host is localhost, exactly as before).
    freez(node_info.claim_id);
    freez(node_info.node_instance_capabilities);
    freez(host_version);

    wc->node_collectors_send = now_realtime_sec();
}
|
|
|
|
/*
 * Report whether at least one chart of `host` is currently replicating.
 * The scan stops at the first chart for which rrdset_is_replicating() is true.
 */
bool host_is_replicating(RRDHOST *host)
{
    RRDSET *chart;
    bool found = false;

    rrdset_foreach_reentrant(chart, host) {
        if (!rrdset_is_replicating(chart))
            continue;

        found = true;
        break;
    }
    rrdset_foreach_done(chart);

    return found;
}
|
|
|
|
void aclk_check_node_info_and_collectors(void)
|
|
{
|
|
RRDHOST *host;
|
|
|
|
if (unlikely(!aclk_connected))
|
|
return;
|
|
|
|
size_t context_loading = 0;
|
|
size_t replicating = 0;
|
|
dfe_start_reentrant(rrdhost_root_index, host)
|
|
{
|
|
struct aclk_sync_cfg_t *wc = host->aclk_config;
|
|
if (unlikely(!wc))
|
|
continue;
|
|
|
|
if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD))) {
|
|
internal_error(true, "ACLK SYNC: Context still pending for %s", rrdhost_hostname(host));
|
|
context_loading++;
|
|
continue;
|
|
}
|
|
|
|
if (unlikely(host_is_replicating(host))) {
|
|
internal_error(true, "ACLK SYNC: Host %s is still replicating", rrdhost_hostname(host));
|
|
replicating++;
|
|
continue;
|
|
}
|
|
|
|
if (wc->node_info_send_time && wc->node_info_send_time + 30 < now_realtime_sec()) {
|
|
wc->node_info_send_time = 0;
|
|
build_node_info(host);
|
|
internal_error(true, "ACLK SYNC: Sending node info for %s", rrdhost_hostname(host));
|
|
}
|
|
|
|
if (wc->node_collectors_send && wc->node_collectors_send + 30 < now_realtime_sec()) {
|
|
build_node_collectors(host);
|
|
internal_error(true, "ACLK SYNC: Sending collectors for %s", rrdhost_hostname(host));
|
|
wc->node_collectors_send = 0;
|
|
}
|
|
}
|
|
dfe_done(host);
|
|
|
|
if (context_loading || replicating) {
|
|
nd_log_limit_static_thread_var(erl, 10, 100 * USEC_PER_MS);
|
|
nd_log_limit(&erl, NDLS_DAEMON, NDLP_INFO,
|
|
"%zu nodes loading contexts, %zu replicating data", context_loading, replicating);
|
|
}
|
|
}
|
|
|
|
#endif
|