0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-16 02:24:15 +00:00
netdata_netdata/database/sqlite/sqlite_aclk_node.c
Costa Tsaousis 3e508c8f95
New logging layer ()
* cleanup of logging - wip

* first working iteration

* add errno annotator

* replace old logging functions with netdata_logger()

* cleanup

* update error_limit

* fix remaining error_limit references

* work on fatal()

* started working on structured logs

* full cleanup

* default logging to files; fix all plugins initialization

* fix formatting of numbers

* cleanup and reorg

* fix coverity issues

* cleanup obsolete code

* fix formatting of numbers

* fix log rotation

* fix for older systems

* add detection of systemd journal via stderr

* finished on access.log

* remove left-over transport

* do not add empty fields to the logs

* journal gets compact UUIDs; X-Transaction-ID header is added in web responses

* allow compiling on systems without memfd sealing

* added libnetdata/uuid directory

* move datetime formatters to libnetdata

* add missing files

* link the makefiles in libnetdata

* added uuid_parse_flexi() to parse UUIDs with and without hyphens; the web server now reads X-Transaction-ID and uses it for functions and web responses

* added stream receiver, sender, proc plugin and pluginsd log stack

* iso8601 advanced usage; line_splitter module in libnetdata; code cleanup

* add message ids to streaming inbound and outbound connections

* cleanup line_splitter between lines to avoid logging garbage; when killing children, kill them with SIGABRT if internal checks are enabled

* send SIGABRT to external plugins only if we are not shutting down

* fix cross cleanup in pluginsd parser

* fatal when there is a stack error in logs

* compile netdata with -fexceptions

* do not kill external plugins with SIGABRT

* metasync info logs to debug level

* added severity to logs

* added json output; added options per log output; added documentation; fixed issues mentioned

* allow memfd only on linux

* moved journal low level functions to journal.c/h

* move health logs to daemon.log with proper priorities

* fixed a couple of bugs; health log in journal

* updated docs

* systemd-cat-native command to push structured logs to journal from the command line

* fix makefiles

* restored NETDATA_LOG_SEVERITY_LEVEL

* fix makefiles

* systemd-cat-native can also work as the logger of Netdata scripts

* do not require a socket to systemd-journal to log-as-netdata

* alarm notify logs in native format

* properly compare log ids

* fatals log alerts; alarm-notify.sh working

* fix overflow warning

* alarm-notify.sh now logs the request (command line)

* annotate external plugin logs with the function cmd they run

* added context, component and type to alarm-notify.sh; shell sanitization removes control character and characters that may be expanded by bash

* reformatted alarm-notify logs

* unify cgroup-network-helper.sh

* added quotes around params

* charts.d.plugin switched logging to journal native

* quotes for logfmt

* unify the status codes of streaming receivers and senders

* alarm-notify: don't log anything if there is nothing to do

* all external plugins log to stderr when running outside netdata; alarm-notify now shows an error when notification methods are needed but are not available

* migrate cgroup-name.sh to new logging

* systemd-cat-native now supports messages with newlines

* socket.c logs use priority

* cleanup log field types

* inherit the systemd set INVOCATION_ID if found

* allow systemd-cat-native to send messages to a systemd-journal-remote URL

* log2journal command that can convert structured logs to journal export format

* various fixes and documentation of log2journal

* updated log2journal docs

* updated log2journal docs

* updated documentation of fields

* allow compiling without libcurl

* do not use socket as format string

* added version information to newly added tools

* updated documentation and help messages

* fix the namespace socket path

* print errno with error

* do not timeout

* updated docs

* updated docs

* updated docs

* log2journal updated docs and params

* when talking to a remote journal, systemd-cat-native batches the messages

* enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote

* Revert "enable lz4 compression for systemd-cat-native when sending messages to a systemd-journal-remote"

This reverts commit b079d53c11.

* note about uncompressed traffic

* log2journal: code reorg and cleanup to make modular

* finished rewriting log2journal

* more comments

* rewriting rules support

* increased limits

* updated docs

* updated docs

* fix old log call

* use journal only when stderr is connected to journal

* update netdata.spec for libcurl, libpcre2 and log2journal

* pcre2-devel

* do not require pcre2 in centos < 8, amazonlinux < 2023, open suse

* log2journal only on systems pcre2 is available

* ignore log2journal in .gitignore

* avoid log2journal on centos 7, amazonlinux 2 and opensuse

* add pcre2-8 to static build

* undo last commit

* Bundle to static

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Add build deps for deb packages

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Add dependencies; build from source

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Test build for amazon linux and centos expect to fail for suse

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* fix minor oversight

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

* Reorg code

* Add the install from source (deps) as a TODO
* Not enable the build on suse ecosystem

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>

---------

Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud>
Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud>
2023-11-22 10:27:25 +02:00

178 lines
6.6 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "sqlite_functions.h"
#include "sqlite_aclk_node.h"
#include "../../aclk/aclk_contexts_api.h"
#include "../../aclk/aclk_capas.h"
#ifdef ENABLE_ACLK
// Walk the charts of `host` and, for every chart that is available for
// viewers, call dictionary_set() on `dict` with the key "plugin:module" and a
// struct collector_info carrying the chart's plugin and module names.
//
// Returns the `dict` pointer it was given.
DICTIONARY *collectors_from_charts(RRDHOST *host, DICTIONARY *dict) {
    char key[500];
    RRDSET *st;

    rrdset_foreach_read(st, host) {
        // charts that viewers cannot see do not contribute a collector entry
        if (!rrdset_is_available_for_viewers(st))
            continue;

        struct collector_info info = {
            .plugin = rrdset_plugin_name(st),
            .module = rrdset_module_name(st)
        };

        // the length limit is derived from the buffer (500 - 1 = 499)
        snprintfz(key, sizeof(key) - 1, "%s:%s", info.plugin, info.module);
        dictionary_set(dict, key, &info, sizeof(info));
    }
    rrdset_foreach_done(st);

    return dict;
}
// Gather the collectors of `host` into a temporary dictionary, pass them to
// aclk_update_node_collectors() together with the node id and the agent claim
// id, release the temporaries and log that the collectors were sent.
//
// host->aclk_config is dereferenced without a NULL check.
static void build_node_collectors(RRDHOST *host)
{
    struct aclk_sync_cfg_t *cfg = host->aclk_config;
    DICTIONARY *collectors = dictionary_create(DICT_OPTION_SINGLE_THREADED);

    struct update_node_collectors update;
    update.node_id = cfg->node_id;
    update.claim_id = get_agent_claimid();
    update.node_collectors = collectors_from_charts(host, collectors);

    aclk_update_node_collectors(&update);

    // neither the dictionary nor the claim id string is used after the call above
    dictionary_destroy(collectors);
    freez(update.claim_id);

    nd_log(NDLS_ACCESS, NDLP_DEBUG, "ACLK RES [%s (%s)]: NODE COLLECTORS SENT", cfg->node_id, rrdhost_hostname(host));
}
// Fill a struct update_node_info describing `host` and pass it to
// aclk_update_node_info().
//
// The structure is populated and sent between rrd_rdlock() and rrd_unlock().
// The values returned by get_agent_claimid() and
// aclk_get_node_instance_capas(), and the strdupz() copy of the program
// version, are released with freez() after the lock is dropped. Finally,
// node_collectors_send in the host's aclk_config is set to the current time.
//
// host->aclk_config and host->system_info are dereferenced without NULL checks.
static void build_node_info(RRDHOST *host)
{
    struct aclk_sync_cfg_t *cfg = host->aclk_config;
    struct update_node_info info;

    rrd_rdlock();

    // identity of the node and of this agent
    info.node_id = cfg->node_id;
    info.claim_id = get_agent_claimid();
    info.machine_guid = host->machine_guid;
    info.child = (cfg->host != localhost);

    info.ml_info.ml_capable = ml_capable();
    info.ml_info.ml_enabled = ml_enabled(cfg->host);

    info.node_instance_capabilities = aclk_get_node_instance_capas(cfg->host);

    now_realtime_timeval(&info.updated_at);

    // For hosts other than localhost, copy the program version while holding
    // receiver_lock: prefer the receiver's version, else the one kept on the host.
    char *version_copy = NULL;
    if (host != localhost) {
        netdata_mutex_lock(&host->receiver_lock);
        if (host->receiver && host->receiver->program_version)
            version_copy = strdupz(host->receiver->program_version);
        else
            version_copy = strdupz(rrdhost_program_version(host));
        netdata_mutex_unlock(&host->receiver_lock);
    }

    // operating system and kernel
    info.data.name = rrdhost_hostname(host);
    info.data.os = rrdhost_os(host);
    info.data.os_name = host->system_info->host_os_name;
    info.data.os_version = host->system_info->host_os_version;
    info.data.kernel_name = host->system_info->kernel_name;
    info.data.kernel_version = host->system_info->kernel_version;
    info.data.architecture = host->system_info->architecture;

    // hardware; missing values are reported as 0 / "0"
    info.data.cpus =
        host->system_info->host_cores ? str2uint32_t(host->system_info->host_cores, NULL) : 0;
    info.data.cpu_frequency =
        host->system_info->host_cpu_freq ? host->system_info->host_cpu_freq : "0";
    info.data.memory =
        host->system_info->host_ram_total ? host->system_info->host_ram_total : "0";
    info.data.disk_space =
        host->system_info->host_disk_space ? host->system_info->host_disk_space : "0";

    // localhost has no copy, so it reports the compiled-in VERSION
    info.data.version = version_copy ? version_copy : VERSION;
    info.data.release_channel = get_release_channel();
    info.data.timezone = rrdhost_abbrev_timezone(host);

    // missing virtualization / container information is reported as "unknown"
    info.data.virtualization_type =
        host->system_info->virtualization ? host->system_info->virtualization : "unknown";
    info.data.container_type =
        host->system_info->container ? host->system_info->container : "unknown";

    info.data.custom_info = config_get(CONFIG_SECTION_WEB, "custom dashboard_info.js", "");
    info.data.machine_guid = host->machine_guid;

    // capability list, terminated by an entry whose name is NULL
    struct capability caps[] = {
        { .name = "ml", .version = host->system_info->ml_capable, .enabled = host->system_info->ml_enabled },
        { .name = "mc", .version = host->system_info->mc_version, .enabled = host->system_info->mc_version != 0 },
        { .name = NULL, .version = 0, .enabled = 0 }
    };
    info.node_capabilities = caps;

    info.data.ml_info.ml_capable = host->system_info->ml_capable;
    info.data.ml_info.ml_enabled = host->system_info->ml_enabled;

    info.data.host_labels_ptr = host->rrdlabels;

    aclk_update_node_info(&info);
    nd_log(NDLS_ACCESS, NDLP_DEBUG, "ACLK RES [%s (%s)]: NODE INFO SENT for guid [%s] (%s)", cfg->node_id, rrdhost_hostname(cfg->host), host->machine_guid, cfg->host == localhost ? "parent" : "child");

    rrd_unlock();

    freez(info.claim_id);
    freez(info.node_instance_capabilities);
    freez(version_copy);

    // a non-zero timestamp here is what aclk_check_node_info_and_collectors()
    // tests before it sends the node collectors
    cfg->node_collectors_send = now_realtime_sec();
}
// Return true when at least one chart of `host` satisfies
// rrdset_is_replicating(); the traversal stops at the first such chart.
bool host_is_replicating(RRDHOST *host)
{
    bool found = false;
    RRDSET *st;

    rrdset_foreach_reentrant(st, host) {
        found = rrdset_is_replicating(st);
        if (found)
            break;
    }
    rrdset_foreach_done(st);

    return found;
}
// Walk every host in rrdhost_root_index and send pending node info and node
// collectors updates.
//
// Returns immediately when aclk_connected is false. Hosts without an
// aclk_config are skipped. Hosts that still have
// RRDHOST_FLAG_PENDING_CONTEXT_LOAD set, or for which host_is_replicating()
// is true, are counted and skipped.
//
// For the remaining hosts, an update is sent when its timestamp
// (node_info_send_time / node_collectors_send) is non-zero and more than
// 30 seconds older than now_realtime_sec(). node_info_send_time is cleared
// before build_node_info() is called; node_collectors_send is cleared after
// build_node_collectors() returns.
//
// When any host was skipped, the two counters are logged through
// nd_log_limit().
void aclk_check_node_info_and_collectors(void)
{
    const int send_delay_secs = 30;

    if (unlikely(!aclk_connected))
        return;

    size_t pending_contexts = 0;
    size_t hosts_replicating = 0;
    RRDHOST *host;

    dfe_start_reentrant(rrdhost_root_index, host)
    {
        struct aclk_sync_cfg_t *cfg = host->aclk_config;
        if (unlikely(!cfg))
            continue;

        if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD))) {
            internal_error(true, "ACLK SYNC: Context still pending for %s", rrdhost_hostname(host));
            pending_contexts++;
            continue;
        }

        if (unlikely(host_is_replicating(host))) {
            internal_error(true, "ACLK SYNC: Host %s is still replicating", rrdhost_hostname(host));
            hosts_replicating++;
            continue;
        }

        if (cfg->node_info_send_time && cfg->node_info_send_time + send_delay_secs < now_realtime_sec()) {
            cfg->node_info_send_time = 0;
            build_node_info(host);
            internal_error(true, "ACLK SYNC: Sending node info for %s", rrdhost_hostname(host));
        }

        if (cfg->node_collectors_send && cfg->node_collectors_send + send_delay_secs < now_realtime_sec()) {
            build_node_collectors(host);
            internal_error(true, "ACLK SYNC: Sending collectors for %s", rrdhost_hostname(host));
            cfg->node_collectors_send = 0;
        }
    }
    dfe_done(host);

    if (pending_contexts || hosts_replicating) {
        nd_log_limit_static_thread_var(erl, 10, 100 * USEC_PER_MS);
        nd_log_limit(&erl, NDLS_DAEMON, NDLP_INFO,
                     "%zu nodes loading contexts, %zu replicating data", pending_contexts, hosts_replicating);
    }
}
#endif