0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-05-05 09:40:18 +00:00
netdata_netdata/exporting/send_data.c
Costa Tsaousis 8fc3b351a2
Allow netdata plugins to expose functions for querying more information about specific charts ()
* function renames and code cleanup in popen.c; no actual code changes

* netdata popen() now opens both child process stdin and stdout and returns FILE * for both

* pass both input and output to parser structures

* updated rrdset to call custom functions

* RRDSET FUNCTION leading calls for both sync and async operation

* put RRDSET functions to a separate file

* added format and timeout at function definition

* support for synchronous (internal plugins) and asynchronous (external plugins and children) functions

* /api/v1/function endpoint

* functions are now attached to the host and there is a dictionary view per chart

* functions implemented at plugins.d

* remove the defer until keyword hook from plugins.d when it is done

* stream sender implementation of functions

* sanitization of all functions so that certain characters are only allowed

* stricter sanitization

* common max size

* 1st working plugins.d example

* always init inflight dictionary

* properly destroy dictionaries to avoid parallel insertion of items

* add more debugging on disconnection reasons

* add more debugging on disconnection reasons again

* streaming receiver respects newlines

* dont use the same fp for both streaming receive and send

* dont free dbengine memory with internal checks

* make sender proceed in the buffer

* added timing info and garbage collection at plugins.d

* added info about routing nodes

* added info about routing nodes with delay

* added more info about delays

* added more info about delays again

* signal sending thread to wake up

* streaming version labeling and commented code to support capabilities

* added functions to /api/v1/data, /api/v1/charts, /api/v1/chart, /api/v1/info

* redirect top output to stdout

* address coverity findings

* fix resource leaks of popen

* log attempts to connect to individual destinations

* better messages

* properly parse destinations

* try to find a function from the most matching to the least matching

* log added streaming destinations

* rotate destinations bypassing a node in the middle that does not accept our connection

* break the loops properly

* use typedef to define callbacks

* capabilities negotiation during streaming

* functions exposed upstream based on capabilities; compression disabled per node persisting reconnects; always try to connect with all capabilities

* restore functionality to lookup functions

* better logging of capabilities

* remove old versions from capabilities when a newer version is there

* fix formatting

* optimization for plugins.d rrdlabels to avoid creating and destructing dictionaries all the time

* delayed health initialization for rrddim and rrdset

* cleanup health initialization

* fix for popen() not returning the right value

* add health worker jobs for initializing rrdset and rrddim

* added content type support for functions; apps.plugin permanent function to display all the processes

* fixes for functions parameters parsing in apps.plugin

* fix for process matching in apps.plugin

* first working function for apps.plugin

* Dashboard ACL is disabled for functions; Function errors are all in JSON format

* apps.plugin function processes returns json table

* use json_escape_string() to escape message

* fix formatting

* apps.plugin exposes all its metrics to function processes

* fix json formatting when filtering out some rows

* reopen the internal pipe of rrdpush in case of errors

* misplaced statement

* do not use buffer->len

* support for GLOBAL functions (functions that are not linked to a chart)

* added /api/v1/functions endpoint; removed format from the FUNCTIONS api;

* swagger documentation about the new api end points

* added plugins.d documentation about functions

* never re-close a file

* remove unnecessary ifdef

* fixed issues identified by codacy

* fix for null label value

* make edit-config copy-and-paste friendly

* Revert "make edit-config copy-and-paste friendly"

This reverts commit 54500c0e0a.

* reworked sender handshake to fix coverity findings

* timeout is zero, for both send_timeout() and recv_timeout()

* properly detect that parent closed the socket

* support caching of function responses; limit function response to 10MB; added protection from malformed function responses

* disabled excessive logging

* added units to apps.plugin function processes and normalized all values to be human readable

* shorter field names

* fixed issues reported

* fixed apps.plugin error response; tested that pluginsd can properly handle faulty responses

* use double linked list macros for double linked list management

* faster apps.plugin function printing by minimizing file operations

* added memory percentage

* fix compatibility issues with older compilers and FreeBSD

* rrdpush sender code cleanup; rrhost structure cleanup from sender flags and variables;

* fix leftover variable in ifdef

* apps.plugin: do not call detach from the thread; exit immediately when input is broken

* exclude AR charts from health

* flush cleaner; prefer sender output

* clarity

* do not fill the cbuffer if not connected

* fix

* don't enable host->sender if streaming is not enabled; send host label updates to parent;

* functions are only available through ACLK

* Prepared statement reports only in dev mode

* fix AR chart detection

* fix for streaming not enabling itself

* more cleanup of sender and receiver structures

* moved read-only flags and configuration options to rrdhost->options

* fixed merge with master

* fix for incomplete rename

* prevent service thread from working on charts that are being collected

Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
2022-10-05 14:13:46 +03:00

445 lines
16 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "exporting_engine.h"
/**
 * Check if TLS is enabled in the configuration
 *
 * TLS applies only to the Graphite/JSON/OpenTSDB HTTP connector types and to Prometheus remote write,
 * and only when the EXPORTING_OPTION_USE_TLS flag is set in the options.
 *
 * @param type the connector type of the instance.
 * @param options the exporting option flags of the instance.
 * @return Returns 1 if TLS should be enabled, 0 otherwise.
 */
static int exporting_tls_is_enabled(EXPORTING_CONNECTOR_TYPE type, EXPORTING_OPTIONS options)
{
    switch (type) {
        case EXPORTING_CONNECTOR_TYPE_GRAPHITE_HTTP:
        case EXPORTING_CONNECTOR_TYPE_JSON_HTTP:
        case EXPORTING_CONNECTOR_TYPE_OPENTSDB_HTTP:
        case EXPORTING_CONNECTOR_TYPE_PROMETHEUS_REMOTE_WRITE:
            // a TLS-capable connector: the option flag decides
            return (options & EXPORTING_OPTION_USE_TLS) ? 1 : 0;

        default:
            // every other connector type never uses TLS here
            return 0;
    }
}
/**
 * Discard response
 *
 * Discards a response received by an exporting connector instance. When NETDATA_INTERNAL_CHECKS is
 * enabled, a sample of the response (at most sizeof(sample) - 1 characters, with non-printable
 * characters replaced by spaces) is logged through debug() before the buffer is emptied.
 *
 * @param buffer buffer with response data.
 * @param instance an instance data structure.
 * @return Always returns 0.
 */
int exporting_discard_response(BUFFER *buffer, struct instance *instance) {
#if NETDATA_INTERNAL_CHECKS
    char sample[1024];
    const char *s = buffer_tostring(buffer);
    char *d = sample, *e = &sample[sizeof(sample) - 1];

    for(; *s && d < e ;s++) {
        char c = *s;

        // <ctype.h> classifiers are only defined for values representable as unsigned char (or EOF);
        // passing a negative plain char (any byte >= 0x80 where char is signed) is undefined behaviour
        if(unlikely(!isprint((unsigned char)c))) c = ' ';

        *d++ = c;
    }
    *d = '\0';

    debug(
        D_EXPORTING,
        "EXPORTING: received %zu bytes from %s connector instance. Ignoring them. Sample: '%s'",
        buffer_strlen(buffer),
        instance->config.name,
        sample);
#else
    UNUSED(instance);
#endif /* NETDATA_INTERNAL_CHECKS */

    buffer_flush(buffer);
    return 0;
}
/**
 * Receive response
 *
 * Reads, without blocking, whatever is available on the connection (through SSL_read() when TLS is
 * enabled for the instance and its handshake is complete, through recv() otherwise) and appends it
 * to a function-static response buffer. If the buffer holds any data afterwards, it is passed to the
 * instance's check_response() callback.
 *
 * @param sock communication socket. It is closed and set to -1 when the peer closes the connection.
 * @param instance an instance data structure.
 */
void simple_connector_receive_response(int *sock, struct instance *instance)
{
    // NOTE(review): this buffer is function-static, so it is shared by every caller of this
    // function - confirm this is safe when several connector instances run concurrently
    static BUFFER *response = NULL;
    if (!response)
        response = buffer_create(4096);

    struct stats *stats = &instance->stats;
#ifdef ENABLE_HTTPS
    uint32_t options = (uint32_t)instance->config.options;
    struct simple_connector_data *connector_specific_data = instance->connector_specific_data;

    if (options & EXPORTING_OPTION_USE_TLS)
        ERR_clear_error();
#endif

    errno = 0;

    // loop through to collect all data
    while (*sock != -1 && errno != EWOULDBLOCK) {
        // With a full buffer the read below would be a zero-length read; its 0 result cannot be
        // told apart from the peer closing the connection. Stop here and process what we have.
        if (unlikely(response->len >= response->size))
            break;

        ssize_t r;
#ifdef ENABLE_HTTPS
        if (exporting_tls_is_enabled(instance->config.type, options) &&
            connector_specific_data->conn &&
            connector_specific_data->flags == NETDATA_SSL_HANDSHAKE_COMPLETE) {
            r = (ssize_t)SSL_read(connector_specific_data->conn,
                                  &response->buffer[response->len],
                                  (int) (response->size - response->len));

            if (likely(r > 0)) {
                // we received some data
                response->len += r;
                stats->received_bytes += r;
                stats->receptions++;
                continue;
            } else {
                // SSL_get_error() has to run before ERR_get_error() removes the entry from the queue
                int sslerrno = SSL_get_error(connector_specific_data->conn, (int) r);
                u_long sslerr = ERR_get_error();
                char buf[256];

                switch (sslerrno) {
                    case SSL_ERROR_WANT_READ:
                    case SSL_ERROR_WANT_WRITE:
                        // nothing more to read right now
                        goto endloop;
                    default:
                        // log the text of the queued OpenSSL error, not of the SSL_ERROR_* code
                        ERR_error_string_n(sslerr, buf, sizeof(buf));
                        error("SSL error (%s)", buf);
                        goto endloop;
                }
            }
        } else {
            r = recv(*sock, &response->buffer[response->len], response->size - response->len, MSG_DONTWAIT);
        }
#else
        r = recv(*sock, &response->buffer[response->len], response->size - response->len, MSG_DONTWAIT);
#endif
        if (likely(r > 0)) {
            // we received some data
            response->len += r;
            stats->received_bytes += r;
            stats->receptions++;
        } else if (r == 0) {
            error("EXPORTING: '%s' closed the socket", instance->config.destination);
            close(*sock);
            *sock = -1;
        } else {
            // failed to receive data
            if (errno != EAGAIN && errno != EWOULDBLOCK) {
                error("EXPORTING: cannot receive data from '%s'.", instance->config.destination);
            }
        }

#ifdef UNIT_TESTING
        break;
#endif
    }
#ifdef ENABLE_HTTPS
endloop:
#endif

    // if we received data, process them
    if (buffer_strlen(response))
        instance->check_response(response, instance);
}
/**
 * Send buffer to a server
 *
 * Sends the header (if it is not empty) followed by the data buffer, through SSL_write() when TLS is
 * enabled for the instance and its handshake is complete, through send() otherwise. The data buffer
 * is only attempted after the whole header went out.
 *
 * On success the transmission statistics are updated, *failures is reset to 0 and `buffer` is
 * flushed. On failure the failure statistics and *failures are incremented, and the socket is closed
 * and set to -1 so that the caller can re-connect.
 *
 * @param sock communication socket.
 * @param failures the number of communication failures.
 * @param instance an instance data structure.
 * @param header data to send before the buffer; may be empty.
 * @param buffer the data to send.
 * @param buffered_metrics the number of metrics in the buffer, added to the sent metrics on success.
 */
void simple_connector_send_buffer(
    int *sock, int *failures, struct instance *instance, BUFFER *header, BUFFER *buffer, size_t buffered_metrics)
{
    int flags = 0;
#ifdef MSG_NOSIGNAL
    flags |= MSG_NOSIGNAL;
#endif

#ifdef ENABLE_HTTPS
    uint32_t options = (uint32_t)instance->config.options;
    struct simple_connector_data *connector_specific_data = instance->connector_specific_data;

    if (options & EXPORTING_OPTION_USE_TLS)
        ERR_clear_error();
#endif

    struct stats *stats = &instance->stats;
    ssize_t header_sent_bytes = 0;
    ssize_t buffer_sent_bytes = 0;
    size_t header_len = buffer_strlen(header);
    size_t buffer_len = buffer_strlen(buffer);

#ifdef ENABLE_HTTPS
    if (exporting_tls_is_enabled(instance->config.type, options) &&
        connector_specific_data->conn &&
        connector_specific_data->flags == NETDATA_SSL_HANDSHAKE_COMPLETE) {
        if (header_len)
            header_sent_bytes = (ssize_t)SSL_write(connector_specific_data->conn, buffer_tostring(header), (int)header_len);
        if ((size_t)header_sent_bytes == header_len)
            buffer_sent_bytes = (ssize_t)SSL_write(connector_specific_data->conn, buffer_tostring(buffer), (int)buffer_len);
    } else {
        if (header_len)
            header_sent_bytes = send(*sock, buffer_tostring(header), header_len, flags);
        if ((size_t)header_sent_bytes == header_len)
            buffer_sent_bytes = send(*sock, buffer_tostring(buffer), buffer_len, flags);
    }
#else
    if (header_len)
        header_sent_bytes = send(*sock, buffer_tostring(header), header_len, flags);
    if ((size_t)header_sent_bytes == header_len)
        buffer_sent_bytes = send(*sock, buffer_tostring(buffer), buffer_len, flags);
#endif

    // The header must have gone out completely too: with an empty data buffer, a failed or short
    // header write would otherwise leave buffer_sent_bytes == buffer_len == 0 and look like success.
    if ((size_t)header_sent_bytes == header_len && (size_t)buffer_sent_bytes == buffer_len) {
        // we sent the data successfully
        stats->transmission_successes++;
        stats->sent_metrics += buffered_metrics;
        stats->sent_bytes += buffer_sent_bytes;

        // reset the failures count
        *failures = 0;

        // empty the buffer
        buffer_flush(buffer);
    } else {
        // oops! we couldn't send (all or some of the) data
        error(
            "EXPORTING: failed to write data to '%s'. Willing to write %zu bytes, wrote %zd bytes. Will re-connect.",
            instance->config.destination,
            buffer_len,
            buffer_sent_bytes);
        stats->transmission_failures++;

        // a partial write still counts towards the bytes sent; -1 means nothing was written
        if(buffer_sent_bytes != -1)
            stats->sent_bytes += buffer_sent_bytes;

        // increment the counter we check for data loss
        (*failures)++;

        // close the socket - we will re-open it next time
        close(*sock);
        *sock = -1;
    }
}
/**
 * Simple connector worker
 *
 * Runs in a separate thread for every instance.
 *
 * Until the engine is asked to exit, each iteration:
 *  1. waits for data to be ready (when there is nothing buffered or the previous attempt failed),
 *  2. detaches the header and data buffers of the first buffer slot by swapping pointers with the
 *     connector's own header/buffer (or keeps the ones it already holds),
 *  3. reads any pending response from the server, without blocking,
 *  4. (re)connects if there is no open socket, including the TLS handshake when TLS is enabled,
 *  5. sends the detached header and buffer,
 *  6. on success releases the buffer slot and advances to the next one,
 *  7. sends and then resets the internal statistics when new data was signalled.
 *
 * @param instance_p an instance data structure.
 */
void simple_connector_worker(void *instance_p)
{
    struct instance *instance = (struct instance*)instance_p;
    struct simple_connector_data *connector_specific_data = instance->connector_specific_data;

#ifdef ENABLE_HTTPS
    uint32_t options = (uint32_t)instance->config.options;

    if (options & EXPORTING_OPTION_USE_TLS)
        ERR_clear_error();
#endif
    struct simple_connector_config *connector_specific_config = instance->config.connector_specific_config;

    int sock = -1;
    // timeoutms is in milliseconds: split it into whole seconds and the remaining microseconds
    struct timeval timeout = { .tv_sec = (instance->config.timeoutms * 1000) / 1000000,
                               .tv_usec = (instance->config.timeoutms * 1000) % 1000000 };
    int failures = 0;

    while (!instance->engine->exit) {
        struct stats *stats = &instance->stats;
        int send_stats = 0;

        if (instance->data_is_ready)
            send_stats = 1;

        uv_mutex_lock(&instance->mutex);
        // nothing buffered, or the last attempt failed: sleep until new data is signalled
        if (!connector_specific_data->first_buffer->used || failures) {
            while (!instance->data_is_ready)
                uv_cond_wait(&instance->cond_var, &instance->mutex);
            instance->data_is_ready = 0;
            send_stats = 1;
        }

        if (unlikely(instance->engine->exit)) {
            uv_mutex_unlock(&instance->mutex);
            break;
        }

        // ------------------------------------------------------------------------
        // detach buffer

        size_t buffered_metrics;

        if (!connector_specific_data->previous_buffer ||
            (connector_specific_data->previous_buffer == connector_specific_data->first_buffer &&
             connector_specific_data->first_buffer->used == 1)) {
            BUFFER *header, *buffer;

            header = connector_specific_data->first_buffer->header;
            buffer = connector_specific_data->first_buffer->buffer;
            connector_specific_data->buffered_metrics = connector_specific_data->first_buffer->buffered_metrics;
            connector_specific_data->buffered_bytes = connector_specific_data->first_buffer->buffered_bytes;

            buffered_metrics = connector_specific_data->buffered_metrics;

            // swap: the slot gets our (emptied) header/buffer, we take the filled ones
            buffer_flush(connector_specific_data->header);
            connector_specific_data->first_buffer->header = connector_specific_data->header;
            connector_specific_data->header = header;

            buffer_flush(connector_specific_data->buffer);
            connector_specific_data->first_buffer->buffer = connector_specific_data->buffer;
            connector_specific_data->buffer = buffer;
        } else {
            // keep the header/buffer we already hold, together with their saved metrics count
            buffered_metrics = connector_specific_data->buffered_metrics;
        }

        uv_mutex_unlock(&instance->mutex);

        // ------------------------------------------------------------------------
        // if we are connected, receive a response, without blocking

        if (likely(sock != -1))
            simple_connector_receive_response(&sock, instance);

        // ------------------------------------------------------------------------
        // if we are not connected, connect to a data collecting server

        if (unlikely(sock == -1)) {
            size_t reconnects = 0;

            sock = connect_to_one_of_urls(
                instance->config.destination,
                connector_specific_config->default_port,
                &timeout,
                &reconnects,
                connector_specific_data->connected_to,
                CONNECTED_TO_MAX);
#ifdef ENABLE_HTTPS
            if (exporting_tls_is_enabled(instance->config.type, options) && sock != -1) {
                if (netdata_ssl_exporting_ctx) {
                    if (sock_delnonblock(sock) < 0)
                        error("Exporting cannot remove the non-blocking flag from socket %d", sock);

                    // create the SSL object on first use, reset it on every later connection
                    if (connector_specific_data->conn == NULL) {
                        connector_specific_data->conn = SSL_new(netdata_ssl_exporting_ctx);
                        if (connector_specific_data->conn == NULL) {
                            error("Failed to allocate SSL structure to socket %d.", sock);
                            connector_specific_data->flags = NETDATA_SSL_NO_HANDSHAKE;
                        }
                    } else {
                        SSL_clear(connector_specific_data->conn);
                    }

                    if (connector_specific_data->conn) {
                        if (SSL_set_fd(connector_specific_data->conn, sock) != 1) {
                            error("Failed to set the socket to the SSL on socket fd %d.", sock);
                            connector_specific_data->flags = NETDATA_SSL_NO_HANDSHAKE;
                        } else {
                            connector_specific_data->flags = NETDATA_SSL_HANDSHAKE_COMPLETE;
                            SSL_set_connect_state(connector_specific_data->conn);
                            int err = SSL_connect(connector_specific_data->conn);
                            if (err != 1) {
                                // SSL_get_error() takes the return value of the failed call and has
                                // to run before ERR_get_error() removes the entry from the queue;
                                // the readable text comes from the queued error code
                                int ssl_error = SSL_get_error(connector_specific_data->conn, err);
                                char ssl_error_text[256];
                                ERR_error_string_n(ERR_get_error(), ssl_error_text, sizeof(ssl_error_text));
                                error(
                                    "SSL cannot connect with the server: %s (SSL error code %d)",
                                    ssl_error_text,
                                    ssl_error);
                                connector_specific_data->flags = NETDATA_SSL_NO_HANDSHAKE;
                            } else {
                                info("Exporting established a SSL connection.");

                                // receive timeout: a quarter of the connection timeout, or 2 seconds
                                // when that rounds down to zero
                                struct timeval tv;
                                tv.tv_sec = timeout.tv_sec / 4;
                                tv.tv_usec = 0;

                                if (!tv.tv_sec)
                                    tv.tv_sec = 2;

                                if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (const char *)&tv, sizeof(tv)))
                                    error("Cannot set timeout to socket %d, this can block communication", sock);
                            }
                        }
                    }
                }
            }
#endif

            stats->reconnects += reconnects;
        }

        if (unlikely(instance->engine->exit))
            break;

        // ------------------------------------------------------------------------
        // if we are connected, send our buffer to the data collecting server

        failures = 0;

        if (likely(sock != -1)) {
            simple_connector_send_buffer(
                &sock,
                &failures,
                instance,
                connector_specific_data->header,
                connector_specific_data->buffer,
                buffered_metrics);
        } else {
            error("EXPORTING: failed to update '%s'", instance->config.destination);
            stats->transmission_failures++;

            // increment the counter we check for data loss
            failures++;
        }

        // the data went out: mark the slot as free and move on to the next one
        if (!failures) {
            connector_specific_data->first_buffer->buffered_metrics =
                connector_specific_data->first_buffer->buffered_bytes = connector_specific_data->first_buffer->used = 0;
            connector_specific_data->first_buffer = connector_specific_data->first_buffer->next;
        }

        if (unlikely(instance->engine->exit))
            break;

        if (send_stats) {
            uv_mutex_lock(&instance->mutex);

            stats->buffered_metrics = connector_specific_data->total_buffered_metrics;

            send_internal_metrics(instance);

            stats->buffered_metrics = 0;

            // reset the internal monitoring chart counters
            connector_specific_data->total_buffered_metrics =
            stats->buffered_bytes =
            stats->receptions =
            stats->received_bytes =
            stats->sent_metrics =
            stats->sent_bytes =
            stats->transmission_successes =
            stats->transmission_failures =
            stats->reconnects =
            stats->data_lost_events =
            stats->lost_metrics =
            stats->lost_bytes = 0;

            uv_mutex_unlock(&instance->mutex);
        }

#ifdef UNIT_TESTING
        return;
#endif
    }

#if ENABLE_PROMETHEUS_REMOTE_WRITE
    if (instance->config.type == EXPORTING_CONNECTOR_TYPE_PROMETHEUS_REMOTE_WRITE)
        clean_prometheus_remote_write(instance);
#endif

    simple_connector_cleanup(instance);
}