mirror of https://github.com/netdata/netdata.git

* function renames and code cleanup in popen.c; no actual code changes
* netdata popen() now opens both child process stdin and stdout and returns FILE * for both
* pass both input and output to parser structures
* updated rrdset to call custom functions
* RRDSET FUNCTION leading calls for both sync and async operation
* put RRDSET functions to a separate file
* added format and timeout at function definition
* support for synchronous (internal plugins) and asynchronous (external plugins and children) functions
* /api/v1/function endpoint
* functions are now attached to the host and there is a dictionary view per chart
* functions implemented at plugins.d
* remove the defer until keyword hook from plugins.d when it is done
* stream sender implementation of functions
* sanitization of all functions so that certain characters are only allowed
* stricter sanitization
* common max size
* 1st working plugins.d example
* always init inflight dictionary
* properly destroy dictionaries to avoid parallel insertion of items
* add more debugging on disconnection reasons
* add more debugging on disconnection reasons again
* streaming receiver respects newlines
* don't use the same fp for both streaming receive and send
* don't free dbengine memory with internal checks
* make sender proceed in the buffer
* added timing info and garbage collection at plugins.d
* added info about routing nodes
* added info about routing nodes with delay
* added more info about delays
* added more info about delays again
* signal sending thread to wake up
* streaming version labeling and commented code to support capabilities
* added functions to /api/v1/data, /api/v1/charts, /api/v1/chart, /api/v1/info
* redirect top output to stdout
* address coverity findings
* fix resource leaks of popen
* log attempts to connect to individual destinations
* better messages
* properly parse destinations
* try to find a function from the most matching to the least matching
* log added streaming destinations
* rotate destinations bypassing a node in the middle that does not accept our connection
* break the loops properly
* use typedef to define callbacks
* capabilities negotiation during streaming
* functions exposed upstream based on capabilities; per-node compression disable persists across reconnects; always try to connect with all capabilities
* restore functionality to lookup functions
* better logging of capabilities
* remove old versions from capabilities when a newer version is there
* fix formatting
* optimization for plugins.d rrdlabels to avoid creating and destructing dictionaries all the time
* delayed health initialization for rrddim and rrdset
* cleanup health initialization
* fix for popen() not returning the right value
* add health worker jobs for initializing rrdset and rrddim
* added content type support for functions; apps.plugin permanent function to display all the processes
* fixes for functions parameters parsing in apps.plugin
* fix for process matching in apps.plugin
* first working function for apps.plugin
* Dashboard ACL is disabled for functions; Function errors are all in JSON format
* apps.plugin function processes returns json table
* use json_escape_string() to escape message
* fix formatting
* apps.plugin exposes all its metrics to function processes
* fix json formatting when filtering out some rows
* reopen the internal pipe of rrdpush in case of errors
* misplaced statement
* do not use buffer->len
* support for GLOBAL functions (functions that are not linked to a chart)
* added /api/v1/functions endpoint; removed format from the FUNCTIONS api;
* swagger documentation about the new api end points
* added plugins.d documentation about functions
* never re-close a file
* remove unnecessary ifdef
* fixed issues identified by codacy
* fix for null label value
* make edit-config copy-and-paste friendly
* Revert "make edit-config copy-and-paste friendly"
This reverts commit 54500c0e0a.
* reworked sender handshake to fix coverity findings
* timeout is zero, for both send_timeout() and recv_timeout()
* properly detect that parent closed the socket
* support caching of function responses; limit function response to 10MB; added protection from malformed function responses
* disabled excessive logging
* added units to apps.plugin function processes and normalized all values to be human readable
* shorter field names
* fixed issues reported
* fixed apps.plugin error response; tested that pluginsd can properly handle faulty responses
* use double linked list macros for double linked list management
* faster apps.plugin function printing by minimizing file operations
* added memory percentage
* fix compatibility issues with older compilers and FreeBSD
* rrdpush sender code cleanup; rrdhost structure cleanup from sender flags and variables;
* fix leftover variable in ifdef
* apps.plugin: do not call detach from the thread; exit immediately when input is broken
* exclude AR charts from health
* flush cleaner; prefer sender output
* clarity
* do not fill the cbuffer if not connected
* fix
* don't enable host->sender if streaming is not enabled; send host label updates to parent;
* functions are only available through ACLK
* Prepared statement reports only in dev mode
* fix AR chart detection
* fix for streaming not enabling itself
* more cleanup of sender and receiver structures
* moved read-only flags and configuration options to rrdhost->options
* fixed merge with master
* fix for incomplete rename
* prevent service thread from working on charts that are being collected
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
// SPDX-License-Identifier: GPL-3.0-or-later

#include "exporting_engine.h"

/**
 * Check if TLS is enabled in the configuration
 *
 * @param type an exporting connector type.
 * @param options the options of the connector instance.
 * @return Returns 1 if TLS should be enabled, 0 otherwise.
 */
static int exporting_tls_is_enabled(EXPORTING_CONNECTOR_TYPE type, EXPORTING_OPTIONS options)
{
    return (type == EXPORTING_CONNECTOR_TYPE_GRAPHITE_HTTP ||
            type == EXPORTING_CONNECTOR_TYPE_JSON_HTTP ||
            type == EXPORTING_CONNECTOR_TYPE_OPENTSDB_HTTP ||
            type == EXPORTING_CONNECTOR_TYPE_PROMETHEUS_REMOTE_WRITE) &&
           options & EXPORTING_OPTION_USE_TLS;
}

/**
 * Discard response
 *
 * Discards a response received by an exporting connector instance after logging a sample of it to error.log
 *
 * @param buffer buffer with response data.
 * @param instance an instance data structure.
 * @return Always returns 0.
 */
int exporting_discard_response(BUFFER *buffer, struct instance *instance) {
#if NETDATA_INTERNAL_CHECKS
    char sample[1024];
    const char *s = buffer_tostring(buffer);
    char *d = sample, *e = &sample[sizeof(sample) - 1];

    for(; *s && d < e ;s++) {
        char c = *s;
        if(unlikely(!isprint(c))) c = ' ';
        *d++ = c;
    }
    *d = '\0';

    debug(
        D_EXPORTING,
        "EXPORTING: received %zu bytes from %s connector instance. Ignoring them. Sample: '%s'",
        buffer_strlen(buffer),
        instance->config.name,
        sample);
#else
    UNUSED(instance);
#endif /* NETDATA_INTERNAL_CHECKS */

    buffer_flush(buffer);
    return 0;
}

/**
 * Receive response
 *
 * @param sock communication socket.
 * @param instance an instance data structure.
 */
void simple_connector_receive_response(int *sock, struct instance *instance)
{
    static BUFFER *response = NULL;
    if (!response)
        response = buffer_create(4096);

    struct stats *stats = &instance->stats;
#ifdef ENABLE_HTTPS
    uint32_t options = (uint32_t)instance->config.options;
    struct simple_connector_data *connector_specific_data = instance->connector_specific_data;

    if (options & EXPORTING_OPTION_USE_TLS)
        ERR_clear_error();
#endif

    errno = 0;

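    // Drain whatever the server has already sent: plain sockets are read with
    // MSG_DONTWAIT, so the loop stops as soon as a read would block (EWOULDBLOCK).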
    // loop through to collect all data
    while (*sock != -1 && errno != EWOULDBLOCK) {
        ssize_t r;
#ifdef ENABLE_HTTPS
        if (exporting_tls_is_enabled(instance->config.type, options) &&
            connector_specific_data->conn &&
            connector_specific_data->flags == NETDATA_SSL_HANDSHAKE_COMPLETE) {
            r = (ssize_t)SSL_read(connector_specific_data->conn,
                                  &response->buffer[response->len],
                                  (int) (response->size - response->len));

            if (likely(r > 0)) {
                // we received some data
                response->len += r;
                stats->received_bytes += r;
                stats->receptions++;
                continue;
            } else {
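                // SSL_read() returned no data: WANT_READ / WANT_WRITE only mean that
                // nothing more is available right now, so leave the loop quietly;
                // anything else is a real TLS error and is logged before leaving.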
                int sslerrno = SSL_get_error(connector_specific_data->conn, (int) r);
                u_long sslerr = ERR_get_error();
                char buf[256];
                switch (sslerrno) {
                    case SSL_ERROR_WANT_READ:
                    case SSL_ERROR_WANT_WRITE:
                        goto endloop;
                    default:
                        ERR_error_string_n(sslerr, buf, sizeof(buf));
                        error("SSL error (%s)", buf);
                        goto endloop;
                }
            }
        } else {
            r = recv(*sock, &response->buffer[response->len], response->size - response->len, MSG_DONTWAIT);
        }
#else
        r = recv(*sock, &response->buffer[response->len], response->size - response->len, MSG_DONTWAIT);
#endif
        if (likely(r > 0)) {
            // we received some data
            response->len += r;
            stats->received_bytes += r;
            stats->receptions++;
        } else if (r == 0) {
            error("EXPORTING: '%s' closed the socket", instance->config.destination);
            close(*sock);
            *sock = -1;
        } else {
            // failed to receive data
            if (errno != EAGAIN && errno != EWOULDBLOCK) {
                error("EXPORTING: cannot receive data from '%s'.", instance->config.destination);
            }
        }

#ifdef UNIT_TESTING
        break;
#endif
    }
#ifdef ENABLE_HTTPS
endloop:
#endif

    // if we received data, process them
    if (buffer_strlen(response))
        instance->check_response(response, instance);
}

/**
 * Send buffer to a server
 *
 * @param sock communication socket.
 * @param failures the number of communication failures.
 * @param instance an instance data structure.
 * @param header a buffer with the HTTP header to send before the data, if any.
 * @param buffer a buffer with the data to send.
 * @param buffered_metrics the number of metrics currently buffered.
 */
void simple_connector_send_buffer(
    int *sock, int *failures, struct instance *instance, BUFFER *header, BUFFER *buffer, size_t buffered_metrics)
{
    int flags = 0;
#ifdef MSG_NOSIGNAL
    flags += MSG_NOSIGNAL;
#endif

#ifdef ENABLE_HTTPS
    uint32_t options = (uint32_t)instance->config.options;
    struct simple_connector_data *connector_specific_data = instance->connector_specific_data;

    if (options & EXPORTING_OPTION_USE_TLS)
        ERR_clear_error();
#endif

    struct stats *stats = &instance->stats;
    ssize_t header_sent_bytes = 0;
    ssize_t buffer_sent_bytes = 0;
    size_t header_len = buffer_strlen(header);
    size_t buffer_len = buffer_strlen(buffer);

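    // The HTTP header (when the connector uses one) is written first; the metrics
    // payload is written only if the whole header went out in a single call.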
#ifdef ENABLE_HTTPS
    if (exporting_tls_is_enabled(instance->config.type, options) &&
        connector_specific_data->conn &&
        connector_specific_data->flags == NETDATA_SSL_HANDSHAKE_COMPLETE) {
        if (header_len)
            header_sent_bytes = (ssize_t)SSL_write(connector_specific_data->conn, buffer_tostring(header), header_len);
        if ((size_t)header_sent_bytes == header_len)
            buffer_sent_bytes = (ssize_t)SSL_write(connector_specific_data->conn, buffer_tostring(buffer), buffer_len);
    } else {
        if (header_len)
            header_sent_bytes = send(*sock, buffer_tostring(header), header_len, flags);
        if ((size_t)header_sent_bytes == header_len)
            buffer_sent_bytes = send(*sock, buffer_tostring(buffer), buffer_len, flags);
    }
#else
    if (header_len)
        header_sent_bytes = send(*sock, buffer_tostring(header), header_len, flags);
    if ((size_t)header_sent_bytes == header_len)
        buffer_sent_bytes = send(*sock, buffer_tostring(buffer), buffer_len, flags);
#endif

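    // A transmission counts as successful only when the entire payload was written;
    // anything short of that increments the failure counter and closes the socket so
    // the worker re-connects on the next iteration.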
    if ((size_t)buffer_sent_bytes == buffer_len) {
        // we sent the data successfully
        stats->transmission_successes++;
        stats->sent_metrics += buffered_metrics;
        stats->sent_bytes += buffer_sent_bytes;

        // reset the failures count
        *failures = 0;

        // empty the buffer
        buffer_flush(buffer);
    } else {
        // oops! we couldn't send (all or some of the) data
        error(
            "EXPORTING: failed to write data to '%s'. Willing to write %zu bytes, wrote %zd bytes. Will re-connect.",
            instance->config.destination,
            buffer_len,
            buffer_sent_bytes);
        stats->transmission_failures++;

        if(buffer_sent_bytes != -1)
            stats->sent_bytes += buffer_sent_bytes;

        // increment the counter we check for data loss
        (*failures)++;

        // close the socket - we will re-open it next time
        close(*sock);
        *sock = -1;
    }
}

/**
 * Simple connector worker
 *
 * Runs in a separate thread for every instance.
 *
 * @param instance_p an instance data structure.
 */
void simple_connector_worker(void *instance_p)
{
    struct instance *instance = (struct instance*)instance_p;
    struct simple_connector_data *connector_specific_data = instance->connector_specific_data;

#ifdef ENABLE_HTTPS
    uint32_t options = (uint32_t)instance->config.options;

    if (options & EXPORTING_OPTION_USE_TLS)
        ERR_clear_error();
#endif
    struct simple_connector_config *connector_specific_config = instance->config.connector_specific_config;

    int sock = -1;
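    // convert the configured timeout from milliseconds to a struct timeval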
    struct timeval timeout = { .tv_sec = (instance->config.timeoutms * 1000) / 1000000,
                               .tv_usec = (instance->config.timeoutms * 1000) % 1000000 };
    int failures = 0;

    while (!instance->engine->exit) {
        struct stats *stats = &instance->stats;
        int send_stats = 0;

        if (instance->data_is_ready)
            send_stats = 1;

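        // If there is nothing buffered yet, or the last send failed, block on the
        // condition variable until the main exporting thread signals that new data
        // is ready.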
        uv_mutex_lock(&instance->mutex);
        if (!connector_specific_data->first_buffer->used || failures) {
            while (!instance->data_is_ready)
                uv_cond_wait(&instance->cond_var, &instance->mutex);
            instance->data_is_ready = 0;
            send_stats = 1;
        }

        if (unlikely(instance->engine->exit)) {
            uv_mutex_unlock(&instance->mutex);
            break;
        }

        // ------------------------------------------------------------------------
        // detach buffer

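        // While still holding the mutex, swap the worker's private header/buffer with
        // those of the first ring entry and take over its metric and byte counters;
        // the freshly flushed buffers are handed back to the ring entry for refilling.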
        size_t buffered_metrics;

        if (!connector_specific_data->previous_buffer ||
            (connector_specific_data->previous_buffer == connector_specific_data->first_buffer &&
             connector_specific_data->first_buffer->used == 1)) {
            BUFFER *header, *buffer;

            header = connector_specific_data->first_buffer->header;
            buffer = connector_specific_data->first_buffer->buffer;
            connector_specific_data->buffered_metrics = connector_specific_data->first_buffer->buffered_metrics;
            connector_specific_data->buffered_bytes = connector_specific_data->first_buffer->buffered_bytes;

            buffered_metrics = connector_specific_data->buffered_metrics;

            buffer_flush(connector_specific_data->header);
            connector_specific_data->first_buffer->header = connector_specific_data->header;
            connector_specific_data->header = header;

            buffer_flush(connector_specific_data->buffer);
            connector_specific_data->first_buffer->buffer = connector_specific_data->buffer;
            connector_specific_data->buffer = buffer;
        } else {
            buffered_metrics = connector_specific_data->buffered_metrics;
        }

        uv_mutex_unlock(&instance->mutex);

        // ------------------------------------------------------------------------
        // if we are connected, receive a response, without blocking

        if (likely(sock != -1))
            simple_connector_receive_response(&sock, instance);

        // ------------------------------------------------------------------------
        // if we are not connected, connect to a data collecting server

        if (unlikely(sock == -1)) {
            size_t reconnects = 0;

            sock = connect_to_one_of_urls(
                instance->config.destination,
                connector_specific_config->default_port,
                &timeout,
                &reconnects,
                connector_specific_data->connected_to,
                CONNECTED_TO_MAX);
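            // When TLS is enabled, switch the freshly connected socket back to blocking
            // mode, (re)initialize the SSL session and perform the handshake; on success
            // a receive timeout is set so reads cannot block forever.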
#ifdef ENABLE_HTTPS
            if (exporting_tls_is_enabled(instance->config.type, options) && sock != -1) {
                if (netdata_ssl_exporting_ctx) {
                    if (sock_delnonblock(sock) < 0)
                        error("Exporting cannot remove the non-blocking flag from socket %d", sock);

                    if (connector_specific_data->conn == NULL) {
                        connector_specific_data->conn = SSL_new(netdata_ssl_exporting_ctx);
                        if (connector_specific_data->conn == NULL) {
                            error("Failed to allocate SSL structure to socket %d.", sock);
                            connector_specific_data->flags = NETDATA_SSL_NO_HANDSHAKE;
                        }
                    } else {
                        SSL_clear(connector_specific_data->conn);
                    }

                    if (connector_specific_data->conn) {
                        if (SSL_set_fd(connector_specific_data->conn, sock) != 1) {
                            error("Failed to set the socket to the SSL on socket fd %d.", sock);
                            connector_specific_data->flags = NETDATA_SSL_NO_HANDSHAKE;
                        } else {
                            connector_specific_data->flags = NETDATA_SSL_HANDSHAKE_COMPLETE;
                            SSL_set_connect_state(connector_specific_data->conn);
                            int err = SSL_connect(connector_specific_data->conn);
                            if (err != 1) {
                                err = SSL_get_error(connector_specific_data->conn, err);
                                error(
                                    "SSL cannot connect with the server: %s ",
                                    ERR_error_string((long)SSL_get_error(connector_specific_data->conn, err), NULL));
                                connector_specific_data->flags = NETDATA_SSL_NO_HANDSHAKE;
                            } else {
                                info("Exporting established a SSL connection.");

                                struct timeval tv;
                                tv.tv_sec = timeout.tv_sec / 4;
                                tv.tv_usec = 0;

                                if (!tv.tv_sec)
                                    tv.tv_sec = 2;

                                if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (const char *)&tv, sizeof(tv)))
                                    error("Cannot set timeout to socket %d, this can block communication", sock);
                            }
                        }
                    }
                }
            }
#endif

            stats->reconnects += reconnects;
        }

        if (unlikely(instance->engine->exit))
            break;

        // ------------------------------------------------------------------------
        // if we are connected, send our buffer to the data collecting server

        failures = 0;

        if (likely(sock != -1)) {
            simple_connector_send_buffer(
                &sock,
                &failures,
                instance,
                connector_specific_data->header,
                connector_specific_data->buffer,
                buffered_metrics);
        } else {
            error("EXPORTING: failed to update '%s'", instance->config.destination);
            stats->transmission_failures++;

            // increment the counter we check for data loss
            failures++;
        }

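        // Only after a successful send is the first ring entry marked as empty and the
        // ring advanced to the next entry.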
        if (!failures) {
            connector_specific_data->first_buffer->buffered_metrics =
                connector_specific_data->first_buffer->buffered_bytes = connector_specific_data->first_buffer->used = 0;
            connector_specific_data->first_buffer = connector_specific_data->first_buffer->next;
        }

        if (unlikely(instance->engine->exit))
            break;

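        // When this iteration processed new data, publish the accumulated counters of
        // the instance to Netdata's internal monitoring charts and reset them for the
        // next interval.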
        if (send_stats) {
            uv_mutex_lock(&instance->mutex);

            stats->buffered_metrics = connector_specific_data->total_buffered_metrics;

            send_internal_metrics(instance);

            stats->buffered_metrics = 0;

            // reset the internal monitoring chart counters
            connector_specific_data->total_buffered_metrics =
                stats->buffered_bytes =
                stats->receptions =
                stats->received_bytes =
                stats->sent_metrics =
                stats->sent_bytes =
                stats->transmission_successes =
                stats->transmission_failures =
                stats->reconnects =
                stats->data_lost_events =
                stats->lost_metrics =
                stats->lost_bytes = 0;

            uv_mutex_unlock(&instance->mutex);
        }

#ifdef UNIT_TESTING
        return;
#endif
    }

#if ENABLE_PROMETHEUS_REMOTE_WRITE
    if (instance->config.type == EXPORTING_CONNECTOR_TYPE_PROMETHEUS_REMOTE_WRITE)
        clean_prometheus_remote_write(instance);
#endif

    simple_connector_cleanup(instance);
}