0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-22 20:42:33 +00:00
netdata_netdata/database/rrdfunctions-inflight.c
Costa Tsaousis f2b250a1f5
dyncfg v2 ()
* split rrdfunctions streaming and progress

* simplified internal inline functions API

* split rrdfunctions inflight management

* split rrd functions exporters

* renames

* base dyncfg structure

* config pluginsd

* intercept dyncfg function calls

* loading and saving of dyncfg metadata and data

* save metadata and payload to a single file; added code to update the plugins with jobs and saved configs

* basic working unit test

* added payload to functions execution

* removed old dyncfg code that is not needed any more

* more cleanup

* cleanup sender for functions with payload

* dyncfg functions are not exposed as functions

* remaining work to avoid indexing the \0 terminating character in dictionary keys

* added back old dyncfg plugins.d commands as noop, to allow plugins continue working

* working api; working streaming;

* updated plugins.d documentation

* aclk and http api requests share the same header parsing logic

* added source type internal

* fixed crashes

* added god mode for tests

* fixes

* fixed messages

* save host machine guids to configs

* cleaner manipulation of supported commands

* the functions event loop for external plugins can now process dyncfg requests

* unified internal and external plugins dyncfg API

* Netdata serves schema requests from /etc/netdata/schema.d and /var/lib/netdata/conf.d/schema.d

* cleanup and various fixes; fixed bug in previous dyncfg implementation on streaming that was sending the payload in a way that allowed other streaming commands to be multiplexed

* internals go to a separate header file

* fix duplicate ACLK requests sent by aclk queue mechanism

* use fstat instead of stat

* working api

* plugin actions renamed to create and delete; dyncfg files are removed only from user actions

* prevent deadlock by using the react callback

* fix for string_strndupz()

* better dyncfg unittests

* more tests at the unittests

* properly detect dyncfg functions

* hide config functions from the UI

* tree response improvements

* send the initial update with payload

* determine tty using stdout, not stderr

* changes to statuses, cleanup and the code to bring all business logic into interception

* do not crash when the status is empty

* functions now propagate the source of the requests to plugins

* avoid warning about unused functions

* in the count at items for attention, do not count the orphan entries

* save source into dyncfg

* make the list null terminated

* fixed invalid comparison

* prevent memory leak on duplicated headers; log x-forwarded-for

* more unit tests

* added dyncfg unittests into the default unittests

* more unit tests and fixes

* more unit tests and fixes

* fix dictionary unittests

* config functions require admin access
2024-01-11 16:56:45 +02:00

641 lines
22 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#define NETDATA_RRD_INTERNALS
#include "rrdcollector-internals.h"
#include "rrdfunctions-internals.h"
#include "rrdfunctions-inflight.h"
// State of one function call ("transaction") while it is in flight.
// Entries are stored by value in the rrd_functions_inflight_requests dictionary,
// keyed by the transaction string; they are populated by rrd_function_run().
struct rrd_function_inflight {
bool used; // false in a freshly built entry, set to true once rrd_function_run() claims it (detects duplicates)
RRDHOST *host; // the host whose function is being called
uuid_t transaction_uuid; // binary form of the transaction id
const char *transaction; // allocated copy of the transaction id in compact string form
const char *cmd; // allocated copy of the command, as given by the caller
const char *sanitized_cmd; // allocated copy of the command after rrd_functions_sanitize()
const char *source; // allocated copy of the sanitized source of the request
size_t sanitized_cmd_length; // length reported by rrd_functions_sanitize() for sanitized_cmd
int timeout; // timeout of the call, in seconds
bool cancelled; // read and written with __atomic builtins; true once a cancellation was requested
usec_t stop_monotonic_ut; // monotonic clock deadline of the call, in microseconds
BUFFER *payload; // private copy (buffer_dup()) of the payload given by the caller
const DICTIONARY_ITEM *host_function_acquired; // acquired item of host->functions, released when the entry is deleted
// the collector
// we acquire this structure at the beginning,
// and we release it at the end
struct rrd_host_function *rdcf;
struct {
BUFFER *wb; // the caller supplied buffer that receives the response
// in async mode,
// the function to call to send the result back
rrd_function_result_callback_t cb;
void *data;
} result;
struct {
// to be called in sync mode
// while the function is running
// to check if the function has been canceled
rrd_function_is_cancelled_cb_t cb;
void *data;
} is_cancelled;
struct {
// to be registered by the function itself
// used to signal the function to cancel
rrd_function_cancel_cb_t cb;
void *data;
} canceller;
struct {
// callback to receive progress reports from function
rrd_function_progress_cb_t cb;
void *data;
} progress;
struct {
// to be registered by the function itself
// used to send progress requests to function
rrd_function_progresser_cb_t cb;
void *data;
} progresser;
};
// all the in-flight function calls, indexed by their transaction string;
// created by rrd_functions_inflight_init() and destroyed by rrd_functions_inflight_destroy()
static DICTIONARY *rrd_functions_inflight_requests = NULL;
// forward declaration: the definition is further down in this file,
// but rrd_call_function_async_and_wait() needs to call it earlier
static void rrd_function_cancel_inflight(struct rrd_function_inflight *r);
// ----------------------------------------------------------------------------
// Release everything an in-flight entry owns: the payload copy and the
// allocated strings (transaction, cmd, sanitized_cmd, source).
//
// Every released member is reset to NULL, so that the entry never carries
// a dangling pointer and a repeated cleanup of the same entry is harmless.
// The entry itself (r) is NOT freed here; it is owned by the dictionary
// or by the caller's stack.
static void rrd_functions_inflight_cleanup(struct rrd_function_inflight *r) {
    buffer_free(r->payload);
    r->payload = NULL;

    freez((void *)r->transaction);
    r->transaction = NULL;

    freez((void *)r->cmd);
    r->cmd = NULL;

    freez((void *)r->sanitized_cmd);
    r->sanitized_cmd = NULL;

    // previously this pointer was freed but left dangling
    freez((void *)r->source);
    r->source = NULL;
}
// Delete callback of the in-flight requests dictionary.
// Called with the entry being removed in `value`; `item` and `data` are not used.
static void rrd_functions_inflight_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) {
    struct rrd_function_inflight *inflight = value;

    // free the payload and the strings owned by the entry
    rrd_functions_inflight_cleanup(inflight);

    // drop the reference to the host function that the entry was holding
    dictionary_acquired_item_release(inflight->host->functions, inflight->host_function_acquired);
}
// Create the dictionary of in-flight requests, unless it already exists,
// and attach the delete callback that cleans up each entry.
void rrd_functions_inflight_init(void) {
    if(!rrd_functions_inflight_requests) {
        rrd_functions_inflight_requests = dictionary_create_advanced(
            DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
            NULL,
            sizeof(struct rrd_function_inflight));

        dictionary_register_delete_callback(
            rrd_functions_inflight_requests,
            rrd_functions_inflight_delete_cb,
            NULL);
    }
}
// Destroy the dictionary of in-flight requests (if it has been created)
// and forget the pointer, so that a later init starts from scratch.
void rrd_functions_inflight_destroy(void) {
    if(rrd_functions_inflight_requests) {
        dictionary_destroy(rrd_functions_inflight_requests);
        rrd_functions_inflight_requests = NULL;
    }
}
// Handed to the executed function as its "register canceller" hook:
// stores the canceller callback and its data into the in-flight entry
// passed as register_canceller_cb_data.
static void rrd_inflight_async_function_register_canceller_cb(void *register_canceller_cb_data, rrd_function_cancel_cb_t canceller_cb, void *canceller_cb_data) {
    struct rrd_function_inflight *inflight = register_canceller_cb_data;

    inflight->canceller.data = canceller_cb_data;
    inflight->canceller.cb = canceller_cb;
}
// Handed to the executed function as its "register progresser" hook:
// stores the progresser callback and its data into the in-flight entry
// passed as register_progresser_cb_data.
static void rrd_inflight_async_function_register_progresser_cb(void *register_progresser_cb_data, rrd_function_progresser_cb_t progresser_cb, void *progresser_cb_data) {
    struct rrd_function_inflight *inflight = register_progresser_cb_data;

    inflight->progresser.data = progresser_cb_data;
    inflight->progresser.cb = progresser_cb;
}
// ----------------------------------------------------------------------------
// waiting for async function completion
// Rendezvous structure shared between a thread waiting for the response of
// an async function (rrd_call_function_async_and_wait()) and the result
// callback that delivers it (rrd_async_function_signal_when_ready()).
// Whoever finishes last frees it with rrd_function_call_wait_free().
struct rrd_function_call_wait {
RRDHOST *host; // copied from the in-flight entry
const DICTIONARY_ITEM *host_function_acquired; // copied from the in-flight entry
char *transaction; // private copy of the transaction string, used to delete the in-flight entry
bool free_with_signal; // true when the waiter gave up: the result callback must free this structure
bool data_are_ready; // set by the result callback when the response has been delivered
netdata_mutex_t mutex; // protects the members of this structure
pthread_cond_t cond; // signalled by the result callback to wake up the waiter
int code; // the response code delivered by the result callback
};
// Remove the in-flight entry of `transaction` from the dictionary and then
// run the dictionary garbage collection. `host` is not used.
static void rrd_inflight_function_cleanup(RRDHOST *host __maybe_unused, const char *transaction) {
    // step 1: unlink the entry of this transaction
    dictionary_del(rrd_functions_inflight_requests, transaction);

    // step 2: let the dictionary reclaim deleted items
    dictionary_garbage_collect(rrd_functions_inflight_requests);
}
// Tear down a wait structure: delete the in-flight entry of its transaction,
// then release the transaction copy, the condition variable, the mutex
// and finally the structure itself.
static void rrd_function_call_wait_free(struct rrd_function_call_wait *tmp) {
    // the transaction string is needed for the lookup, so free it afterwards
    rrd_inflight_function_cleanup(tmp->host, tmp->transaction);
    freez(tmp->transaction);

    // synchronization primitives
    pthread_cond_destroy(&tmp->cond);
    netdata_mutex_destroy(&tmp->mutex);

    freez(tmp);
}
// Result callback used when a caller waits for an async function.
// Records the response code, marks the data as ready and wakes up the waiter.
// If the waiter has already given up (free_with_signal is set), nobody else
// will release the resources, so the temporary buffer and the wait structure
// are freed here.
static void rrd_async_function_signal_when_ready(BUFFER *temp_wb __maybe_unused, int code, void *callback_data) {
    struct rrd_function_call_wait *waiter = callback_data;

    netdata_mutex_lock(&waiter->mutex);

    // holding the mutex means the waiting thread is either inside
    // pthread_cond_timedwait() or it has given up and left

    waiter->code = code;
    waiter->data_are_ready = true;
    const bool waiter_has_left = waiter->free_with_signal;

    pthread_cond_signal(&waiter->cond);
    netdata_mutex_unlock(&waiter->mutex);

    if(!waiter_has_left)
        return;

    buffer_free(temp_wb);
    rrd_function_call_wait_free(waiter);
}
// Result callback used when the caller does not wait for an async function.
// Forwards the result to the caller's own callback (when there is one)
// and then removes the in-flight entry of the transaction.
static void rrd_inflight_async_function_nowait_finished(BUFFER *wb, int code, void *data) {
    struct rrd_function_inflight *inflight = data;

    rrd_function_result_callback_t forward_cb = inflight->result.cb;
    if(forward_cb)
        forward_cb(wb, code, inflight->result.data);

    rrd_inflight_function_cleanup(inflight->host, inflight->transaction);
}
// "is cancelled" callback given to async functions: atomically reads
// the cancelled flag of the in-flight entry passed in `data`.
static bool rrd_inflight_async_function_is_cancelled(void *data) {
    struct rrd_function_inflight *inflight = data;
    bool is_cancelled = __atomic_load_n(&inflight->cancelled, __ATOMIC_RELAXED);
    return is_cancelled;
}
static inline int rrd_call_function_async_and_dont_wait(struct rrd_function_inflight *r) {
struct rrd_function_execute rfe = {
.transaction = &r->transaction_uuid,
.function = r->sanitized_cmd,
.payload = r->payload,
.source = r->source,
.stop_monotonic_ut = &r->stop_monotonic_ut,
.result = {
.wb = r->result.wb,
.cb = rrd_inflight_async_function_nowait_finished,
.data = r,
},
.progress = {
.cb = r->progress.cb,
.data = r->progress.data,
},
.is_cancelled = {
.cb = rrd_inflight_async_function_is_cancelled,
.data = r,
},
.register_canceller = {
.cb = rrd_inflight_async_function_register_canceller_cb,
.data = r,
},
.register_progresser = {
.cb = rrd_inflight_async_function_register_progresser_cb,
.data = r,
},
};
int code = r->rdcf->execute_cb(&rfe, r->rdcf->execute_cb_data);
return code;
}
// Execute an async function and block the calling thread until one of
// the following happens:
//
//  - the function delivers its response (copied into r->result.wb),
//  - the deadline (r->stop_monotonic_ut + RRDFUNCTIONS_TIMEOUT_EXTENSION_UT) passes,
//  - the caller's is_cancelled callback reports a cancellation, or
//  - waiting on the condition variable fails.
//
// The function writes into a temporary buffer, not the caller's one, because
// the caller may give up while the function is still running. Ownership of
// the temporary buffer and of the wait structure is decided under the mutex:
// if the response arrived, this thread frees them; otherwise free_with_signal
// is set and the result callback (rrd_async_function_signal_when_ready())
// frees them when it eventually runs.
//
// Returns the response code of the function, or the code of the error
// generated here (cancelled, timeout, internal error).
static int rrd_call_function_async_and_wait(struct rrd_function_inflight *r) {
    struct rrd_function_call_wait *tmp = mallocz(sizeof(struct rrd_function_call_wait));
    tmp->free_with_signal = false;
    tmp->data_are_ready = false;
    tmp->host = r->host;
    tmp->host_function_acquired = r->host_function_acquired;
    tmp->transaction = strdupz(r->transaction);
    netdata_mutex_init(&tmp->mutex);
    pthread_cond_init(&tmp->cond, NULL);

    // we need a temporary BUFFER, because we may time out and the caller supplied one may vanish,
    // so we create a new one we guarantee will survive until the collector finishes...

    bool we_should_free = false;
    BUFFER *temp_wb = buffer_create(1024, &netdata_buffers_statistics.buffers_functions); // we need it because we may give up on it
    temp_wb->content_type = r->result.wb->content_type;

    struct rrd_function_execute rfe = {
        .transaction = &r->transaction_uuid,
        .function = r->sanitized_cmd,
        .payload = r->payload,
        .source = r->source,
        .stop_monotonic_ut = &r->stop_monotonic_ut,
        .result = {
            .wb = temp_wb,

            // we overwrite the result callbacks,
            // so that we can clean up the allocations made
            .cb = rrd_async_function_signal_when_ready,
            .data = tmp,
        },
        .progress = {
            .cb = r->progress.cb,
            .data = r->progress.data,
        },
        .is_cancelled = {
            .cb = rrd_inflight_async_function_is_cancelled,
            .data = r,
        },
        .register_canceller = {
            .cb = rrd_inflight_async_function_register_canceller_cb,
            .data = r,
        },
        .register_progresser = {
            .cb = rrd_inflight_async_function_register_progresser_cb,
            .data = r,
        },
    };
    int code = r->rdcf->execute_cb(&rfe, r->rdcf->execute_cb_data);

    // this has to happen after we execute the callback
    // because if an async call is responded in sync mode, there will be a deadlock.
    netdata_mutex_lock(&tmp->mutex);

    if (code == HTTP_RESP_OK || tmp->data_are_ready) {
        bool cancelled = false;
        int rc = 0;
        while (rc == 0 && !cancelled && !tmp->data_are_ready) {
            usec_t now_mono_ut = now_monotonic_usec();
            usec_t stop_mono_ut = __atomic_load_n(&r->stop_monotonic_ut, __ATOMIC_RELAXED) + RRDFUNCTIONS_TIMEOUT_EXTENSION_UT;
            if(now_mono_ut > stop_mono_ut) {
                rc = ETIMEDOUT;
                break;
            }

            // wait for 10ms, and loop again...
            struct timespec tp;
            clock_gettime(CLOCK_REALTIME, &tp);
            tp.tv_nsec += 10 * NSEC_PER_MSEC;

            // tv_nsec must stay in [0, 1 second): a value of exactly 1 second is
            // already out of range and makes pthread_cond_timedwait() fail with EINVAL,
            // so the comparison has to be >= and not >
            if(tp.tv_nsec >= (long)(1 * NSEC_PER_SEC)) {
                tp.tv_sec++;
                tp.tv_nsec -= 1 * NSEC_PER_SEC;
            }

            // the mutex is unlocked within pthread_cond_timedwait()
            rc = pthread_cond_timedwait(&tmp->cond, &tmp->mutex, &tp);
            // the mutex is again ours

            if(rc == ETIMEDOUT) {
                // 10ms have passed

                rc = 0;
                if (!tmp->data_are_ready && r->is_cancelled.cb &&
                    r->is_cancelled.cb(r->is_cancelled.data)) {
                    // internal_error(true, "FUNCTIONS: transaction '%s' is cancelled while waiting for response",
                    //                r->transaction);
                    cancelled = true;
                    rrd_function_cancel_inflight(r);
                    break;
                }
            }
        }

        if (tmp->data_are_ready) {
            // we have a response

            buffer_contents_replace(r->result.wb, buffer_tostring(temp_wb), buffer_strlen(temp_wb));
            r->result.wb->content_type = temp_wb->content_type;
            r->result.wb->expires = temp_wb->expires;

            if(r->result.wb->expires)
                buffer_cacheable(r->result.wb);
            else
                buffer_no_cacheable(r->result.wb);

            code = tmp->code;

            // the callback has already run, so we are the ones to free
            tmp->free_with_signal = false;
            we_should_free = true;
        }
        else if (rc == ETIMEDOUT || cancelled) {
            // timeout
            // we will go away and let the callback free the structure

            if(cancelled)
                code = rrd_call_function_error(r->result.wb,
                                               "Request cancelled",
                                               HTTP_RESP_CLIENT_CLOSED_REQUEST);
            else
                code = rrd_call_function_error(r->result.wb,
                                               "Timeout while waiting for a response from the collector.",
                                               HTTP_RESP_GATEWAY_TIMEOUT);

            tmp->free_with_signal = true;
            we_should_free = false;
        }
        else {
            // pthread_cond_timedwait() failed with something other than a timeout

            code = rrd_call_function_error(
                r->result.wb, "Internal error while communicating with the collector",
                HTTP_RESP_INTERNAL_SERVER_ERROR);

            tmp->free_with_signal = true;
            we_should_free = false;
        }
    }
    else {
        // the response is not ok, and we don't have the data
        // NOTE(review): this path relies on the result callback still being called
        // after a failed execute_cb(), otherwise tmp and temp_wb are never freed - confirm.
        tmp->free_with_signal = true;
        we_should_free = false;
    }

    netdata_mutex_unlock(&tmp->mutex);

    if (we_should_free) {
        rrd_function_call_wait_free(tmp);
        buffer_free(temp_wb);
    }

    return code;
}
// Dispatch an async function call: block for the response when `wait`
// is true, otherwise return as soon as the function has been started.
static inline int rrd_call_function_async(struct rrd_function_inflight *r, bool wait) {
    return wait ? rrd_call_function_async_and_wait(r)
                : rrd_call_function_async_and_dont_wait(r);
}
// ----------------------------------------------------------------------------
// Run the function `cmd` of `host` and return its response code.
//
// Steps:
//  1. sanitize `source` and `cmd` and look the function up on the host;
//  2. check that `access` is enough for the function;
//  3. use the function's own timeout when timeout_s <= 0;
//  4. parse `transaction` as a UUID, or generate a random one when it is
//     missing or invalid;
//  5. register the call in the in-flight requests dictionary (the entry gets
//     its own copies of cmd, transaction, source and payload);
//  6. execute it: sync functions are called directly and the in-flight entry
//     is removed right after; async functions go through
//     rrd_call_function_async(), blocking only when `wait` is true.
//
// On the early error paths (function not found, access denied, duplicate
// transaction) the error is written to result_wb, the acquired function is
// released when one was acquired, and result_cb (when set) is called with
// the error code.
int rrd_function_run(RRDHOST *host, BUFFER *result_wb, int timeout_s, HTTP_ACCESS access, const char *cmd,
                     bool wait, const char *transaction,
                     rrd_function_result_callback_t result_cb, void *result_cb_data,
                     rrd_function_progress_cb_t progress_cb, void *progress_cb_data,
                     rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data,
                     BUFFER *payload, const char *source) {

    int code;
    char sanitized_cmd[PLUGINSD_LINE_MAX + 1];
    const DICTIONARY_ITEM *host_function_acquired = NULL;

    // a NULL source is treated as an empty string
    char sanitized_source[(source ? strlen(source) : 0) + 1];
    rrd_functions_sanitize(sanitized_source, source ? source : "", sizeof(sanitized_source));

    // ------------------------------------------------------------------------
    // find the function

    size_t sanitized_cmd_length = rrd_functions_sanitize(sanitized_cmd, cmd, sizeof(sanitized_cmd));

    code = rrd_functions_find_by_name(host, result_wb, sanitized_cmd, sanitized_cmd_length, &host_function_acquired);
    if(code != HTTP_RESP_OK) {
        rrd_call_function_error(result_wb, "not found", code);

        if(result_cb)
            result_cb(result_wb, code, result_cb_data);

        return code;
    }

    struct rrd_host_function *rdcf = dictionary_acquired_item_value(host_function_acquired);

    if(!web_client_has_enough_access_level(access, rdcf->access)) {

        if(!aclk_connected)
            rrd_call_function_error(result_wb, "This Netdata must be connected to Netdata Cloud to access this function.", HTTP_RESP_PRECOND_FAIL);
        else if(access >= HTTP_ACCESS_ANY)
            rrd_call_function_error(result_wb, "You need to login to the Netdata Cloud space this agent is claimed to, to access this function.", HTTP_RESP_PRECOND_FAIL);
        else /* if(access < HTTP_ACCESS_ANY && rdcf->access < access) */
            rrd_call_function_error(result_wb, "To access this function you need to be an admin in this Netdata Cloud space.", HTTP_RESP_PRECOND_FAIL);

        dictionary_acquired_item_release(host->functions, host_function_acquired);

        if(result_cb)
            result_cb(result_wb, HTTP_RESP_PRECOND_FAIL, result_cb_data);

        return HTTP_RESP_PRECOND_FAIL;
    }

    if(timeout_s <= 0)
        timeout_s = rdcf->timeout;

    // ------------------------------------------------------------------------
    // validate and parse the transaction, or generate a new transaction id

    char uuid_str[UUID_COMPACT_STR_LEN];
    uuid_t uuid;

    if(!transaction || !*transaction || uuid_parse_flexi(transaction, uuid) != 0)
        uuid_generate_random(uuid);

    // from now on, the transaction is always the compact lowercase form of the uuid
    uuid_unparse_lower_compact(uuid, uuid_str);
    transaction = uuid_str;

    // ------------------------------------------------------------------------
    // the function can only be executed in async mode
    // put the function into the inflight requests

    struct rrd_function_inflight t = {
        .used = false,
        .host = host,
        .cmd = strdupz(cmd),
        .sanitized_cmd = strdupz(sanitized_cmd),
        .sanitized_cmd_length = sanitized_cmd_length,
        .transaction = strdupz(transaction),
        .source = strdupz(sanitized_source),
        .payload = buffer_dup(payload),
        .timeout = timeout_s,
        .cancelled = false,
        .stop_monotonic_ut = now_monotonic_usec() + timeout_s * USEC_PER_SEC,
        .host_function_acquired = host_function_acquired,
        .rdcf = rdcf,
        .result = {
            .wb = result_wb,
            .cb = result_cb,
            .data = result_cb_data,
        },
        .is_cancelled = {
            .cb = is_cancelled_cb,
            .data = is_cancelled_cb_data,
        },
        .progress = {
            .cb = progress_cb,
            .data = progress_cb_data,
        },
    };
    uuid_copy(t.transaction_uuid, uuid);

    // the dictionary does not overwrite existing values, so when the transaction
    // is already in flight we get back the existing entry, which has `used` set
    struct rrd_function_inflight *r = dictionary_set(rrd_functions_inflight_requests, transaction, &t, sizeof(t));
    if(r->used) {
        nd_log(NDLS_DAEMON, NDLP_NOTICE,
               "FUNCTIONS: duplicate transaction '%s', function: '%s'",
               t.transaction, t.cmd);

        code = rrd_call_function_error(result_wb, "duplicate transaction", HTTP_RESP_BAD_REQUEST);

        rrd_functions_inflight_cleanup(&t);

        // release the item against the dictionary it was acquired from (this request's host);
        // r is the already registered entry and may belong to a different host
        dictionary_acquired_item_release(host->functions, t.host_function_acquired);

        if(result_cb)
            result_cb(result_wb, code, result_cb_data);

        return code;
    }
    r->used = true;
    // internal_error(true, "FUNCTIONS: transaction '%s' started", r->transaction);

    if(r->rdcf->sync) {
        // the caller has to wait

        struct rrd_function_execute rfe = {
            .transaction = &r->transaction_uuid,
            .function = r->sanitized_cmd,
            .payload = r->payload,
            .source = r->source,
            .stop_monotonic_ut = &r->stop_monotonic_ut,
            .result = {
                .wb = r->result.wb,

                // in sync mode the caller's result callback is passed through as-is
                .cb = r->result.cb,
                .data = r->result.data,
            },
            .progress = {
                .cb = r->progress.cb,
                .data = r->progress.data,
            },
            .is_cancelled = {
                .cb = r->is_cancelled.cb,
                .data = r->is_cancelled.data,
            },
            .register_canceller = {
                .cb = NULL,
                .data = NULL,
            },
            .register_progresser = {
                .cb = NULL,
                .data = NULL,
            },
        };
        code = r->rdcf->execute_cb(&rfe, r->rdcf->execute_cb_data);

        // the call has finished, the transaction is no longer in flight
        rrd_inflight_function_cleanup(host, r->transaction);
        return code;
    }

    return rrd_call_function_async(r, wait);
}
// Check whether the in-flight request of `transaction` was started with `cb`
// as the caller's result callback.
// Returns false when the transaction is not in flight.
bool rrd_function_has_this_original_result_callback(uuid_t *transaction, rrd_function_result_callback_t cb) {
    // the dictionary is keyed by the compact string form of the uuid
    char key[UUID_COMPACT_STR_LEN];
    uuid_unparse_lower_compact(*transaction, key);

    const DICTIONARY_ITEM *acquired = dictionary_get_and_acquire_item(rrd_functions_inflight_requests, key);
    if(!acquired)
        return false;

    struct rrd_function_inflight *inflight = dictionary_acquired_item_value(acquired);
    bool same_callback = (inflight->result.cb == cb) ? true : false;

    dictionary_acquired_item_release(rrd_functions_inflight_requests, acquired);
    return same_callback;
}
// Mark an in-flight request as cancelled and, when the collector is still
// running, call the canceller registered by the function (if any).
//
// The flag is set with a single atomic exchange, so when several cancel
// requests for the same transaction race with each other, exactly one of
// them observes the flag as previously unset and goes on to notify the
// function; the others only log that it is already cancelled.
//
// A NULL `r` is ignored.
static void rrd_function_cancel_inflight(struct rrd_function_inflight *r) {
    if(!r)
        return;

    // set the flag and learn its previous value in one step
    bool already_cancelled = __atomic_exchange_n(&r->cancelled, true, __ATOMIC_RELAXED);
    if(already_cancelled) {
        nd_log(NDLS_DAEMON, NDLP_DEBUG,
               "FUNCTIONS: received a CANCEL request for transaction '%s', but it is already cancelled.",
               r->transaction);
        return;
    }

    if(!rrd_collector_dispatcher_acquire(r->rdcf->collector)) {
        nd_log(NDLS_DAEMON, NDLP_DEBUG,
               "FUNCTIONS: received a CANCEL request for transaction '%s', but the collector is not running.",
               r->transaction);
        return;
    }

    // the canceller is optional: the function registers it only if it supports cancellation
    if(r->canceller.cb)
        r->canceller.cb(r->canceller.data);

    rrd_collector_dispatcher_release(r->rdcf->collector);
}
// Cancel the in-flight request identified by the `transaction` string.
// When no such transaction is in flight, only a debug message is logged.
void rrd_function_cancel(const char *transaction) {
    const DICTIONARY_ITEM *acquired = dictionary_get_and_acquire_item(rrd_functions_inflight_requests, transaction);

    if(acquired) {
        // keep the entry acquired for as long as we are working on it
        rrd_function_cancel_inflight(dictionary_acquired_item_value(acquired));
        dictionary_acquired_item_release(rrd_functions_inflight_requests, acquired);
    }
    else {
        nd_log(NDLS_DAEMON, NDLP_DEBUG,
               "FUNCTIONS: received a CANCEL request for transaction '%s', but the transaction is not running.",
               transaction);
    }
}
// Handle a progress request for the in-flight request identified by the
// `transaction` string: update its deadline via
// functions_stop_monotonic_update_on_progress() and call the progresser
// registered by the function (if any).
// Nothing but a debug message happens when the transaction is not in flight
// or when the collector is not running.
void rrd_function_progress(const char *transaction) {
    const DICTIONARY_ITEM *acquired = dictionary_get_and_acquire_item(rrd_functions_inflight_requests, transaction);
    if(!acquired) {
        nd_log(NDLS_DAEMON, NDLP_DEBUG,
               "FUNCTIONS: received a PROGRESS request for transaction '%s', but the transaction is not running.",
               transaction);
        return;
    }

    struct rrd_function_inflight *inflight = dictionary_acquired_item_value(acquired);

    if(rrd_collector_dispatcher_acquire(inflight->rdcf->collector)) {
        functions_stop_monotonic_update_on_progress(&inflight->stop_monotonic_ut);

        // the progresser is optional
        if(inflight->progresser.cb)
            inflight->progresser.cb(inflight->progresser.data);

        rrd_collector_dispatcher_release(inflight->rdcf->collector);
    }
    else {
        nd_log(NDLS_DAEMON, NDLP_DEBUG,
               "FUNCTIONS: received a PROGRESS request for transaction '%s', but the collector is not running.",
               transaction);
    }

    // the entry was acquired above, release it on every path
    dictionary_acquired_item_release(rrd_functions_inflight_requests, acquired);
}
// Same as rrd_function_progress(), for callers that hold the transaction
// as a binary uuid: converts it to its compact lowercase string form first.
void rrd_function_call_progresser(uuid_t *transaction) {
    char transaction_str[UUID_COMPACT_STR_LEN];

    uuid_unparse_lower_compact(*transaction, transaction_str);
    rrd_function_progress(transaction_str);
}