netdata_netdata/web/api/formatters/rrd2json.c
Costa Tsaousis 368a26cfee
DBENGINE v2 ()
* count open cache pages referring to datafile

* eliminate wasted flush attempts

* remove eliminated variable

* journal v2 scanning split functions

* avoid locking open cache for a long time while migrating to journal v2

* don't acquire datafile for the loop; disable thread cancelability while a query is running

* work on datafile acquiring

* work on datafile deletion

* work on datafile deletion again

* logs of dbengine should start with DBENGINE

* thread specific key for queries to check if a query finishes without a finalize
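
(One plausible reading of this mechanism, sketched with POSIX thread-specific keys — the callback and all names here are illustrative, not the actual netdata code:)

    #include <pthread.h>
    #include <stdio.h>

    static pthread_key_t query_handle_key;

    // the destructor fires at thread exit only if the key still holds a value,
    // i.e. some query handle was registered but never finalized
    static void query_handle_leak_check(void *value) {
        if(value)
            fprintf(stderr, "DBENGINE: query completed without a finalize\n");
    }

    static void queries_key_init(void) {
        pthread_key_create(&query_handle_key, query_handle_leak_check);
    }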

* page_uuid is not used anymore

* Cleanup judy traversal when building new v2
Remove unneeded calls to the metric registry

* metric is 8 bytes smaller; timestamps are protected with a spinlock; timestamps in metric are now always coherent
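
(A minimal sketch of the layout this bullet describes, with assumed field and helper names — the real struct lives in the metrics registry:)

    typedef struct metric {
        time_t first_time_s;            // retention start, in seconds
        time_t latest_time_s;           // retention end, in seconds
        SPINLOCK spinlock;              // protects the two timestamps above
        // ... other members ...
    } METRIC;

    static inline void metric_retention_get(METRIC *m, time_t *first_s, time_t *last_s) {
        netdata_spinlock_lock(&m->spinlock);
        *first_s = m->first_time_s;     // both values are read under the same lock,
        *last_s  = m->latest_time_s;    // so a reader never sees a torn/mismatched pair
        netdata_spinlock_unlock(&m->spinlock);
    }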

* disable checks for invalid time-ranges

* Remove type from page details

* report scanning time

* remove infinite loop from datafile acquire for deletion

* remove infinite loop from datafile acquire for deletion again

* trace query handles

* properly allocate array of dimensions in replication

* metrics cleanup

* metrics registry uses arrayalloc

* arrayalloc free should be protected by lock

* use array alloc in page cache

* journal v2 scanning fix

* datafile reference leak hunting

* do not load metrics of future timestamps

* initialize reasons

* fix datafile reference leak

* do not load pages that are entirely overlapped by others

* expand metric retention atomically

* split replication logic in initialization and execution

* replication prepare ahead queries

* replication prepare ahead queries fixed

* fix replication workers accounting

* add router active queries chart

* restore accounting of pages metadata sources; cleanup replication

* don't count skipped pages as unroutable

* notes on services shutdown

* do not migrate to journal v2 too early, while it has pending dirty pages in the main cache for the specific journal file

* do not add pages we don't need to pdc

* time in range re-work to provide info about past and future matches

* finer control over the pages selected for processing; accounting of page-related issues

* fix invalid reference to handle->page

* eliminate data collection handle of pg_lookup_next

* accounting for queries with gaps

* query preprocessing the same way the processing is done; cache now supports all operations on Judy

* dynamic libuv workers based on number of processors; minimum libuv workers 8; replication query init ahead uses the libuv workers minus the reserved ones (3)
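
(A hedged sketch of this sizing rule — the exact formula and the helper are assumptions; UV_THREADPOOL_SIZE is the standard libuv environment variable:)

    #include <stdio.h>
    #include <stdlib.h>

    // size the libuv worker pool from the number of processors, with a floor
    // of 8; replication query preparation may use all workers except 3 reserved
    static void setup_libuv_workers(size_t cpus) {
        size_t libuv_workers = cpus < 8 ? 8 : cpus;
        size_t reserved = 3;
        size_t usable_for_replication = libuv_workers - reserved;
        (void)usable_for_replication;

        char buf[16];
        snprintf(buf, sizeof(buf), "%zu", libuv_workers);
        setenv("UV_THREADPOOL_SIZE", buf, 1);   // libuv reads this before its pool starts
    }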

* get into pdc all matching pages from main cache and open cache; do not do v2 scan if main cache and open cache can satisfy the query

* finer gaps calculation; accounting of overlapping pages in queries

* fix gaps accounting

* move datafile deletion to worker thread

* tune libuv workers and thread stack size

* stop netdata threads gradually

* run indexing together with cache flush/evict

* more work on clean shutdown

* limit the number of pages to evict per run

* do not lock the clean queue for accesses if it is not possible at that time - the page will be moved to the back of the list during eviction

* economies on flags for smaller page footprint; cleanup and renames

* eviction moves referenced pages to the end of the queue

* use murmur hash for indexing partition

* murmur should be static

* use more indexing partitions

* revert number of partitions to number of cpus
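
(The bullets above describe a partitioned index, one partition per CPU; a minimal sketch of the idea using the well-known MurmurHash3 32-bit finalizer — function names are assumed:)

    #include <stdint.h>
    #include <stddef.h>

    // MurmurHash3 fmix32 finalizer: cheap, well-mixed bits for partitioning
    static inline uint32_t murmur32(uint32_t h) {
        h ^= h >> 16;
        h *= 0x85ebca6b;
        h ^= h >> 13;
        h *= 0xc2b2ae35;
        h ^= h >> 16;
        return h;
    }

    // pick the index partition a key belongs to, so concurrent
    // lookups spread across independent locks
    static inline size_t indexing_partition(uint32_t key, size_t partitions) {
        return murmur32(key) % partitions;
    }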

* cancel threads first, then stop services

* revert default thread stack size

* don't execute replication requests of disconnected senders

* wait more time for services that are exiting gradually

* fixed last commit

* finer control on page selection algorithm

* default stacksize of 1MB

* fix formatting

* fix worker utilization going crazy when the number is rotating

* avoid buffer full due to replication preprocessing of requests

* support query priorities

* add count of spins in spinlock when compiled with netdata internal checks
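
(A sketch of what such instrumentation typically looks like — the struct and helper names are illustrative; NETDATA_INTERNAL_CHECKS is the build flag mentioned elsewhere in this log:)

    #include <stdbool.h>
    #include <stddef.h>

    typedef struct spinlock {
        bool locked;
    #ifdef NETDATA_INTERNAL_CHECKS
        size_t spins;                       // how many times we had to retry
    #endif
    } SPINLOCK_SKETCH;

    static inline void spinlock_lock_sketch(SPINLOCK_SKETCH *s) {
        while(__atomic_test_and_set(&s->locked, __ATOMIC_ACQUIRE)) {
    #ifdef NETDATA_INTERNAL_CHECKS
            __atomic_add_fetch(&s->spins, 1, __ATOMIC_RELAXED);
    #endif
        }
    }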

* remove prioritization from dbengine queries; cache now uses mutexes for the queues

* hot pages are now in sections judy arrays, like dirty

* align replication queries to optimal page size

* during flushing add to clean and evict in batches

* Revert "during flushing add to clean and evict in batches"

This reverts commit 8fb2b69d06.

* don't lock clean while evicting pages during flushing

* Revert "don't lock clean while evicting pages during flushing"

This reverts commit d6c82b5f40.

* Revert "Revert "during flushing add to clean and evict in batches""

This reverts commit ca7a187537.

* don't cross locks during flushing, for the fastest flushes possible

* low-priority queries load pages synchronously

* Revert "low-priority queries load pages synchronously"

This reverts commit 1ef2662ddc.

* cache uses spinlock again

* during flushing, don't lock the clean queue at all; each item is added atomically
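
(Illustrative sketch of adding items to a list without taking its lock — a classic atomic push; the real cache uses its own queue structures:)

    #include <stdatomic.h>

    struct clean_page {
        struct clean_page *next;
        // ... page payload ...
    };

    static _Atomic(struct clean_page *) clean_head;

    // push one flushed page onto the clean list; lock-free, so flushing
    // never contends with threads holding the clean queue lock
    static void clean_push(struct clean_page *p) {
        struct clean_page *old = atomic_load_explicit(&clean_head, memory_order_relaxed);
        do {
            p->next = old;
        } while(!atomic_compare_exchange_weak_explicit(
                    &clean_head, &old, p,
                    memory_order_release, memory_order_relaxed));
    }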

* do smaller eviction runs

* evict one page at a time to minimize lock contention on the clean queue

* fix eviction statistics

* fix last commit

* plain should be main cache

* event loop cleanup; evictions and flushes can now happen concurrently

* run flush and evictions from tier0 only

* remove unneeded variables

* flushing open cache is not needed; flushing protection is irrelevant since flushing is global for all tiers; added protection to datafiles so that only one flusher can run per datafile at any given time

* added worker jobs in timer to find the slow part of it

* support fast eviction of pages when all_of_them is set

* revert default thread stack size

* bypass event loop for dispatching read extent commands to workers - send them directly

* Revert "bypass event loop for dispatching read extent commands to workers - send them directly"

This reverts commit 2c08bc5bab.

* cache work requests

* minimize memory operations during flushing; caching of extent_io_descriptors and page_descriptors

* publish flushed pages to open cache in the thread pool

* prevent eventloop requests from getting stacked in the event loop

* single threaded dbengine controller; support priorities for all queries; major cleanup and restructuring of rrdengine.c

* more rrdengine.c cleanup

* enable db rotation

* do not log when there is a filter

* do not run multiple migrations to journal v2

* load all extents async

* fix wrong paste

* report opcodes waiting, work items dispatched, work items executing

* cleanup event loop memory every 10 minutes

* don't dispatch more work requests than the number of threads available

* use the dispatched counter instead of the executing counter to check if the worker thread pool is full
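
(The distinction matters because a work item is "dispatched" the moment it is queued to the pool, but "executing" only once a thread picks it up; checking the executing counter alone can overcommit the pool. A hedged sketch:)

    #include <stdbool.h>
    #include <stddef.h>

    // dispatched - finished = items the pool currently owns (queued or running);
    // the executing counter misses items still waiting in the pool's queue
    static bool thread_pool_is_full(size_t dispatched, size_t finished, size_t pool_size) {
        return (dispatched - finished) >= pool_size;
    }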

* remove UV_RUN_NOWAIT

* replication to fill the queues

* caching of extent buffers; code cleanup

* caching of pdc and pd; rework on journal v2 indexing, datafile creation, database rotation

* single transaction wal

* synchronous flushing

* first cancel the threads, then signal them to exit

* caching of rrdeng query handles; added priority to query target; health is now low prio

* add priority to the missing points; do not allow critical priority in queries

* offload query preparation and routing to libuv thread pool

* updated timing charts for the offloaded query preparation

* caching of WALs

* accounting for struct caches (buffers); do not load extents with invalid sizes

* protection against memory ballooning during replication due to the optimal alignment of pages; sender thread buffer is now also reset when the circular buffer is reset

* also check that the expanded 'before' does not exceed the chart's last updated time

* also check that the expanded 'before' is not after the wall-clock time at which the query started

* Remove unused variable

* replication to queue fewer queries; cleanup of internal fatals

* Mark dimension to be updated async

* caching of extent_page_details_list (epdl) and datafile_extent_offset_list (deol)

* disable pgc stress test, under an ifdef

* disable mrg stress test under an ifdef

* Mark chart and host labels, host info for async check and store in the database

* dictionary items use arrayalloc

* cache section pages structure is allocated with arrayalloc

* Add function to wakeup the aclk query threads and check for exit
Register function to be called during shutdown after signaling the service to exit

* parallel preparation of all dimensions of queries

* be more responsive in enabling streaming after replication

* atomically finish chart replication

* fix last commit

* fix last commit again

* fix last commit again again

* fix last commit again again again

* unify the normalization of retention calculation for collected charts; do not enable streaming if more than 60 points are to be transferred; eliminate an allocation during replication
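
(A hedged sketch of the streaming decision just described — the helper name and signature are assumptions:)

    #include <stdbool.h>
    #include <stddef.h>
    #include <time.h>

    // after replication catches up, enable direct streaming only when the
    // remaining gap is small; otherwise keep replicating
    static bool can_enable_streaming(time_t last_db_time_s, time_t now_s, time_t update_every_s) {
        if(update_every_s <= 0) return true;
        size_t points_missing = (size_t)((now_s - last_db_time_s) / update_every_s);
        return points_missing <= 60;   // per this commit: more than 60 points => replicate
    }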

* do not cancel 'start streaming'; use high-priority queries when we have locked chart data collection

* prevent starvation on opcodes execution, by allowing 2% of the requests to be re-ordered
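
(One simple way to read "2% re-ordered": every 50th dequeue ignores priority and takes the oldest waiting request, so low-priority opcodes cannot starve. A sketch under that assumption — the types and both queue helpers are hypothetical:)

    static size_t dequeues;

    static OPCODE_REQUEST *opcode_dequeue(OPCODE_QUEUE *q) {
        // 1 in 50 dequeues (2%) bypasses priority ordering
        if((++dequeues % 50) == 0)
            return queue_pop_oldest(q);           // hypothetical helper

        return queue_pop_highest_priority(q);     // hypothetical helper
    }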

* opcode now uses 2 spinlocks one for the caching of allocations and one for the waiting queue

* Remove check locks and NETDATA_VERIFY_LOCKS as it is not needed anymore

* Fix bad memory allocation / cleanup

* Cleanup ACLK sync initialization (part 1)

* Don't update metric registry during shutdown (part 1)

* Prevent crash when dashboard is refreshed and host goes away

* Mark ctx that is shutting down.
Test not adding flushed pages to open cache as hot if we are shutting down

* make ML work

* Fix compile without NETDATA_INTERNAL_CHECKS

* shutdown each ctx independently

* fix completion of quiesce

* do not update shared ML charts

* Create ML charts on child hosts.

When a parent runs ML for a child, the relevant ML charts
should be created on the child host. These charts should use
the parent's hostname to differentiate multiple parents that might
run ML for a child.

The only exception to this rule is the training/prediction resource
usage charts. These are created on the localhost of the parent host,
because they provide information specific to said host.

* check new ml code

* first save the database, then free all memory

* dbengine prepares its exit before freeing all memory; fixed deadlock in cache hot-to-dirty transition; added missing check to the query engine for metrics without any data in the db

* Cleanup metadata thread (part 2)

* increase refcount before dispatching prep command

* Do not try to stop anomaly detection threads twice.

A separate function call has been added to stop anomaly detection threads.
This commit removes the left over function calls that were made
internally when a host was being created/destroyed.

* Remove allocations when smoothing samples buffer

The number of dims per sample is always 1, i.e. we are training and
predicting only individual dimensions.

* set the orphan flag when loading archived hosts

* track worker dispatch callbacks and threadpool worker init

* make ML threads joinable; mark ctx having flushing in progress as early as possible

* fix allocation counter

* Cleanup metadata thread (part 3)

* Cleanup metadata thread (part 4)

* Skip metadata host scan when running unittest

* unittest support during init

* don't use all the libuv threads for queries

* break an infinite loop when sleep_usec() is interrupted

* ml prediction is a collector for several charts

* sleep_usec() now makes sure it will never loop if it passes the time expected; sleep_usec() now uses nanosleep() because clock_nanosleep() misses signals on netdata exit
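
(A minimal sketch of the behavior described, assuming the usual POSIX semantics — nanosleep() reports the unslept remainder on EINTR, so resuming with only that remainder guarantees the total sleep never exceeds the request:)

    #include <errno.h>
    #include <stdint.h>
    #include <time.h>

    void sleep_usec_sketch(uint64_t usec) {
        struct timespec req = {
            .tv_sec  = (time_t)(usec / 1000000ULL),
            .tv_nsec = (long)((usec % 1000000ULL) * 1000ULL),
        };
        struct timespec rem = { 0, 0 };

        while(nanosleep(&req, &rem) == -1 && errno == EINTR) {
            // a signal (e.g. netdata exiting) woke us; the real implementation
            // would check an exit flag here before continuing to sleep
            req = rem;
            rem = (struct timespec){ 0, 0 };
        }
    }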

* worker_unregister() in netdata threads cleanup

* moved pdc/epdl/deol/extent_buffer related code to pdc.c and pdc.h

* fixed ML issues

* removed engine2 directory

* added dbengine2 files in CMakeLists.txt

* move query plan data to the query target, so that they can be exposed in jsonwrap

* uniform definition of query plan according to the other query target members

* event_loop should be in daemon, not libnetdata

* metric_retention_by_uuid() is now part of the storage engine abstraction

* unify time_t variables to have the suffix _s (meaning: seconds)

* old dbengine statistics become "dbengine io"

* do not enable ML resource usage charts by default

* unify ml chart families, plugins and modules

* cleanup query plans from query target

* cleanup all extent buffers

* added debug info for rrddim slot to time

* rrddim now does proper gap management

* full rewrite of the mem modes

* use library functions for madvise

* use CHECKSUM_SZ for the checksum size

* fix coverity warning about the impossible case of returning a page that is entirely in the past of the query

* fix dbengine shutdown

* keep the old datafile lock until a new datafile has been created, to avoid creating multiple datafiles concurrently

* fine tune cache evictions

* don't initialize health if the health service is not running - prevent crash on shutdown while children get connected

* rename AS threads to ACLK[hostname]

* prevent re-use of uninitialized memory in queries

* use JulyL instead of JudyL for PDC operations - to test it first

* add also JulyL files

* fix July memory accounting

* disable July for PDC (use Judy)

* use the function to remove datafiles from linked list

* fix july and event_loop

* add july to libnetdata subdirs

* rename time_t variables that end in _t to end in _s

* replicate when there is a gap at the beginning of the replication period

* reset postponing of sender connections when a receiver is connected

* Adjust update every properly

* fix replication infinite loop due to last change

* packed enums in rrd.h and cleanup of obsolete rrd structure members

* prevent deadlock in replication: replication_recalculate_buffer_used_ratio_unsafe() deadlocking with replication_sender_delete_pending_requests()

* void unused variable

* void unused variables

* fix indentation

* entries_by_time calculation in VD was wrong; restored internal checks for future timestamps

* macros to calculate page entries by time and size
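
(A hedged sketch of the kind of macros meant here — names and parameters are assumptions:)

    // a page holds fixed-size points, so its capacity follows from its size...
    #define PAGE_ENTRIES_BY_SIZE(size_bytes, point_size) \
        ((size_bytes) / (point_size))

    // ...and a time range at a fixed update frequency maps to a point count
    #define PAGE_ENTRIES_BY_TIME(start_s, end_s, update_every_s) \
        (((end_s) - (start_s)) / (update_every_s) + 1)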

* prevent statsd cleanup crash on exit

* cleanup health thread related variables

Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Co-authored-by: vkalintiris <vasilis@netdata.cloud>
2023-01-10 19:59:21 +02:00


// SPDX-License-Identifier: GPL-3.0-or-later

#include "web/api/web_api_v1.h"
#include "database/storage_engine.h"

void rrd_stats_api_v1_chart(RRDSET *st, BUFFER *wb) {
    rrdset2json(st, wb, NULL, NULL, 0);
}
void rrdr_buffer_print_format(BUFFER *wb, uint32_t format) {
    switch(format) {
        case DATASOURCE_JSON:
            buffer_strcat(wb, DATASOURCE_FORMAT_JSON);
            break;

        case DATASOURCE_DATATABLE_JSON:
            buffer_strcat(wb, DATASOURCE_FORMAT_DATATABLE_JSON);
            break;

        case DATASOURCE_DATATABLE_JSONP:
            buffer_strcat(wb, DATASOURCE_FORMAT_DATATABLE_JSONP);
            break;

        case DATASOURCE_JSONP:
            buffer_strcat(wb, DATASOURCE_FORMAT_JSONP);
            break;

        case DATASOURCE_SSV:
            buffer_strcat(wb, DATASOURCE_FORMAT_SSV);
            break;

        case DATASOURCE_CSV:
            buffer_strcat(wb, DATASOURCE_FORMAT_CSV);
            break;

        case DATASOURCE_TSV:
            buffer_strcat(wb, DATASOURCE_FORMAT_TSV);
            break;

        case DATASOURCE_HTML:
            buffer_strcat(wb, DATASOURCE_FORMAT_HTML);
            break;

        case DATASOURCE_JS_ARRAY:
            buffer_strcat(wb, DATASOURCE_FORMAT_JS_ARRAY);
            break;

        case DATASOURCE_SSV_COMMA:
            buffer_strcat(wb, DATASOURCE_FORMAT_SSV_COMMA);
            break;

        default:
            buffer_strcat(wb, "unknown");
            break;
    }
}
// run a query on a chart and reduce the result to a single value
int rrdset2value_api_v1(
          RRDSET *st
        , BUFFER *wb
        , NETDATA_DOUBLE *n
        , const char *dimensions
        , size_t points
        , time_t after
        , time_t before
        , RRDR_GROUPING group_method
        , const char *group_options
        , time_t resampling_time
        , uint32_t options
        , time_t *db_after
        , time_t *db_before
        , size_t *db_points_read
        , size_t *db_points_per_tier
        , size_t *result_points_generated
        , int *value_is_null
        , NETDATA_DOUBLE *anomaly_rate
        , time_t timeout
        , size_t tier
        , QUERY_SOURCE query_source
        , STORAGE_PRIORITY priority
) {
    int ret = HTTP_RESP_INTERNAL_SERVER_ERROR;

    ONEWAYALLOC *owa = onewayalloc_create(0);
    RRDR *r = rrd2rrdr_legacy(
            owa,
            st,
            points,
            after,
            before,
            group_method,
            resampling_time,
            options,
            dimensions,
            group_options,
            timeout,
            tier,
            query_source,
            priority);

    if(!r) {
        if(value_is_null) *value_is_null = 1;
        ret = HTTP_RESP_INTERNAL_SERVER_ERROR;
        goto cleanup;
    }

    if(db_points_read)
        *db_points_read += r->internal.db_points_read;

    if(db_points_per_tier) {
        for(size_t t = 0; t < storage_tiers ; t++)
            db_points_per_tier[t] += r->internal.tier_points_read[t];
    }

    if(result_points_generated)
        *result_points_generated += r->internal.result_points_generated;

    if(rrdr_rows(r) == 0) {
        if(db_after)  *db_after  = 0;
        if(db_before) *db_before = 0;
        if(value_is_null) *value_is_null = 1;

        ret = HTTP_RESP_BAD_REQUEST;
        goto cleanup;
    }

    if(wb) {
        if (r->result_options & RRDR_RESULT_OPTION_RELATIVE)
            buffer_no_cacheable(wb);
        else if (r->result_options & RRDR_RESULT_OPTION_ABSOLUTE)
            buffer_cacheable(wb);
    }

    if(db_after)  *db_after  = r->after;
    if(db_before) *db_before = r->before;

    // reduce the result to a single value: the last row, unless the query is reversed
    long i = (!(options & RRDR_OPTION_REVERSED)) ? (long)rrdr_rows(r) - 1 : 0;
    *n = rrdr2value(r, i, options, value_is_null, anomaly_rate);
    ret = HTTP_RESP_OK;

cleanup:
    rrdr_free(owa, r);
    onewayalloc_destroy(owa);
    return ret;
}
// execute the query described by qt and render the result into wb,
// in the requested data source format
int data_query_execute(ONEWAYALLOC *owa, BUFFER *wb, QUERY_TARGET *qt, time_t *latest_timestamp) {
    RRDR *r = rrd2rrdr(owa, qt);
    if(!r) {
        buffer_strcat(wb, "Cannot generate output with these parameters on this chart.");
        return HTTP_RESP_INTERNAL_SERVER_ERROR;
    }

    if (r->result_options & RRDR_RESULT_OPTION_CANCEL) {
        rrdr_free(owa, r);
        return HTTP_RESP_BACKEND_FETCH_FAILED;
    }

    if(r->result_options & RRDR_RESULT_OPTION_RELATIVE)
        buffer_no_cacheable(wb);
    else if(r->result_options & RRDR_RESULT_OPTION_ABSOLUTE)
        buffer_cacheable(wb);

    if(latest_timestamp && rrdr_rows(r) > 0)
        *latest_timestamp = r->before;

    DATASOURCE_FORMAT format = qt->request.format;
    RRDR_OPTIONS options = qt->request.options;
    RRDR_GROUPING group_method = qt->request.group_method;

    switch(format) {
        case DATASOURCE_SSV:
            if(options & RRDR_OPTION_JSON_WRAP) {
                wb->contenttype = CT_APPLICATION_JSON;
                rrdr_json_wrapper_begin(r, wb, format, options, 1, group_method);
                rrdr2ssv(r, wb, options, "", " ", "");
                rrdr_json_wrapper_end(r, wb, format, options, 1);
            }
            else {
                wb->contenttype = CT_TEXT_PLAIN;
                rrdr2ssv(r, wb, options, "", " ", "");
            }
            break;

        case DATASOURCE_SSV_COMMA:
            if(options & RRDR_OPTION_JSON_WRAP) {
                wb->contenttype = CT_APPLICATION_JSON;
                rrdr_json_wrapper_begin(r, wb, format, options, 1, group_method);
                rrdr2ssv(r, wb, options, "", ",", "");
                rrdr_json_wrapper_end(r, wb, format, options, 1);
            }
            else {
                wb->contenttype = CT_TEXT_PLAIN;
                rrdr2ssv(r, wb, options, "", ",", "");
            }
            break;

        case DATASOURCE_JS_ARRAY:
            if(options & RRDR_OPTION_JSON_WRAP) {
                wb->contenttype = CT_APPLICATION_JSON;
                rrdr_json_wrapper_begin(r, wb, format, options, 0, group_method);
                rrdr2ssv(r, wb, options, "[", ",", "]");
                rrdr_json_wrapper_end(r, wb, format, options, 0);
            }
            else {
                wb->contenttype = CT_APPLICATION_JSON;
                rrdr2ssv(r, wb, options, "[", ",", "]");
            }
            break;

        case DATASOURCE_CSV:
            if(options & RRDR_OPTION_JSON_WRAP) {
                wb->contenttype = CT_APPLICATION_JSON;
                rrdr_json_wrapper_begin(r, wb, format, options, 1, group_method);
                // "\\n" is escaped, because the CSV payload is embedded in a JSON string
                rrdr2csv(r, wb, format, options, "", ",", "\\n", "");
                rrdr_json_wrapper_end(r, wb, format, options, 1);
            }
            else {
                wb->contenttype = CT_TEXT_PLAIN;
                rrdr2csv(r, wb, format, options, "", ",", "\r\n", "");
            }
            break;

        case DATASOURCE_CSV_MARKDOWN:
            if(options & RRDR_OPTION_JSON_WRAP) {
                wb->contenttype = CT_APPLICATION_JSON;
                rrdr_json_wrapper_begin(r, wb, format, options, 1, group_method);
                rrdr2csv(r, wb, format, options, "", "|", "\\n", "");
                rrdr_json_wrapper_end(r, wb, format, options, 1);
            }
            else {
                wb->contenttype = CT_TEXT_PLAIN;
                rrdr2csv(r, wb, format, options, "", "|", "\r\n", "");
            }
            break;

        case DATASOURCE_CSV_JSON_ARRAY:
            wb->contenttype = CT_APPLICATION_JSON;
            if(options & RRDR_OPTION_JSON_WRAP) {
                rrdr_json_wrapper_begin(r, wb, format, options, 0, group_method);
                buffer_strcat(wb, "[\n");
                // set the flag with | (not +), which is safe even if it is already set
                rrdr2csv(r, wb, format, options | RRDR_OPTION_LABEL_QUOTES, "[", ",", "]", ",\n");
                buffer_strcat(wb, "\n]");
                rrdr_json_wrapper_end(r, wb, format, options, 0);
            }
            else {
                wb->contenttype = CT_APPLICATION_JSON;
                buffer_strcat(wb, "[\n");
                rrdr2csv(r, wb, format, options | RRDR_OPTION_LABEL_QUOTES, "[", ",", "]", ",\n");
                buffer_strcat(wb, "\n]");
            }
            break;

        case DATASOURCE_TSV:
            if(options & RRDR_OPTION_JSON_WRAP) {
                wb->contenttype = CT_APPLICATION_JSON;
                rrdr_json_wrapper_begin(r, wb, format, options, 1, group_method);
                rrdr2csv(r, wb, format, options, "", "\t", "\\n", "");
                rrdr_json_wrapper_end(r, wb, format, options, 1);
            }
            else {
                wb->contenttype = CT_TEXT_PLAIN;
                rrdr2csv(r, wb, format, options, "", "\t", "\r\n", "");
            }
            break;

        case DATASOURCE_HTML:
            if(options & RRDR_OPTION_JSON_WRAP) {
                wb->contenttype = CT_APPLICATION_JSON;
                rrdr_json_wrapper_begin(r, wb, format, options, 1, group_method);
                buffer_strcat(wb, "<html>\\n<center>\\n<table border=\\\"0\\\" cellpadding=\\\"5\\\" cellspacing=\\\"5\\\">\\n");
                rrdr2csv(r, wb, format, options, "<tr><td>", "</td><td>", "</td></tr>\\n", "");
                buffer_strcat(wb, "</table>\\n</center>\\n</html>\\n");
                rrdr_json_wrapper_end(r, wb, format, options, 1);
            }
            else {
                wb->contenttype = CT_TEXT_HTML;
                buffer_strcat(wb, "<html>\n<center>\n<table border=\"0\" cellpadding=\"5\" cellspacing=\"5\">\n");
                rrdr2csv(r, wb, format, options, "<tr><td>", "</td><td>", "</td></tr>\n", "");
                buffer_strcat(wb, "</table>\n</center>\n</html>\n");
            }
            break;

        case DATASOURCE_DATATABLE_JSONP:
            wb->contenttype = CT_APPLICATION_X_JAVASCRIPT;
            if(options & RRDR_OPTION_JSON_WRAP)
                rrdr_json_wrapper_begin(r, wb, format, options, 0, group_method);
            rrdr2json(r, wb, options, 1);
            if(options & RRDR_OPTION_JSON_WRAP)
                rrdr_json_wrapper_end(r, wb, format, options, 0);
            break;

        case DATASOURCE_DATATABLE_JSON:
            wb->contenttype = CT_APPLICATION_JSON;
            if(options & RRDR_OPTION_JSON_WRAP)
                rrdr_json_wrapper_begin(r, wb, format, options, 0, group_method);
            rrdr2json(r, wb, options, 1);
            if(options & RRDR_OPTION_JSON_WRAP)
                rrdr_json_wrapper_end(r, wb, format, options, 0);
            break;

        case DATASOURCE_JSONP:
            wb->contenttype = CT_APPLICATION_X_JAVASCRIPT;
            if(options & RRDR_OPTION_JSON_WRAP)
                rrdr_json_wrapper_begin(r, wb, format, options, 0, group_method);
            rrdr2json(r, wb, options, 0);
            if(options & RRDR_OPTION_JSON_WRAP)
                rrdr_json_wrapper_end(r, wb, format, options, 0);
            break;

        case DATASOURCE_JSON:
        default:
            wb->contenttype = CT_APPLICATION_JSON;
            if(options & RRDR_OPTION_JSON_WRAP)
                rrdr_json_wrapper_begin(r, wb, format, options, 0, group_method);
            rrdr2json(r, wb, options, 0);
            if(options & RRDR_OPTION_JSON_WRAP) {
                if(options & RRDR_OPTION_RETURN_JWAR) {
                    rrdr_json_wrapper_anomaly_rates(r, wb, format, options, 0);
                    rrdr2json(r, wb, options | RRDR_OPTION_INTERNAL_AR, 0);
                }
                rrdr_json_wrapper_end(r, wb, format, options, 0);
            }
            break;
    }

    rrdr_free(owa, r);
    return HTTP_RESP_OK;
}