// SPDX-License-Identifier: GPL-3.0-or-later
#define NETDATA_RRD_INTERNALS
#include "rrdengine.h"
rrdeng_stats_t global_io_errors = 0;
rrdeng_stats_t global_fs_errors = 0;
rrdeng_stats_t rrdeng_reserved_file_descriptors = 0;
void sanity_check(void)
{
/* Magic numbers must fit in the super-blocks */
BUILD_BUG_ON(strlen(RRDENG_DF_MAGIC) > RRDENG_MAGIC_SZ);
BUILD_BUG_ON(strlen(RRDENG_JF_MAGIC) > RRDENG_MAGIC_SZ);
/* Version strings must fit in the super-blocks */
BUILD_BUG_ON(strlen(RRDENG_DF_VER) > RRDENG_VER_SZ);
BUILD_BUG_ON(strlen(RRDENG_JF_VER) > RRDENG_VER_SZ);
/* Data file super-block cannot be larger than RRDENG_BLOCK_SIZE */
BUILD_BUG_ON(RRDENG_DF_SB_PADDING_SZ < 0);
BUILD_BUG_ON(sizeof(uuid_t) != UUID_SZ); /* check UUID size */
/* page count must fit in 8 bits */
BUILD_BUG_ON(MAX_PAGES_PER_EXTENT > 255);
}
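/*
* Rough sketch of the extent on-disk layout, as read_extent_cb() below parses
* it and do_flush_pages() writes it; the authoritative structure definitions
* live in rrdengine.h:
*
*   +---------------------------+  offset 0 within the extent
*   | rrdeng_df_extent_header   |  compression_algorithm, payload_length,
*   |   descr[number_of_pages]  |  number_of_pages, then one descriptor per
*   |                           |  page: type, uuid, page_length,
*   |                           |  start_time, end_time
*   +---------------------------+  <- payload_offset =
*   | page payload              |     sizeof(header) + count * sizeof(descr[0])
*   |                           |  pages concatenated in descr[] order,
*   |                           |  stored raw or LZ4-compressed as one block
*   +---------------------------+
*   | rrdeng_df_extent_trailer  |  CRC32 of all preceding extent bytes
*   +---------------------------+
*/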
void read_extent_cb(uv_fs_t* req)
{
struct rrdengine_worker_config* wc = req->loop->data;
struct rrdengine_instance *ctx = wc->ctx;
struct extent_io_descriptor *xt_io_descr;
struct rrdeng_page_descr *descr;
struct page_cache_descr *pg_cache_descr;
int ret;
unsigned i, j, count;
void *page, *uncompressed_buf = NULL;
uint32_t payload_length, payload_offset, page_offset, uncompressed_payload_length;
uint8_t have_read_error = 0;
/* persistent structures */
struct rrdeng_df_extent_header *header;
struct rrdeng_df_extent_trailer *trailer;
uLong crc;
xt_io_descr = req->data;
header = xt_io_descr->buf;
payload_length = header->payload_length;
count = header->number_of_pages;
payload_offset = sizeof(*header) + sizeof(header->descr[0]) * count;
trailer = xt_io_descr->buf + xt_io_descr->bytes - sizeof(*trailer);
if (req->result < 0) {
struct rrdengine_datafile *datafile = xt_io_descr->descr_array[0]->extent->datafile;
++ctx->stats.io_errors;
rrd_stat_atomic_add(&global_io_errors, 1);
have_read_error = 1;
error("%s: uv_fs_read - %s - extent at offset %"PRIu64"(%u) in datafile %u-%u.", __func__,
uv_strerror((int)req->result), xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno);
goto after_crc_check;
}
crc = crc32(0L, Z_NULL, 0);
crc = crc32(crc, xt_io_descr->buf, xt_io_descr->bytes - sizeof(*trailer));
ret = crc32cmp(trailer->checksum, crc);
#ifdef NETDATA_INTERNAL_CHECKS
{
struct rrdengine_datafile *datafile = xt_io_descr->descr_array[0]->extent->datafile;
debug(D_RRDENGINE, "%s: Extent at offset %"PRIu64"(%u) was read from datafile %u-%u. CRC32 check: %s", __func__,
xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno, ret ? "FAILED" : "SUCCEEDED");
}
#endif
if (unlikely(ret)) {
struct rrdengine_datafile *datafile = xt_io_descr->descr_array[0]->extent->datafile;
++ctx->stats.io_errors;
rrd_stat_atomic_add(&global_io_errors, 1);
have_read_error = 1;
error("%s: Extent at offset %"PRIu64"(%u) was read from datafile %u-%u. CRC32 check: FAILED", __func__,
xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno);
}
after_crc_check:
if (!have_read_error && RRD_NO_COMPRESSION != header->compression_algorithm) {
uncompressed_payload_length = 0;
for (i = 0 ; i < count ; ++i) {
uncompressed_payload_length += header->descr[i].page_length;
}
uncompressed_buf = mallocz(uncompressed_payload_length);
ret = LZ4_decompress_safe(xt_io_descr->buf + payload_offset, uncompressed_buf,
payload_length, uncompressed_payload_length);
ctx->stats.before_decompress_bytes += payload_length;
ctx->stats.after_decompress_bytes += ret;
debug(D_RRDENGINE, "LZ4 decompressed %u bytes to %d bytes.", payload_length, ret);
/* care, we don't hold the descriptor mutex */
}
for (i = 0 ; i < xt_io_descr->descr_count; ++i) {
page = mallocz(RRDENG_BLOCK_SIZE);
descr = xt_io_descr->descr_array[i];
for (j = 0, page_offset = 0; j < count; ++j) {
/* care, we don't hold the descriptor mutex */
if (!uuid_compare(*(uuid_t *) header->descr[j].uuid, *descr->id) &&
header->descr[j].page_length == descr->page_length &&
header->descr[j].start_time == descr->start_time &&
header->descr[j].end_time == descr->end_time) {
break;
}
page_offset += header->descr[j].page_length;
}
/* care, we don't hold the descriptor mutex */
if (have_read_error) {
/* Zero-fill the page on read error: applications must make sure NULL values are encoded as 0, as SN_EMPTY_SLOT is. */
memset(page, 0, descr->page_length);
} else if (RRD_NO_COMPRESSION == header->compression_algorithm) {
(void) memcpy(page, xt_io_descr->buf + payload_offset + page_offset, descr->page_length);
} else {
(void) memcpy(page, uncompressed_buf + page_offset, descr->page_length);
}
pg_cache_replaceQ_insert(ctx, descr);
rrdeng_page_descr_mutex_lock(ctx, descr);
pg_cache_descr = descr->pg_cache_descr;
pg_cache_descr->page = page;
pg_cache_descr->flags |= RRD_PAGE_POPULATED;
pg_cache_descr->flags &= ~RRD_PAGE_READ_PENDING;
debug(D_RRDENGINE, "%s: Waking up waiters.", __func__);
if (xt_io_descr->release_descr) {
pg_cache_put_unsafe(descr);
} else {
pg_cache_wake_up_waiters_unsafe(descr);
}
rrdeng_page_descr_mutex_unlock(ctx, descr);
}
if (!have_read_error && RRD_NO_COMPRESSION != header->compression_algorithm) {
freez(uncompressed_buf);
}
if (xt_io_descr->completion)
complete(xt_io_descr->completion);
uv_fs_req_cleanup(req);
free(xt_io_descr->buf);
freez(xt_io_descr);
}
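/*
* Both the extent trailer and the journal transaction trailer carry a zlib
* running CRC32. The pattern shared by the read and write paths in this file
* (crc32set()/crc32cmp() are the engine's helpers for storing and checking
* the checksum byte array):
*
*   uLong crc = crc32(0L, Z_NULL, 0);        // seed an empty CRC
*   crc = crc32(crc, buf, len);              // checksum len bytes of buf
*   crc32set(trailer->checksum, crc);        // store it on the write path
*   ret = crc32cmp(trailer->checksum, crc);  // 0 means a match on the read path
*/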
static void do_read_extent(struct rrdengine_worker_config* wc,
struct rrdeng_page_descr **descr,
unsigned count,
uint8_t release_descr)
{
struct rrdengine_instance *ctx = wc->ctx;
struct page_cache_descr *pg_cache_descr;
int ret;
unsigned i, size_bytes, pos, real_io_size;
// uint32_t payload_length;
struct extent_io_descriptor *xt_io_descr;
struct rrdengine_datafile *datafile;
datafile = descr[0]->extent->datafile;
pos = descr[0]->extent->offset;
size_bytes = descr[0]->extent->size;
xt_io_descr = mallocz(sizeof(*xt_io_descr));
ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes));
if (unlikely(ret)) {
fatal("posix_memalign:%s", strerror(ret));
/* freez(xt_io_descr);
return;*/
}
for (i = 0 ; i < count; ++i) {
rrdeng_page_descr_mutex_lock(ctx, descr[i]);
pg_cache_descr = descr[i]->pg_cache_descr;
pg_cache_descr->flags |= RRD_PAGE_READ_PENDING;
// payload_length = descr[i]->page_length;
rrdeng_page_descr_mutex_unlock(ctx, descr[i]);
xt_io_descr->descr_array[i] = descr[i];
}
xt_io_descr->descr_count = count;
xt_io_descr->bytes = size_bytes;
xt_io_descr->pos = pos;
xt_io_descr->req.data = xt_io_descr;
xt_io_descr->completion = NULL;
/* xt_io_descr->descr_commit_idx_array[0] */
xt_io_descr->release_descr = release_descr;
real_io_size = ALIGN_BYTES_CEILING(size_bytes);
xt_io_descr->iov = uv_buf_init((void *)xt_io_descr->buf, real_io_size);
ret = uv_fs_read(wc->loop, &xt_io_descr->req, datafile->file, &xt_io_descr->iov, 1, pos, read_extent_cb);
assert (-1 != ret);
ctx->stats.io_read_bytes += real_io_size;
++ctx->stats.io_read_requests;
ctx->stats.io_read_extent_bytes += real_io_size;
++ctx->stats.io_read_extents;
ctx->stats.pg_cache_backfills += count;
}
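/*
* Sketch of the journal file transaction that commit_data_extent() below
* assembles for a freshly written extent (definitions in rrdengine.h):
*
*   rrdeng_jf_transaction_header   type = STORE_DATA, id, payload_length
*   rrdeng_jf_store_data           extent_offset, extent_size, number_of_pages,
*     descr[number_of_pages]       page descriptors copied verbatim from the
*                                  extent header
*   rrdeng_jf_transaction_trailer  CRC32 of the header plus payload
*/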
static void commit_data_extent(struct rrdengine_worker_config* wc, struct extent_io_descriptor *xt_io_descr)
{
struct rrdengine_instance *ctx = wc->ctx;
unsigned count, payload_length, descr_size, size_bytes;
void *buf;
/* persistent structures */
struct rrdeng_df_extent_header *df_header;
struct rrdeng_jf_transaction_header *jf_header;
struct rrdeng_jf_store_data *jf_metric_data;
struct rrdeng_jf_transaction_trailer *jf_trailer;
uLong crc;
df_header = xt_io_descr->buf;
count = df_header->number_of_pages;
descr_size = sizeof(*jf_metric_data->descr) * count;
payload_length = sizeof(*jf_metric_data) + descr_size;
size_bytes = sizeof(*jf_header) + payload_length + sizeof(*jf_trailer);
buf = wal_get_transaction_buffer(wc, size_bytes);
jf_header = buf;
jf_header->type = STORE_DATA;
jf_header->reserved = 0;
jf_header->id = ctx->commit_log.transaction_id++;
jf_header->payload_length = payload_length;
jf_metric_data = buf + sizeof(*jf_header);
jf_metric_data->extent_offset = xt_io_descr->pos;
jf_metric_data->extent_size = xt_io_descr->bytes;
jf_metric_data->number_of_pages = count;
memcpy(jf_metric_data->descr, df_header->descr, descr_size);
jf_trailer = buf + sizeof(*jf_header) + payload_length;
crc = crc32(0L, Z_NULL, 0);
crc = crc32(crc, buf, sizeof(*jf_header) + payload_length);
crc32set(jf_trailer->checksum, crc);
}
static void do_commit_transaction(struct rrdengine_worker_config* wc, uint8_t type, void *data)
{
switch (type) {
case STORE_DATA:
commit_data_extent(wc, (struct extent_io_descriptor *)data);
break;
default:
assert(type == STORE_DATA);
break;
}
}
void flush_pages_cb(uv_fs_t* req)
{
struct rrdengine_worker_config* wc = req->loop->data;
struct rrdengine_instance *ctx = wc->ctx;
struct page_cache *pg_cache = &ctx->pg_cache;
struct extent_io_descriptor *xt_io_descr;
struct rrdeng_page_descr *descr;
struct page_cache_descr *pg_cache_descr;
int ret;
unsigned i, count;
Word_t commit_id;
xt_io_descr = req->data;
if (req->result < 0) {
++ctx->stats.io_errors;
rrd_stat_atomic_add(&global_io_errors, 1);
error("%s: uv_fs_write: %s", __func__, uv_strerror((int)req->result));
}
#ifdef NETDATA_INTERNAL_CHECKS
{
struct rrdengine_datafile *datafile = xt_io_descr->descr_array[0]->extent->datafile;
debug(D_RRDENGINE, "%s: Extent at offset %"PRIu64"(%u) was written to datafile %u-%u. Waking up waiters.",
__func__, xt_io_descr->pos, xt_io_descr->bytes, datafile->tier, datafile->fileno);
}
#endif
count = xt_io_descr->descr_count;
for (i = 0 ; i < count ; ++i) {
/* care, we don't hold the descriptor mutex */
descr = xt_io_descr->descr_array[i];
uv_rwlock_wrlock(&pg_cache->commited_page_index.lock);
commit_id = xt_io_descr->descr_commit_idx_array[i];
ret = JudyLDel(&pg_cache->commited_page_index.JudyL_array, commit_id, PJE0);
assert(1 == ret);
--pg_cache->commited_page_index.nr_commited_pages;
uv_rwlock_wrunlock(&pg_cache->commited_page_index.lock);
pg_cache_replaceQ_insert(ctx, descr);
rrdeng_page_descr_mutex_lock(ctx, descr);
pg_cache_descr = descr->pg_cache_descr;
pg_cache_descr->flags &= ~(RRD_PAGE_DIRTY | RRD_PAGE_WRITE_PENDING);
/* wake up waiters, care no reference being held */
pg_cache_wake_up_waiters_unsafe(descr);
rrdeng_page_descr_mutex_unlock(ctx, descr);
}
if (xt_io_descr->completion)
complete(xt_io_descr->completion);
uv_fs_req_cleanup(req);
free(xt_io_descr->buf);
freez(xt_io_descr);
}
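/*
* flush_pages_cb() above completes the dirty page lifecycle: every flushed
* page is removed from the committed page index (JudyLDel() under the
* commited_page_index write lock), re-inserted into the cache replacement
* queue, and has RRD_PAGE_DIRTY | RRD_PAGE_WRITE_PENDING cleared before its
* waiters are woken up.
*/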
/*
* completion must be NULL or point to a valid completion object.
* Returns 0 when no flushing can take place.
* Returns the number of datafile bytes to be written when flushing is
* successfully initiated.
*/
static int do_flush_pages(struct rrdengine_worker_config* wc, int force, struct completion *completion)
{
struct rrdengine_instance *ctx = wc->ctx;
struct page_cache *pg_cache = &ctx->pg_cache;
int ret;
int compressed_size, max_compressed_size = 0;
unsigned i, count, size_bytes, pos, real_io_size;
uint32_t uncompressed_payload_length, payload_offset;
struct rrdeng_page_descr *descr, *eligible_pages[MAX_PAGES_PER_EXTENT];
struct page_cache_descr *pg_cache_descr;
struct extent_io_descriptor *xt_io_descr;
void *compressed_buf = NULL;
Word_t descr_commit_idx_array[MAX_PAGES_PER_EXTENT];
Pvoid_t *PValue;
Word_t Index;
uint8_t compression_algorithm = ctx->global_compress_alg;
struct extent_info *extent;
struct rrdengine_datafile *datafile;
/* persistent structures */
struct rrdeng_df_extent_header *header;
struct rrdeng_df_extent_trailer *trailer;
uLong crc;
if (force) {
debug(D_RRDENGINE, "Asynchronous flushing of extent has been forced by page pressure.");
}
uv_rwlock_rdlock(&pg_cache->commited_page_index.lock);
for (Index = 0, count = 0, uncompressed_payload_length = 0,
PValue = JudyLFirst(pg_cache->commited_page_index.JudyL_array, &Index, PJE0),
descr = unlikely(NULL == PValue) ? NULL : *PValue ;
descr != NULL && count != MAX_PAGES_PER_EXTENT ;
PValue = JudyLNext(pg_cache->commited_page_index.JudyL_array, &Index, PJE0),
descr = unlikely(NULL == PValue) ? NULL : *PValue) {
assert(0 != descr->page_length);
rrdeng_page_descr_mutex_lock(ctx, descr);
pg_cache_descr = descr->pg_cache_descr;
if (!(pg_cache_descr->flags & RRD_PAGE_WRITE_PENDING)) {
/* care, no reference being held */
pg_cache_descr->flags |= RRD_PAGE_WRITE_PENDING;
uncompressed_payload_length += descr->page_length;
descr_commit_idx_array[count] = Index;
eligible_pages[count++] = descr;
}
rrdeng_page_descr_mutex_unlock(ctx, descr);
}
uv_rwlock_rdunlock(&pg_cache->commited_page_index.lock);
if (!count) {
debug(D_RRDENGINE, "%s: no pages eligible for flushing.", __func__);
if (completion)
complete(completion);
return 0;
}
xt_io_descr = mallocz(sizeof(*xt_io_descr));
payload_offset = sizeof(*header) + count * sizeof(header->descr[0]);
switch (compression_algorithm) {
case RRD_NO_COMPRESSION:
size_bytes = payload_offset + uncompressed_payload_length + sizeof(*trailer);
break;
default: /* Compress */
assert(uncompressed_payload_length < LZ4_MAX_INPUT_SIZE);
max_compressed_size = LZ4_compressBound(uncompressed_payload_length);
compressed_buf = mallocz(max_compressed_size);
size_bytes = payload_offset + MAX(uncompressed_payload_length, (unsigned)max_compressed_size) + sizeof(*trailer);
break;
}
ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes));
if (unlikely(ret)) {
fatal("posix_memalign:%s", strerror(ret));
/* freez(xt_io_descr);*/
}
(void) memcpy(xt_io_descr->descr_array, eligible_pages, sizeof(struct rrdeng_page_descr *) * count);
xt_io_descr->descr_count = count;
pos = 0;
header = xt_io_descr->buf;
header->compression_algorithm = compression_algorithm;
header->number_of_pages = count;
pos += sizeof(*header);
extent = mallocz(sizeof(*extent) + count * sizeof(extent->pages[0]));
datafile = ctx->datafiles.last; /* TODO: check for exceeded size quota */
extent->offset = datafile->pos;
extent->number_of_pages = count;
extent->datafile = datafile;
extent->next = NULL;
for (i = 0 ; i < count ; ++i) {
/* This is here for performance reasons */
xt_io_descr->descr_commit_idx_array[i] = descr_commit_idx_array[i];
descr = xt_io_descr->descr_array[i];
header->descr[i].type = PAGE_METRICS;
uuid_copy(*(uuid_t *)header->descr[i].uuid, *descr->id);
header->descr[i].page_length = descr->page_length;
header->descr[i].start_time = descr->start_time;
header->descr[i].end_time = descr->end_time;
pos += sizeof(header->descr[i]);
}
for (i = 0 ; i < count ; ++i) {
descr = xt_io_descr->descr_array[i];
/* care, we don't hold the descriptor mutex */
(void) memcpy(xt_io_descr->buf + pos, descr->pg_cache_descr->page, descr->page_length);
descr->extent = extent;
extent->pages[i] = descr;
pos += descr->page_length;
}
df_extent_insert(extent);
switch (compression_algorithm) {
case RRD_NO_COMPRESSION:
header->payload_length = uncompressed_payload_length;
break;
default: /* Compress */
compressed_size = LZ4_compress_default(xt_io_descr->buf + payload_offset, compressed_buf,
uncompressed_payload_length, max_compressed_size);
ctx->stats.before_compress_bytes += uncompressed_payload_length;
ctx->stats.after_compress_bytes += compressed_size;
debug(D_RRDENGINE, "LZ4 compressed %"PRIu32" bytes to %d bytes.", uncompressed_payload_length, compressed_size);
(void) memcpy(xt_io_descr->buf + payload_offset, compressed_buf, compressed_size);
freez(compressed_buf);
size_bytes = payload_offset + compressed_size + sizeof(*trailer);
header->payload_length = compressed_size;
break;
}
extent->size = size_bytes;
xt_io_descr->bytes = size_bytes;
xt_io_descr->pos = datafile->pos;
xt_io_descr->req.data = xt_io_descr;
xt_io_descr->completion = completion;
trailer = xt_io_descr->buf + size_bytes - sizeof(*trailer);
crc = crc32(0L, Z_NULL, 0);
crc = crc32(crc, xt_io_descr->buf, size_bytes - sizeof(*trailer));
crc32set(trailer->checksum, crc);
real_io_size = ALIGN_BYTES_CEILING(size_bytes);
xt_io_descr->iov = uv_buf_init((void *)xt_io_descr->buf, real_io_size);
ret = uv_fs_write(wc->loop, &xt_io_descr->req, datafile->file, &xt_io_descr->iov, 1, datafile->pos, flush_pages_cb);
assert (-1 != ret);
ctx->stats.io_write_bytes += real_io_size;
++ctx->stats.io_write_requests;
ctx->stats.io_write_extent_bytes += real_io_size;
++ctx->stats.io_write_extents;
do_commit_transaction(wc, STORE_DATA, xt_io_descr);
datafile->pos += ALIGN_BYTES_CEILING(size_bytes);
ctx->disk_space += ALIGN_BYTES_CEILING(size_bytes);
rrdeng_test_quota(wc);
return ALIGN_BYTES_CEILING(size_bytes);
}
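/*
* Buffer sizing in do_flush_pages() above: the aligned write buffer is
* allocated for the worst case, MAX(uncompressed_payload_length,
* LZ4_compressBound(uncompressed_payload_length)), because LZ4 may expand
* incompressible input; once the real compressed size is known the extent
* shrinks to payload_offset + compressed_size + sizeof(trailer).
* Illustrative numbers only, assuming 64 dirty 4 KiB pages:
*
*   uncompressed_payload_length = 64 * 4096                = 262144 bytes
*   LZ4_compressBound(262144)   = 262144 + 262144/255 + 16 = 263188 bytes
*/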
static void after_delete_old_data(uv_work_t *req, int status)
{
struct rrdengine_instance *ctx = req->data;
struct rrdengine_worker_config* wc = &ctx->worker_config;
struct rrdengine_datafile *datafile;
struct rrdengine_journalfile *journalfile;
unsigned deleted_bytes, journalfile_bytes, datafile_bytes;
int ret;
char path[RRDENG_PATH_MAX];
(void)status;
datafile = ctx->datafiles.first;
journalfile = datafile->journalfile;
datafile_bytes = datafile->pos;
journalfile_bytes = journalfile->pos;
deleted_bytes = 0;
info("Deleting data and journal file pair.");
datafile_list_delete(ctx, datafile);
ret = destroy_journal_file(journalfile, datafile);
if (!ret) {
generate_journalfilepath(datafile, path, sizeof(path));
info("Deleted journal file \"%s\".", path);
deleted_bytes += journalfile_bytes;
}
ret = destroy_data_file(datafile);
if (!ret) {
generate_datafilepath(datafile, path, sizeof(path));
info("Deleted data file \"%s\".", path);
deleted_bytes += datafile_bytes;
}
freez(journalfile);
freez(datafile);
ctx->disk_space -= deleted_bytes;
info("Reclaimed %u bytes of disk space.", deleted_bytes);
/* unfreeze command processing */
wc->now_deleting.data = NULL;
/* wake up event loop */
ret = uv_async_send(&wc->async);
assert(0 == ret);
}
static void delete_old_data(uv_work_t *req)
{
struct rrdengine_instance *ctx = req->data;
struct rrdengine_datafile *datafile;
struct extent_info *extent, *next;
struct rrdeng_page_descr *descr;
unsigned count, i;
/* Safe to access without locking: the datafile is freed only after this work item completes */
datafile = ctx->datafiles.first;
for (extent = datafile->extents.first ; extent != NULL ; extent = next) {
count = extent->number_of_pages;
for (i = 0 ; i < count ; ++i) {
descr = extent->pages[i];
pg_cache_punch_hole(ctx, descr, 0);
}
next = extent->next;
freez(extent);
}
}
void rrdeng_test_quota(struct rrdengine_worker_config* wc)
{
struct rrdengine_instance *ctx = wc->ctx;
struct rrdengine_datafile *datafile;
unsigned current_size, target_size;
uint8_t out_of_space, only_one_datafile;
int ret;
out_of_space = 0;
if (unlikely(ctx->disk_space > ctx->max_disk_space)) {
out_of_space = 1;
}
datafile = ctx->datafiles.last;
current_size = datafile->pos;
target_size = ctx->max_disk_space / TARGET_DATAFILES;
target_size = MIN(target_size, MAX_DATAFILE_SIZE);
target_size = MAX(target_size, MIN_DATAFILE_SIZE);
only_one_datafile = (datafile == ctx->datafiles.first) ? 1 : 0;
if (unlikely(current_size >= target_size || (out_of_space && only_one_datafile))) {
/* Finalize data and journal file and create a new pair */
wal_flush_transaction_buffer(wc);
ret = create_new_datafile_pair(ctx, 1, ctx->last_fileno + 1);
if (likely(!ret)) {
++ctx->last_fileno;
}
}
if (unlikely(out_of_space)) {
/* delete old data */
if (wc->now_deleting.data) {
/* already deleting data */
return;
}
if (NULL == ctx->datafiles.first->next) {
error("Cannot delete data file \"%s/"DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION"\""
" to reclaim space, there are no other file pairs left.",
ctx->dbfiles_path, ctx->datafiles.first->tier, ctx->datafiles.first->fileno);
return;
}
info("Deleting data file \"%s/"DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION"\".",
ctx->dbfiles_path, ctx->datafiles.first->tier, ctx->datafiles.first->fileno);
wc->now_deleting.data = ctx;
ret = uv_queue_work(wc->loop, &wc->now_deleting, delete_old_data, after_delete_old_data);
assert(0 == ret);
}
}
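/*
* The rotation policy of rrdeng_test_quota() above, in arithmetic form
* (the constants come from rrdengine.h):
*
*   target_size = clamp(max_disk_space / TARGET_DATAFILES,
*                       MIN_DATAFILE_SIZE, MAX_DATAFILE_SIZE);
*
* A new datafile/journalfile pair is created once the active datafile reaches
* target_size, or when space has run out with only one pair left; the oldest
* pair is deleted asynchronously, and never when it is the only one.
*/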
/* return 0 on success */
int init_rrd_files(struct rrdengine_instance *ctx)
{
return init_data_files(ctx);
}
void finalize_rrd_files(struct rrdengine_instance *ctx)
{
finalize_data_files(ctx);
}
void rrdeng_init_cmd_queue(struct rrdengine_worker_config* wc)
{
int ret;
wc->cmd_queue.head = wc->cmd_queue.tail = 0;
wc->queue_size = 0;
ret = uv_cond_init(&wc->cmd_cond);
assert(0 == ret);
ret = uv_mutex_init(&wc->cmd_mutex);
assert(0 == ret);
}
void rrdeng_enq_cmd(struct rrdengine_worker_config* wc, struct rrdeng_cmd *cmd)
{
unsigned queue_size;
int ret;
/* wait for free space in queue */
uv_mutex_lock(&wc->cmd_mutex);
while ((queue_size = wc->queue_size) == RRDENG_CMD_Q_MAX_SIZE) {
uv_cond_wait(&wc->cmd_cond, &wc->cmd_mutex);
}
assert(queue_size < RRDENG_CMD_Q_MAX_SIZE);
/* enqueue command */
wc->cmd_queue.cmd_array[wc->cmd_queue.tail] = *cmd;
wc->cmd_queue.tail = wc->cmd_queue.tail != RRDENG_CMD_Q_MAX_SIZE - 1 ?
wc->cmd_queue.tail + 1 : 0;
wc->queue_size = queue_size + 1;
uv_mutex_unlock(&wc->cmd_mutex);
/* wake up event loop */
ret = uv_async_send(&wc->async);
assert(0 == ret);
}
struct rrdeng_cmd rrdeng_deq_cmd(struct rrdengine_worker_config* wc)
{
struct rrdeng_cmd ret;
unsigned queue_size;
uv_mutex_lock(&wc->cmd_mutex);
queue_size = wc->queue_size;
if (queue_size == 0) {
ret.opcode = RRDENG_NOOP;
} else {
/* dequeue command */
ret = wc->cmd_queue.cmd_array[wc->cmd_queue.head];
if (queue_size == 1) {
wc->cmd_queue.head = wc->cmd_queue.tail = 0;
} else {
wc->cmd_queue.head = wc->cmd_queue.head != RRDENG_CMD_Q_MAX_SIZE - 1 ?
wc->cmd_queue.head + 1 : 0;
}
wc->queue_size = queue_size - 1;
/* wake up producers */
uv_cond_signal(&wc->cmd_cond);
}
uv_mutex_unlock(&wc->cmd_mutex);
return ret;
}
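/*
* The command queue is a fixed-capacity ring buffer protected by cmd_mutex:
* rrdeng_enq_cmd() blocks on cmd_cond while the queue already holds
* RRDENG_CMD_Q_MAX_SIZE commands and rrdeng_deq_cmd() signals cmd_cond after
* making room. Head and tail wrap around without a modulo operation:
*
*   tail = (tail != RRDENG_CMD_Q_MAX_SIZE - 1) ? tail + 1 : 0;
*   // equivalent to: tail = (tail + 1) % RRDENG_CMD_Q_MAX_SIZE
*/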
void async_cb(uv_async_t *handle)
{
uv_stop(handle->loop);
uv_update_time(handle->loop);
debug(D_RRDENGINE, "%s called, active=%d.", __func__, uv_is_active((uv_handle_t *)handle));
}
void timer_cb(uv_timer_t* handle)
{
struct rrdengine_worker_config* wc = handle->data;
uv_stop(handle->loop);
uv_update_time(handle->loop);
rrdeng_test_quota(wc);
debug(D_RRDENGINE, "%s: timeout reached.", __func__);
if (likely(!wc->now_deleting.data)) {
unsigned total_bytes, bytes_written;
/* There is free space so we can write to disk */
debug(D_RRDENGINE, "Flushing pages to disk.");
for (total_bytes = bytes_written = do_flush_pages(wc, 0, NULL) ;
bytes_written && (total_bytes < DATAFILE_IDEAL_IO_SIZE) ;
total_bytes += bytes_written) {
bytes_written = do_flush_pages(wc, 0, NULL);
}
}
#ifdef NETDATA_INTERNAL_CHECKS
{
char buf[4096];
debug(D_RRDENGINE, "%s", get_rrdeng_statistics(wc->ctx, buf, sizeof(buf)));
}
#endif
}
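/*
* Each timer tick therefore flushes at most roughly DATAFILE_IDEAL_IO_SIZE
* bytes: do_flush_pages() is retried until it either returns 0 (nothing left
* to flush) or the per-tick byte budget is exhausted, spreading extent writes
* over time instead of bursting them.
*/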
/* Flushes dirty pages when timer expires */
#define TIMER_PERIOD_MS (1000)
#define CMD_BATCH_SIZE (256)
void rrdeng_worker(void* arg)
{
struct rrdengine_worker_config* wc = arg;
struct rrdengine_instance *ctx = wc->ctx;
uv_loop_t* loop;
int shutdown, ret;
enum rrdeng_opcode opcode;
uv_timer_t timer_req;
struct rrdeng_cmd cmd;
rrdeng_init_cmd_queue(wc);
loop = wc->loop = mallocz(sizeof(uv_loop_t));
ret = uv_loop_init(loop);
if (ret) {
error("uv_loop_init(): %s", uv_strerror(ret));
goto error_after_loop_init;
}
loop->data = wc;
ret = uv_async_init(wc->loop, &wc->async, async_cb);
if (ret) {
error("uv_async_init(): %s", uv_strerror(ret));
goto error_after_async_init;
}
wc->async.data = wc;
wc->now_deleting.data = NULL;
/* dirty page flushing timer */
ret = uv_timer_init(loop, &timer_req);
if (ret) {
error("uv_timer_init(): %s", uv_strerror(ret));
goto error_after_timer_init;
}
timer_req.data = wc;
wc->error = 0;
/* wake up initialization thread */
complete(&ctx->rrdengine_completion);
ret = uv_timer_start(&timer_req, timer_cb, TIMER_PERIOD_MS, TIMER_PERIOD_MS);
assert(0 == ret);
shutdown = 0;
while (shutdown == 0 || uv_loop_alive(loop)) {
uv_run(loop, UV_RUN_DEFAULT);
/* wait for commands */
do {
cmd = rrdeng_deq_cmd(wc);
opcode = cmd.opcode;
switch (opcode) {
case RRDENG_NOOP:
/* the command queue was empty, do nothing */
break;
case RRDENG_SHUTDOWN:
shutdown = 1;
/*
* uv_async_send after uv_close does not seem to crash on Linux at the moment,
* it is however undocumented behaviour and we need to be aware if this becomes
* an issue in the future.
*/
uv_close((uv_handle_t *)&wc->async, NULL);
ret = uv_timer_stop(&timer_req);
assert(0 == ret);
uv_close((uv_handle_t *)&timer_req, NULL);
break;
case RRDENG_READ_PAGE:
do_read_extent(wc, &cmd.read_page.page_cache_descr, 1, 0);
break;
case RRDENG_READ_EXTENT:
do_read_extent(wc, cmd.read_extent.page_cache_descr, cmd.read_extent.page_count, 1);
break;
case RRDENG_COMMIT_PAGE:
do_commit_transaction(wc, STORE_DATA, NULL);
break;
case RRDENG_FLUSH_PAGES: {
unsigned bytes_written;
/* Attach the completion to the first flush only; the follow-up flushes below are fire-and-forget */
bytes_written = do_flush_pages(wc, 1, cmd.completion);
if (bytes_written) {
while (do_flush_pages(wc, 1, NULL)) {
; /* Force flushing of all committed pages. */
}
}
break;
}
default:
debug(D_RRDENGINE, "%s: default.", __func__);
break;
}
} while (opcode != RRDENG_NOOP);
}
/* cleanup operations of the event loop */
if (unlikely(wc->now_deleting.data)) {
info("Postponing shutting RRD engine event loop down until after datafile deletion is finished.");
}
info("Shutting down RRD engine event loop.");
while (do_flush_pages(wc, 1, NULL)) {
; /* Force flushing of all committed pages. */
}
wal_flush_transaction_buffer(wc);
uv_run(loop, UV_RUN_DEFAULT);
info("Shutting down RRD engine event loop complete.");
/* TODO: don't let the API block by waiting to enqueue commands */
uv_cond_destroy(&wc->cmd_cond);
/* uv_mutex_destroy(&wc->cmd_mutex); */
ret = uv_loop_close(loop);
assert(0 == ret);
freez(loop);
return;
error_after_timer_init:
uv_close((uv_handle_t *)&wc->async, NULL);
error_after_async_init:
ret = uv_loop_close(loop);
assert(0 == ret);
error_after_loop_init:
freez(loop);
wc->error = UV_EAGAIN;
/* wake up initialization thread */
complete(&ctx->rrdengine_completion);
}
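/*
* A minimal, hypothetical sketch of how a producer thread drives this worker
* (the real producers live in the engine API layer; init_completion(),
* wait_for_completion() and destroy_completion() are the engine's completion
* helpers):
*
*   struct completion flush_done;
*   struct rrdeng_cmd cmd;
*
*   init_completion(&flush_done);
*   cmd.opcode = RRDENG_FLUSH_PAGES;
*   cmd.completion = &flush_done;     // or NULL for fire-and-forget
*   rrdeng_enq_cmd(&ctx->worker_config, &cmd);
*   wait_for_completion(&flush_done); // blocks until the first flush I/O
*   destroy_completion(&flush_done);
*/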
#define NR_PAGES (256)
static void basic_functional_test(struct rrdengine_instance *ctx)
{
int i, j, failed_validations;
uuid_t uuid[NR_PAGES];
void *buf;
struct rrdeng_page_descr *handle[NR_PAGES];
char uuid_str[UUID_STR_LEN];
char backup[NR_PAGES][UUID_STR_LEN * 100]; /* backup storage for page data verification */
for (i = 0 ; i < NR_PAGES ; ++i) {
uuid_generate(uuid[i]);
uuid_unparse_lower(uuid[i], uuid_str);
// fprintf(stderr, "Generated uuid[%d]=%s\n", i, uuid_str);
buf = rrdeng_create_page(ctx, &uuid[i], &handle[i]);
/* Each page contains 10 times its own UUID stringified */
for (j = 0 ; j < 100 ; ++j) {
strcpy(buf + UUID_STR_LEN * j, uuid_str);
strcpy(backup[i] + UUID_STR_LEN * j, uuid_str);
}
rrdeng_commit_page(ctx, handle[i], (Word_t)i);
}
fprintf(stderr, "\n********** CREATED %d METRIC PAGES ***********\n\n", NR_PAGES);
failed_validations = 0;
for (i = 0 ; i < NR_PAGES ; ++i) {
buf = rrdeng_get_latest_page(ctx, &uuid[i], (void **)&handle[i]);
if (NULL == buf) {
++failed_validations;
fprintf(stderr, "Page %d was LOST.\n", i);
continue; /* nothing to compare or release for a lost page */
}
if (memcmp(backup[i], buf, UUID_STR_LEN * 100)) {
++failed_validations;
fprintf(stderr, "Page %d data comparison with backup FAILED validation.\n", i);
}
rrdeng_put_page(ctx, handle[i]);
}
fprintf(stderr, "\n********** CORRECTLY VALIDATED %d/%d METRIC PAGES ***********\n\n",
NR_PAGES - failed_validations, NR_PAGES);
}
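/*
* Size arithmetic for the test above, assuming the usual 37-byte UUID_STR_LEN
* and 4096-byte RRDENG_BLOCK_SIZE: each page holds 100 * 37 = 3700 bytes of
* stringified UUIDs, fitting in a single page with 396 bytes to spare.
*/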
/* C entry point for development purposes:
* make "LDFLAGS=-errdengine_main"
* i.e. pass -e rrdengine_main to the linker so that it becomes the program entry point.
*/
void rrdengine_main(void)
{
int ret;
struct rrdengine_instance *ctx;
ret = rrdeng_init(&ctx, "/tmp", RRDENG_MIN_PAGE_CACHE_SIZE_MB, RRDENG_MIN_DISK_SPACE_MB);
if (ret) {
exit(ret);
}
basic_functional_test(ctx);
rrdeng_exit(ctx);
fprintf(stderr, "Hello world!");
exit(0);
}