
Multi-Tier database backend for long term metrics storage ()

* Tier part 1

* Tier part 2

* Tier part 3

* Tier part 4

* Tier part 5

* Fix some ML compilation errors

* fix more conflicts

* pass proper tier

* move metric_uuid from state to RRDDIM

* move aclk_live_status from state to RRDDIM

* move ml_dimension from state to RRDDIM

* abstracted the data collection interface

* support flushing for mem db too

* abstracted the query API (see the query sketch after this list)

* abstracted latest/oldest time per metric

* cleanup

* store_metric for tier1

* fix for store_metric

* allow multiple tiers, more than 2

* state to tier

* Change storage type in db; add a query param to request min, max, sum or average

* Store tier data correctly

* Fix skipping tier page type

* Add tier grouping in the tier

* Fix to handle archived charts (part 1)

* Temp fix for query granularity when requesting tier1 data

* Pass parameters in the correct order and calculate the anomaly rate based on the anomaly count

* Proper tiering grouping

* Anomaly calculation based on anomaly count (see the aggregation sketch after this list)

* force type checking on storage handles

* update cmocka tests

* fully dynamic number of storage tiers

* fix static allocation

* configure grouping for all tiers; disable tiers for unittest; disable statsd configuration for private charts mode

* use default page dt using the tiering info

* automatic selection of tier (see the tier-selection sketch after this list)

* fix for automatic selection of tier

* working prototype of dynamic tier selection

* automatic selection of tier done right (I hope)

* ask for the proper tier value, based on the grouping function

* fixes for unittests and load_metric_next()

* fixes for lgtm findings

* minor renames

* add the 'dbengine' prefix to the 'page cache size' setting

* add the 'dbengine' prefix to the 'page cache with malloc' setting

* query engine optimized to loop as little as required, based on the view_update_every

* query engine grouping methods now do not assume a constant number of points per group and they allocate memory with OWA

* report db points per tier in jsonwrap

* query planner that switches database tiers on the fly to satisfy the query for the entire timeframe

* dbengine statistics and documentation (in progress)

* calculate average point duration in db

* handle single point pages the best we can

* handle single point pages even better

* Keep page type in the rrdeng_page_descr

* updated doc

* handle future backwards compatibility - improved statistics

* support &tier=X in queries

* enforce increasing iterations on tiers

* tier 1 is always 1 iteration

* backfilling higher tiers on first data collection

* reversed anomaly bit

* set up to 5 tiers

* natural points should only be offered on tier 0, unless a specific tier is selected

* do not allow more than 65535 points of tier0 to be aggregated on any tier

* Work only on actually activated tiers

* fix query interpolation

* fix query interpolation again

* fix lgtm finding

* Activate one tier for now

* backfilling of higher tiers using raw metrics from lower tiers

* fix for crash on start when storage tiers is increased from the default

* more statistics on exit

* fix bug that prevented higher tiers from getting any values; added backfilling options

* fixed the statistics log line

* removed limit of 255 iterations per tier; moved the code freeing rd->tiers[x]->db_metric_handle

* fixed division by zero on zero points_wanted

* removed dead code

* Decide on the descr->type for the type of metric

* don't store metrics on unknown page types

* free db_metric_handle on sql based context queries

* Disable STORAGE_POINT value check in the exporting engine unit tests

* fix for db modes other than dbengine

* fix for aclk archived chart queries destroying db_metric_handles of valid rrddims

* fix left-over freez() instead of OWA freez on median queries
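
The aggregation sketch below illustrates the tiering model the list above describes: tier 0 keeps the raw collected samples, while higher tiers keep aggregated points carrying sum, min, max, the count of tier-0 points they cover and an anomaly count, matching the `storage_number_tier1_t` fields visible in the diff further down. The helper functions, the demo values and the `main()` are illustrative only, not netdata code.

```c
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Aggregated point, modeled after the storage_number_tier1_t fields in the
 * diff (sum/min/max are floats, counters are 16-bit, so one aggregated point
 * can cover at most 65535 tier-0 points). */
typedef struct {
    float    sum_value;
    float    min_value;
    float    max_value;
    uint16_t count;
    uint16_t anomaly_count;
} tier_point_t;

/* Fold one tier-0 sample into an aggregated point (illustrative helper). */
static void tier_point_add(tier_point_t *p, float v, bool anomalous) {
    if (!p->count) {
        p->min_value = p->max_value = v;
    }
    else {
        if (v < p->min_value) p->min_value = v;
        if (v > p->max_value) p->max_value = v;
    }
    p->sum_value += v;
    if (p->count < UINT16_MAX) p->count++;
    if (anomalous && p->anomaly_count < UINT16_MAX) p->anomaly_count++;
}

/* The anomaly rate of an aggregated point, as a percentage. */
static double tier_point_anomaly_rate(const tier_point_t *p) {
    return p->count ? 100.0 * p->anomaly_count / p->count : 0.0;
}

int main(void) {
    tier_point_t p;
    memset(&p, 0, sizeof(p));

    /* fold 60 one-second samples into a single 60-second higher-tier point */
    for (int i = 0; i < 60; i++)
        tier_point_add(&p, (float)i, (i % 10) == 0);

    printf("min %.1f max %.1f avg %.2f anomaly rate %.1f%%\n",
           p.min_value, p.max_value,
           p.sum_value / p.count, tier_point_anomaly_rate(&p));
    return 0;
}
```

Because the counters are 16-bit, no aggregated point can cover more than 65535 tier-0 points, which is the limit mentioned in the list above.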
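
The tier-selection sketch below illustrates the query-side idea from the list above: tier 0 offers natural (highest-resolution) points, each higher tier covers a longer time-frame at coarser resolution, and a query should read from the finest tier that can still cover the requested window. This is a simplified, hypothetical illustration; the actual query planner in this commit can also switch tiers on the fly within a single query.

```c
#include <time.h>

/* Per-tier retention metadata (hypothetical shape, for illustration only). */
typedef struct {
    time_t oldest_time;   /* first point retained by this tier */
    time_t latest_time;   /* last point stored in this tier */
    int    update_every;  /* seconds represented by one point in this tier */
} tier_retention_t;

/* Pick the finest tier whose retention still covers the requested window.
 * Tiers are ordered from finest (0) to coarsest (storage_tiers - 1). */
static int select_tier_for_query(const tier_retention_t *tiers, int storage_tiers,
                                 time_t after, time_t before) {
    for (int tier = 0; tier < storage_tiers; tier++) {
        if (tiers[tier].oldest_time <= after && tiers[tier].latest_time >= before)
            return tier;
    }
    /* no single tier covers the whole window; a real planner would split the
     * window and serve each part from the best tier available for it */
    return storage_tiers - 1;
}
```

The `&tier=X` query parameter added in this commit bypasses the automatic selection and forces a specific tier.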
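
The query sketch below shows the calling pattern of the abstracted query API, as exercised by the updated unit tests in the diff: `query_ops.init()` takes the per-tier `db_metric_handle` and a `TIER_QUERY_FETCH_*` hint, and `next_metric()` returns a whole `STORAGE_POINT` instead of a single `storage_number`. The fragment assumes netdata's internal headers and a caller that already has `rd`, `after` and `before`; `process_point()` is a hypothetical consumer.

```c
// Sketch: iterate the points of one dimension on tier 0 between 'after' and
// 'before' through the abstracted query_ops interface (assumes netdata's
// internal rrd headers; simplified, no error handling).
struct rrddim_query_handle handle;

rd->tiers[0]->query_ops.init(rd->tiers[0]->db_metric_handle, &handle,
                             after, before, TIER_QUERY_FETCH_SUM);

while (!rd->tiers[0]->query_ops.is_finished(&handle)) {
    STORAGE_POINT sp = rd->tiers[0]->query_ops.next_metric(&handle);

    // each point carries its own time range, so callers no longer assume a
    // constant number of points per group
    process_point(sp.sum, sp.start_time, sp.end_time);  // hypothetical consumer
}

rd->tiers[0]->query_ops.finalize(&handle);
```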

Co-authored-by: Costa Tsaousis <costa@netdata.cloud>
Co-authored-by: Vladimir Kobal <vlad@prokk.net>
Stelios Fragkakis 2022-07-06 14:01:53 +03:00 committed by GitHub
parent 8d5850fd49
commit 49234f23de
62 changed files with 2362 additions and 1285 deletions

View file

@ -146,13 +146,13 @@ PARSER_RC pluginsd_dimension_action(void *user, RRDSET *st, char *id, char *name
if (likely(unhide_dimension)) {
rrddim_flag_clear(rd, RRDDIM_FLAG_HIDDEN);
if (rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) {
(void)sql_set_dimension_option(&rd->state->metric_uuid, NULL);
(void)sql_set_dimension_option(&rd->metric_uuid, NULL);
rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN);
}
} else {
rrddim_flag_set(rd, RRDDIM_FLAG_HIDDEN);
if (!rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) {
(void)sql_set_dimension_option(&rd->state->metric_uuid, "hidden");
(void)sql_set_dimension_option(&rd->metric_uuid, "hidden");
rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN);
}
}

View file

@ -271,9 +271,7 @@ static struct statsd {
size_t tcp_idle_timeout;
collected_number decimal_detail;
size_t private_charts;
size_t max_private_charts;
size_t max_private_charts_hard;
RRD_MEMORY_MODE private_charts_memory_mode;
long private_charts_rrd_history_entries;
unsigned int private_charts_hidden:1;
@ -290,7 +288,6 @@ static struct statsd {
LISTEN_SOCKETS sockets;
} statsd = {
.enabled = 1,
.max_private_charts = 200,
.max_private_charts_hard = 1000,
.private_charts_hidden = 0,
.recvmmsg_size = 10,
@ -1591,7 +1588,7 @@ static inline void statsd_get_metric_type_and_id(STATSD_METRIC *m, char *type, c
}
static inline RRDSET *statsd_private_rrdset_create(
STATSD_METRIC *m
STATSD_METRIC *m __maybe_unused
, const char *type
, const char *id
, const char *name
@ -1603,16 +1600,6 @@ static inline RRDSET *statsd_private_rrdset_create(
, int update_every
, RRDSET_TYPE chart_type
) {
RRD_MEMORY_MODE memory_mode = statsd.private_charts_memory_mode;
long history = statsd.private_charts_rrd_history_entries;
if(unlikely(statsd.private_charts >= statsd.max_private_charts)) {
debug(D_STATSD, "STATSD: metric '%s' will be charted with memory mode = none, because the maximum number of charts has been reached.", m->name);
info("STATSD: metric '%s' will be charted with memory mode = none, because the maximum number of charts (%zu) has been reached. Increase the number of charts by editing netdata.conf, [statsd] section.", m->name, statsd.max_private_charts);
memory_mode = RRD_MEMORY_MODE_NONE;
history = 5;
}
statsd.private_charts++;
RRDSET *st = rrdset_create_custom(
localhost // host
@ -1628,8 +1615,8 @@ static inline RRDSET *statsd_private_rrdset_create(
, priority // priority
, update_every // update every
, chart_type // chart type
, memory_mode // memory mode
, history // history
, default_rrd_memory_mode // memory mode
, default_rrd_history_entries // history
);
rrdset_flag_set(st, RRDSET_FLAG_STORE_FIRST);
@ -2300,7 +2287,7 @@ static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_
if(unlikely(!(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_CHECKED))) {
if(unlikely(statsd.private_charts >= statsd.max_private_charts_hard)) {
debug(D_STATSD, "STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts has been reached.", m->name);
info("STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts (%zu) has been reached. Increase the number of charts by editing netdata.conf, [statsd] section.", m->name, statsd.max_private_charts);
info("STATSD: metric '%s' will not be charted, because the hard limit of the maximum number of charts (%zu) has been reached. Increase the number of charts by editing netdata.conf, [statsd] section.", m->name, statsd.max_private_charts_hard);
m->options &= ~STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED;
}
else {
@ -2446,9 +2433,7 @@ void *statsd_main(void *ptr) {
#endif
statsd.charts_for = simple_pattern_create(config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL, SIMPLE_PATTERN_EXACT);
statsd.max_private_charts = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts allowed", (long long)statsd.max_private_charts);
statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts * 5);
statsd.private_charts_memory_mode = rrd_memory_mode_id(config_get(CONFIG_SECTION_STATSD, "private charts memory mode", rrd_memory_mode_name(default_rrd_memory_mode)));
statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts_hard);
statsd.private_charts_rrd_history_entries = (int)config_get_number(CONFIG_SECTION_STATSD, "private charts history", default_rrd_history_entries);
statsd.decimal_detail = (collected_number)config_get_number(CONFIG_SECTION_STATSD, "decimal detail", (long long int)statsd.decimal_detail);
statsd.tcp_idle_timeout = (size_t) config_get_number(CONFIG_SECTION_STATSD, "disconnect idle tcp clients after seconds", (long long int)statsd.tcp_idle_timeout);

View file

@ -82,21 +82,21 @@ Please note that your data history will be lost if you have modified `history` p
### [db] section options
| setting | default | info |
|:----------------------------------:|:----------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `page cache size MB` and `dbengine disk space MB`. <br />`save`: Netdata will save its round robin database on exit and load it on startup. <br />`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`). <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. |
| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](/database/README.md) for more information. |
| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](/docs/guides/configure/performance.md). |
| page cache size MB | 32 | Determines the amount of RAM in MiB that is dedicated to caching Netdata metric values. |
| dbengine disk space MB | 256 | Determines the amount of disk space in MiB that is dedicated to storing Netdata metric values and all related metadata describing them. |
| dbengine multihost disk space MB | 256 | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. |
| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm) |
| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions |
| gap when lost iterations above | `1` | |
| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions |
| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. |
| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. |
| setting | default | info |
|:----------------------------------:|:----------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`. <br />`save`: Netdata will save its round robin database on exit and load it on startup. <br />`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`). <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. |
| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](/database/README.md) for more information. |
| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](/docs/guides/configure/performance.md). |
| dbengine page cache size MB | 32 | Determines the amount of RAM in MiB that is dedicated to caching Netdata metric values. |
| dbengine disk space MB | 256 | Determines the amount of disk space in MiB that is dedicated to storing Netdata metric values and all related metadata describing them. |
| dbengine multihost disk space MB | 256 | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. |
| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm) |
| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions |
| gap when lost iterations above | `1` | |
| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions |
| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. |
| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. |
### [directories] section options

View file

@ -451,21 +451,28 @@ static void dbengine_statistics_charts(void) {
RRDHOST *host;
unsigned long long stats_array[RRDENG_NR_STATS] = {0};
unsigned long long local_stats_array[RRDENG_NR_STATS];
unsigned dbengine_contexts = 0, counted_multihost_db = 0, i;
unsigned dbengine_contexts = 0, counted_multihost_db[RRD_STORAGE_TIERS] = { 0 }, i;
rrdhost_foreach_read(host) {
if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && !rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) {
if (&multidb_ctx == host->rrdeng_ctx) {
if (counted_multihost_db)
continue; /* Only count multi-host DB once */
counted_multihost_db = 1;
}
++dbengine_contexts;
/* get localhost's DB engine's statistics */
rrdeng_get_37_statistics(host->rrdeng_ctx, local_stats_array);
for (i = 0; i < RRDENG_NR_STATS; ++i) {
/* aggregate statistics across hosts */
stats_array[i] += local_stats_array[i];
/* get localhost's DB engine's statistics for each tier */
for(int tier = 0; tier < storage_tiers ;tier++) {
if(!host->storage_instance[tier]) continue;
if(is_storage_engine_shared(host->storage_instance[tier])) {
if(counted_multihost_db[tier])
continue;
else
counted_multihost_db[tier] = 1;
}
++dbengine_contexts;
rrdeng_get_37_statistics((struct rrdengine_instance *)host->storage_instance[tier], local_stats_array);
for (i = 0; i < RRDENG_NR_STATS; ++i) {
/* aggregate statistics across hosts */
stats_array[i] += local_stats_array[i];
}
}
}
}

View file

@ -55,11 +55,13 @@ void netdata_cleanup_and_exit(int ret) {
// free the database
info("EXIT: freeing database memory...");
#ifdef ENABLE_DBENGINE
rrdeng_prepare_exit(&multidb_ctx);
for(int tier = 0; tier < storage_tiers ; tier++)
rrdeng_prepare_exit(multidb_ctx[tier]);
#endif
rrdhost_free_all();
#ifdef ENABLE_DBENGINE
rrdeng_exit(&multidb_ctx);
for(int tier = 0; tier < storage_tiers ; tier++)
rrdeng_exit(multidb_ctx[tier]);
#endif
}
sql_close_database();
@ -533,10 +535,16 @@ static void backwards_compatible_config() {
CONFIG_SECTION_DB, "update every");
config_move(CONFIG_SECTION_GLOBAL, "page cache size",
CONFIG_SECTION_DB, "page cache size MB");
CONFIG_SECTION_DB, "dbengine page cache size MB");
config_move(CONFIG_SECTION_DB, "page cache size",
CONFIG_SECTION_DB, "dbengine page cache size MB");
config_move(CONFIG_SECTION_GLOBAL, "page cache uses malloc",
CONFIG_SECTION_DB, "page cache with malloc");
CONFIG_SECTION_DB, "dbengine page cache with malloc");
config_move(CONFIG_SECTION_DB, "page cache with malloc",
CONFIG_SECTION_DB, "dbengine page cache with malloc");
config_move(CONFIG_SECTION_GLOBAL, "dbengine disk space",
CONFIG_SECTION_DB, "dbengine disk space MB");
@ -650,12 +658,12 @@ static void get_netdata_configured_variables() {
// ------------------------------------------------------------------------
// get default Database Engine page cache size in MiB
db_engine_use_malloc = config_get_boolean(CONFIG_SECTION_DB, "page cache with malloc", CONFIG_BOOLEAN_NO);
default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "page cache size MB", default_rrdeng_page_cache_mb);
db_engine_use_malloc = config_get_boolean(CONFIG_SECTION_DB, "dbengine page cache with malloc", CONFIG_BOOLEAN_NO);
default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) {
error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB);
default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
config_set_number(CONFIG_SECTION_DB, "page cache size MB", default_rrdeng_page_cache_mb);
config_set_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
}
// ------------------------------------------------------------------------
@ -946,6 +954,7 @@ int main(int argc, char **argv) {
default_rrd_update_every = 1;
default_rrd_memory_mode = RRD_MEMORY_MODE_RAM;
default_health_enabled = 0;
storage_tiers = 1;
registry_init();
if(rrd_init("unittest", NULL)) {
fprintf(stderr, "rrd_init failed for unittest\n");
@ -1303,22 +1312,6 @@ int main(int argc, char **argv) {
// initialize the log files
open_all_log_files();
#ifdef ENABLE_DBENGINE
default_rrdeng_page_fetch_timeout = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page fetch timeout secs", PAGE_CACHE_FETCH_WAIT_TIMEOUT);
if (default_rrdeng_page_fetch_timeout < 1) {
info("'dbengine page fetch timeout secs' cannot be %d, using 1", default_rrdeng_page_fetch_timeout);
default_rrdeng_page_fetch_timeout = 1;
config_set_number(CONFIG_SECTION_DB, "dbengine page fetch timeout secs", default_rrdeng_page_fetch_timeout);
}
default_rrdeng_page_fetch_retries = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page fetch retries", MAX_PAGE_CACHE_FETCH_RETRIES);
if (default_rrdeng_page_fetch_retries < 1) {
info("\"dbengine page fetch retries\" found in netdata.conf cannot be %d, using 1", default_rrdeng_page_fetch_retries);
default_rrdeng_page_fetch_retries = 1;
config_set_number(CONFIG_SECTION_DB, "dbengine page fetch retries", default_rrdeng_page_fetch_retries);
}
#endif
get_system_timezone();
// --------------------------------------------------------------------

View file

@ -1704,7 +1704,7 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI
// Flush pages for subsequent real values
for (i = 0 ; i < CHARTS ; ++i) {
for (j = 0; j < DIMS; ++j) {
rrdeng_store_metric_flush_current_page(rd[i][j]);
rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle);
}
}
}
@ -1751,11 +1751,10 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI
{
fprintf(stderr, "%s() running...\n", __FUNCTION__ );
uint8_t same;
time_t time_now, time_retrieved;
time_t time_now, time_retrieved, end_time;
int i, j, k, c, errors, update_every;
collected_number last;
NETDATA_DOUBLE value, expected;
SN_FLAGS nflags;
struct rrddim_query_handle handle;
size_t value_errors = 0, time_errors = 0;
@ -1767,14 +1766,16 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI
time_now = time_start + (c + 1) * update_every;
for (i = 0 ; i < CHARTS ; ++i) {
for (j = 0; j < DIMS; ++j) {
rd[i][j]->state->query_ops.init(rd[i][j], &handle, time_now, time_now + QUERY_BATCH * update_every);
rd[i][j]->tiers[0]->query_ops.init(rd[i][j]->tiers[0]->db_metric_handle, &handle, time_now, time_now + QUERY_BATCH * update_every, TIER_QUERY_FETCH_SUM);
for (k = 0; k < QUERY_BATCH; ++k) {
last = ((collected_number)i * DIMS) * REGION_POINTS[current_region] +
j * REGION_POINTS[current_region] + c + k;
expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS));
time_t end_time;
value = rd[i][j]->state->query_ops.next_metric(&handle, &time_retrieved, &end_time, &nflags);
STORAGE_POINT sp = rd[i][j]->tiers[0]->query_ops.next_metric(&handle);
value = sp.sum;
time_retrieved = sp.start_time;
end_time = sp.end_time;
same = (roundndd(value) == roundndd(expected)) ? 1 : 0;
if(!same) {
@ -1793,7 +1794,7 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI
errors++;
}
}
rd[i][j]->state->query_ops.finalize(&handle);
rd[i][j]->tiers[0]->query_ops.finalize(&handle);
}
}
}
@ -1826,7 +1827,7 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]
ONEWAYALLOC *owa = onewayalloc_create(0);
RRDR *r = rrd2rrdr(owa, st[i], points, time_start, time_end,
RRDR_GROUPING_AVERAGE, 0, RRDR_OPTION_NATURAL_POINTS,
NULL, NULL, NULL, 0);
NULL, NULL, NULL, 0, 0);
if (!r) {
fprintf(stderr, " DB-engine unittest %s: empty RRDR on region %d ### E R R O R ###\n", st[i]->name, current_region);
@ -1913,7 +1914,7 @@ int test_dbengine(void)
for (i = 0 ; i < CHARTS ; ++i) {
st[i]->update_every = update_every;
for (j = 0; j < DIMS; ++j) {
rrdeng_store_metric_flush_current_page(rd[i][j]);
rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle);
}
}
@ -1932,7 +1933,7 @@ int test_dbengine(void)
for (i = 0 ; i < CHARTS ; ++i) {
st[i]->update_every = update_every;
for (j = 0; j < DIMS; ++j) {
rrdeng_store_metric_flush_current_page(rd[i][j]);
rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle);
}
}
@ -1960,7 +1961,7 @@ int test_dbengine(void)
ONEWAYALLOC *owa = onewayalloc_create(0);
RRDR *r = rrd2rrdr(owa, st[i], points, time_start[0] + update_every,
time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0,
RRDR_OPTION_NATURAL_POINTS, NULL, NULL, NULL, 0);
RRDR_OPTION_NATURAL_POINTS, NULL, NULL, NULL, 0, 0);
if (!r) {
fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", st[i]->name);
++errors;
@ -2005,9 +2006,9 @@ int test_dbengine(void)
}
error_out:
rrd_wrlock();
rrdeng_prepare_exit(host->rrdeng_ctx);
rrdeng_prepare_exit((struct rrdengine_instance *)host->storage_instance[0]);
rrdhost_delete_charts(host);
rrdeng_exit(host->rrdeng_ctx);
rrdeng_exit((struct rrdengine_instance *)host->storage_instance[0]);
rrd_unlock();
return errors + value_errors + time_errors;
@ -2092,7 +2093,7 @@ static void generate_dbengine_chart(void *arg)
thread_info->time_max = time_current;
}
for (j = 0; j < DSET_DIMS; ++j) {
rrdeng_store_metric_finalize(rd[j]);
rrdeng_store_metric_finalize((rd[j])->tiers[0]->db_collection_handle);
}
}
@ -2182,10 +2183,9 @@ static void query_dbengine_chart(void *arg)
RRDSET *st;
RRDDIM *rd;
uint8_t same;
time_t time_now, time_retrieved;
time_t time_now, time_retrieved, end_time;
collected_number generatedv;
NETDATA_DOUBLE value, expected;
SN_FLAGS nflags;
struct rrddim_query_handle handle;
size_t value_errors = 0, time_errors = 0;
@ -2213,13 +2213,13 @@ static void query_dbengine_chart(void *arg)
time_before = MIN(time_after + duration, time_max); /* up to 1 hour queries */
}
rd->state->query_ops.init(rd, &handle, time_after, time_before);
rd->tiers[0]->query_ops.init(rd->tiers[0]->db_metric_handle, &handle, time_after, time_before, TIER_QUERY_FETCH_SUM);
++thread_info->queries_nr;
for (time_now = time_after ; time_now <= time_before ; time_now += update_every) {
generatedv = generate_dbengine_chart_value(i, j, time_now);
expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE) generatedv, SN_DEFAULT_FLAGS));
if (unlikely(rd->state->query_ops.is_finished(&handle))) {
if (unlikely(rd->tiers[0]->query_ops.is_finished(&handle))) {
if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
", found data gap, ### E R R O R ###\n",
@ -2228,8 +2228,12 @@ static void query_dbengine_chart(void *arg)
}
break;
}
time_t end_time;
value = rd->state->query_ops.next_metric(&handle, &time_retrieved, &end_time, &nflags);
STORAGE_POINT sp = rd->tiers[0]->query_ops.next_metric(&handle);
value = sp.sum;
time_retrieved = sp.start_time;
end_time = sp.end_time;
if (!netdata_double_isnumber(value)) {
if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
@ -2263,7 +2267,7 @@ static void query_dbengine_chart(void *arg)
}
}
}
rd->state->query_ops.finalize(&handle);
rd->tiers[0]->query_ops.finalize(&handle);
} while(!thread_info->done);
if(value_errors)
@ -2411,9 +2415,9 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi
}
freez(query_threads);
rrd_wrlock();
rrdeng_prepare_exit(host->rrdeng_ctx);
rrdeng_prepare_exit((struct rrdengine_instance *)host->storage_instance[0]);
rrdhost_delete_charts(host);
rrdeng_exit(host->rrdeng_ctx);
rrdeng_exit((struct rrdengine_instance *)host->storage_instance[0]);
rrd_unlock();
}

View file

@ -26,18 +26,18 @@ To use the database engine, open `netdata.conf` and set `[db].mode` to `dbengine
mode = dbengine
```
To configure the database engine, look for the `page cache size MB` and `dbengine multihost disk space MB` settings in the
To configure the database engine, look for the `dbengine page cache size MB` and `dbengine multihost disk space MB` settings in the
`[db]` section of your `netdata.conf`. The Agent ignores the `[db].retention` setting when using the dbengine.
```conf
[db]
page cache size MB = 32
dbengine page cache size MB = 32
dbengine multihost disk space MB = 256
```
The above values are the default values for Page Cache size and DB engine disk space quota.
The `page cache size MB` option determines the amount of RAM dedicated to caching Netdata metric values. The
The `dbengine page cache size MB` option determines the amount of RAM dedicated to caching Netdata metric values. The
actual page cache size will be slightly larger than this figure—see the [memory requirements](#memory-requirements)
section for details.
@ -59,10 +59,10 @@ Netdata metric values per legacy database engine instance (see [details on the l
### Streaming metrics to the database engine
When using the multihost database engine, all parent and child nodes share the same `page cache size MB` and `dbengine
When using the multihost database engine, all parent and child nodes share the same `dbengine page cache size MB` and `dbengine
multihost disk space MB` in a single dbengine instance. The [**database engine
calculator**](/docs/store/change-metrics-storage.md#calculate-the-system-resources-ram-disk-space-needed-to-store-metrics)
helps you properly set `page cache size MB` and `dbengine multihost disk space MB` on your parent node to allocate enough
helps you properly set `dbengine page cache size MB` and `dbengine multihost disk space MB` on your parent node to allocate enough
resources based on your metrics retention policy and how many child nodes you have.
#### Legacy mode
@ -98,7 +98,7 @@ available memory.
There are explicit memory requirements **per** DB engine **instance**:
- The total page cache memory footprint will be an additional `#dimensions-being-collected x 4096 x 2` bytes over what
the user configured with `page cache size MB`.
the user configured with `dbengine page cache size MB`.
- an additional `#pages-on-disk x 4096 x 0.03` bytes of RAM are allocated for metadata.

View file

@ -444,18 +444,44 @@ void finalize_data_files(struct rrdengine_instance *ctx)
struct rrdengine_journalfile *journalfile;
struct extent_info *extent, *next_extent;
size_t extents_number = 0;
size_t extents_bytes = 0;
size_t page_compressed_sizes = 0;
size_t files_number = 0;
size_t files_bytes = 0;
for (datafile = ctx->datafiles.first ; datafile != NULL ; datafile = next_datafile) {
journalfile = datafile->journalfile;
next_datafile = datafile->next;
for (extent = datafile->extents.first ; extent != NULL ; extent = next_extent) {
extents_number++;
extents_bytes += sizeof(*extent) + sizeof(struct rrdeng_page_descr *) * extent->number_of_pages;
page_compressed_sizes += extent->size;
next_extent = extent->next;
freez(extent);
}
close_journal_file(journalfile, datafile);
close_data_file(datafile);
files_number++;
files_bytes += sizeof(*journalfile) + sizeof(*datafile);
freez(journalfile);
freez(datafile);
}
if(!files_number) files_number = 1;
if(!extents_number) extents_number = 1;
info("DBENGINE STATISTICS ON DATAFILES:"
" Files %zu, structures %zu bytes, %0.2f bytes per file."
" Extents %zu, structures %zu bytes, %0.2f bytes per extent."
" Compressed size of all pages: %zu bytes."
, files_number, files_bytes, (double)files_bytes/files_number
, extents_number, extents_bytes, (double)extents_bytes/extents_number
, page_compressed_sizes
);
}

View file

@ -302,8 +302,8 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden
Pvoid_t *PValue;
struct pg_cache_page_index *page_index = NULL;
if (PAGE_METRICS != jf_metric_data->descr[i].type) {
error("Unknown page type encountered.");
if (jf_metric_data->descr[i].type > PAGE_TYPE_MAX) {
error("Unknown page type %d encountered.", jf_metric_data->descr[i].type );
continue;
}
temp_id = (uuid_t *)jf_metric_data->descr[i].uuid;
@ -331,6 +331,7 @@ static void restore_extent_metadata(struct rrdengine_instance *ctx, struct rrden
descr->end_time = jf_metric_data->descr[i].end_time;
descr->id = &page_index->id;
descr->extent = extent;
descr->type = jf_metric_data->descr[i].type;
extent->pages[valid_pages++] = descr;
pg_cache_insert(ctx, page_index, descr);
}

View file

@ -1194,24 +1194,66 @@ void init_page_cache(struct rrdengine_instance *ctx)
init_committed_page_index(ctx);
}
/*
* METRIC # number
* 1. INDEX: JudyHS # bytes
* 2. DATA: page_index # bytes
*
* PAGE (1 page of 1 metric) # number
* 1. INDEX AT METRIC: page_index->JudyL_array # bytes
* 2. DATA: descr # bytes
*
* PAGE CACHE (1 page of 1 metric at the cache) # number
* 1. pg_cache_descr (if PG_CACHE_DESCR_ALLOCATED) # bytes
* 2. data (if RRD_PAGE_POPULATED) # bytes
*
*/
void free_page_cache(struct rrdengine_instance *ctx)
{
struct page_cache *pg_cache = &ctx->pg_cache;
Word_t ret_Judy, bytes_freed = 0;
Pvoid_t *PValue;
struct pg_cache_page_index *page_index, *prev_page_index;
Word_t Index;
struct rrdeng_page_descr *descr;
struct page_cache_descr *pg_cache_descr;
Word_t metrics_number = 0,
metrics_bytes = 0,
metrics_index_bytes = 0,
metrics_duration = 0;
Word_t pages_number = 0,
pages_bytes = 0,
pages_index_bytes = 0;
Word_t pages_size_per_type[256] = { 0 },
pages_count_per_type[256] = { 0 };
Word_t cache_pages_number = 0,
cache_pages_bytes = 0,
cache_pages_data_bytes = 0;
size_t points_in_db = 0,
uncompressed_points_size = 0,
seconds_in_db = 0,
single_point_pages = 0;
Word_t pages_dirty_index_bytes = 0;
usec_t oldest_time_ut = LONG_MAX, latest_time_ut = 0;
/* Free committed page index */
ret_Judy = JudyLFreeArray(&pg_cache->committed_page_index.JudyL_array, PJE0);
pages_dirty_index_bytes = JudyLFreeArray(&pg_cache->committed_page_index.JudyL_array, PJE0);
fatal_assert(NULL == pg_cache->committed_page_index.JudyL_array);
bytes_freed += ret_Judy;
for (page_index = pg_cache->metrics_index.last_page_index ;
page_index != NULL ;
page_index = prev_page_index) {
prev_page_index = page_index->prev;
/* Find first page in range */
@ -1219,37 +1261,116 @@ void free_page_cache(struct rrdengine_instance *ctx)
PValue = JudyLFirst(page_index->JudyL_array, &Index, PJE0);
descr = unlikely(NULL == PValue) ? NULL : *PValue;
size_t metric_duration = 0;
size_t metric_update_every = 0;
size_t metric_single_point_pages = 0;
while (descr != NULL) {
/* Iterate all page descriptors of this metric */
if (descr->pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED) {
cache_pages_number++;
/* Check rrdenglocking.c */
pg_cache_descr = descr->pg_cache_descr;
if (pg_cache_descr->flags & RRD_PAGE_POPULATED) {
dbengine_page_free(pg_cache_descr->page);
bytes_freed += RRDENG_BLOCK_SIZE;
cache_pages_data_bytes += RRDENG_BLOCK_SIZE;
}
rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr);
bytes_freed += sizeof(*pg_cache_descr);
cache_pages_bytes += sizeof(*pg_cache_descr);
}
if(descr->start_time < oldest_time_ut)
oldest_time_ut = descr->start_time;
if(descr->end_time > latest_time_ut)
latest_time_ut = descr->end_time;
pages_size_per_type[descr->type] += descr->page_length;
pages_count_per_type[descr->type]++;
size_t points_in_page = (descr->page_length / ctx->storage_size);
size_t page_duration = ((descr->end_time - descr->start_time) / USEC_PER_SEC);
size_t update_every = (page_duration == 0) ? 1 : page_duration / (points_in_page - 1);
if (!page_duration && metric_update_every) {
page_duration = metric_update_every;
update_every = metric_update_every;
}
else if(page_duration)
metric_update_every = update_every;
uncompressed_points_size += descr->page_length;
if(page_duration > 0) {
page_duration = update_every * points_in_page;
metric_duration += page_duration;
seconds_in_db += page_duration;
points_in_db += descr->page_length / ctx->storage_size;
}
else
metric_single_point_pages++;
freez(descr);
bytes_freed += sizeof(*descr);
pages_bytes += sizeof(*descr);
pages_number++;
PValue = JudyLNext(page_index->JudyL_array, &Index, PJE0);
descr = unlikely(NULL == PValue) ? NULL : *PValue;
}
if(metric_single_point_pages && metric_update_every) {
points_in_db += metric_single_point_pages;
seconds_in_db += metric_update_every * metric_single_point_pages;
metric_duration += metric_update_every * metric_single_point_pages;
}
else
single_point_pages += metric_single_point_pages;
/* Free page index */
ret_Judy = JudyLFreeArray(&page_index->JudyL_array, PJE0);
pages_index_bytes += JudyLFreeArray(&page_index->JudyL_array, PJE0);
fatal_assert(NULL == page_index->JudyL_array);
bytes_freed += ret_Judy;
freez(page_index);
bytes_freed += sizeof(*page_index);
metrics_number++;
metrics_bytes += sizeof(*page_index);
metrics_duration += metric_duration;
}
/* Free metrics index */
ret_Judy = JudyHSFreeArray(&pg_cache->metrics_index.JudyHS_array, PJE0);
metrics_index_bytes = JudyHSFreeArray(&pg_cache->metrics_index.JudyHS_array, PJE0);
fatal_assert(NULL == pg_cache->metrics_index.JudyHS_array);
bytes_freed += ret_Judy;
info("Freed %lu bytes of memory from page cache.", bytes_freed);
if(!metrics_number) metrics_number = 1;
if(!pages_number) pages_number = 1;
if(!cache_pages_number) cache_pages_number = 1;
if(!points_in_db) points_in_db = 1;
if(latest_time_ut == oldest_time_ut) oldest_time_ut -= USEC_PER_SEC;
if(single_point_pages) {
long double avg_duration = (long double)seconds_in_db / points_in_db;
points_in_db += single_point_pages;
seconds_in_db += (size_t)(avg_duration * single_point_pages);
}
info("DBENGINE STATISTICS ON METRICS:"
" Metrics: %lu (structures %lu bytes - per metric %0.2f, index (HS) %lu bytes - per metric %0.2f bytes - duration %zu secs) |"
" Page descriptors: %lu (structures %lu bytes - per page %0.2f bytes, index (L) %lu bytes - per page %0.2f, dirty index %lu bytes). |"
" Page cache: %lu pages (structures %lu bytes - per page %0.2f bytes, data %lu bytes). |"
" Points in db %zu, uncompressed size of points database %zu bytes. |"
" Duration of all points %zu seconds, average point duration %0.2f seconds."
" Duration of the database %llu seconds, average metric duration %0.2f seconds, average metric lifetime %0.2f%%."
, metrics_number, metrics_bytes, (double)metrics_bytes/metrics_number, metrics_index_bytes, (double)metrics_index_bytes/metrics_number, metrics_duration
, pages_number, pages_bytes, (double)pages_bytes/pages_number, pages_index_bytes, (double)pages_index_bytes/pages_number, pages_dirty_index_bytes
, cache_pages_number, cache_pages_bytes, (double)cache_pages_bytes/cache_pages_number, cache_pages_data_bytes
, points_in_db, uncompressed_points_size
, seconds_in_db, (double)seconds_in_db/points_in_db
, (latest_time_ut - oldest_time_ut) / USEC_PER_SEC, (double)metrics_duration/metrics_number
, (double)metrics_duration/metrics_number * 100.0 / ((latest_time_ut - oldest_time_ut) / USEC_PER_SEC)
);
for(int i = 0; i < 256 ;i++) {
if(pages_count_per_type[i])
info("DBENGINE STATISTICS ON PAGE TYPES: page type %d total pages %lu, average page size %0.2f bytes", i, pages_count_per_type[i], (double)pages_size_per_type[i]/pages_count_per_type[i]);
}
}

View file

@ -63,6 +63,7 @@ struct rrdeng_page_descr {
usec_t start_time;
usec_t end_time;
uint32_t page_length;
uint8_t type;
};
#define PAGE_INFO_SCRATCH_SZ (8)

View file

@ -35,7 +35,8 @@ struct rrdeng_df_sb {
* Page types
*/
#define PAGE_METRICS (0)
#define PAGE_LOGS (1) /* reserved */
#define PAGE_TIER (1)
#define PAGE_TYPE_MAX (1)
/*
* Data file page descriptor

View file

@ -781,7 +781,7 @@ static int do_flush_pages(struct rrdengine_worker_config* wc, int force, struct
xt_io_descr->descr_commit_idx_array[i] = descr_commit_idx_array[i];
descr = xt_io_descr->descr_array[i];
header->descr[i].type = PAGE_METRICS;
header->descr[i].type = descr->type;
uuid_copy(*(uuid_t *)header->descr[i].uuid, *descr->id);
header->descr[i].page_length = descr->page_length;
header->descr[i].start_time = descr->start_time;
@ -1339,7 +1339,7 @@ void rrdengine_main(void)
struct rrdengine_instance *ctx;
sanity_check();
ret = rrdeng_init(NULL, &ctx, "/tmp", RRDENG_MIN_PAGE_CACHE_SIZE_MB, RRDENG_MIN_DISK_SPACE_MB);
ret = rrdeng_init(NULL, &ctx, "/tmp", RRDENG_MIN_PAGE_CACHE_SIZE_MB, RRDENG_MIN_DISK_SPACE_MB, 0);
if (ret) {
exit(ret);
}

View file

@ -35,6 +35,7 @@ struct rrdengine_instance;
#define RRDENG_FILE_NUMBER_PRINT_TMPL "%1.1u-%10.10u"
struct rrdeng_collect_handle {
struct rrdeng_metric_handle *metric_handle;
struct rrdeng_page_descr *descr;
unsigned long page_correlation_id;
struct rrdengine_instance *ctx;
@ -43,6 +44,7 @@ struct rrdeng_collect_handle {
};
struct rrdeng_query_handle {
struct rrdeng_metric_handle *metric_handle;
struct rrdeng_page_descr *descr;
struct rrdengine_instance *ctx;
struct pg_cache_page_index *page_index;
@ -50,6 +52,7 @@ struct rrdeng_query_handle {
time_t now;
unsigned position;
unsigned entries;
TIER_QUERY_FETCH tier_query_fetch_type;
storage_number *page;
usec_t page_end_time;
uint32_t page_length;
@ -239,12 +242,15 @@ struct rrdengine_instance {
char machine_guid[GUID_LEN + 1]; /* the unique ID of the corresponding host, or localhost for multihost DB */
uint64_t disk_space;
uint64_t max_disk_space;
int tier;
size_t storage_size;
unsigned last_fileno; /* newest index of datafile and journalfile */
unsigned long max_cache_pages;
unsigned long cache_pages_low_watermark;
unsigned long metric_API_max_producers;
uint8_t quiesce; /* set to SET_QUIESCE before shutdown of the engine */
uint8_t page_type; /* page type stored by this instance (e.g. PAGE_METRICS or PAGE_TIER) */
struct rrdengine_statistics stats;
};

View file

@ -2,7 +2,23 @@
#include "rrdengine.h"
/* Default global database instance */
struct rrdengine_instance multidb_ctx;
struct rrdengine_instance multidb_ctx_storage_tier0;
struct rrdengine_instance multidb_ctx_storage_tier1;
struct rrdengine_instance multidb_ctx_storage_tier2;
struct rrdengine_instance multidb_ctx_storage_tier3;
struct rrdengine_instance multidb_ctx_storage_tier4;
#if RRD_STORAGE_TIERS != 5
#error RRD_STORAGE_TIERS is not 5 - you need to add allocations here
#endif
struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS];
__attribute__((constructor)) void initialize_multidb_ctx(void) {
multidb_ctx[0] = &multidb_ctx_storage_tier0;
multidb_ctx[1] = &multidb_ctx_storage_tier1;
multidb_ctx[2] = &multidb_ctx_storage_tier2;
multidb_ctx[3] = &multidb_ctx_storage_tier3;
multidb_ctx[4] = &multidb_ctx_storage_tier4;
}
int db_engine_use_malloc = 0;
int default_rrdeng_page_fetch_timeout = 3;
@ -13,9 +29,10 @@ int default_multidb_disk_quota_mb = 256;
/* Default behaviour is to unblock data collection if the page cache is full of dirty pages by dropping metrics */
uint8_t rrdeng_drop_metrics_under_page_cache_pressure = 1;
static inline struct rrdengine_instance *get_rrdeng_ctx_from_host(RRDHOST *host)
{
return host->rrdeng_ctx;
static inline struct rrdengine_instance *get_rrdeng_ctx_from_host(RRDHOST *host, int tier) {
if(tier < 0 || tier >= RRD_STORAGE_TIERS) tier = 0;
if(!host->storage_instance[tier]) tier = 0;
return (struct rrdengine_instance *)host->storage_instance[tier];
}
/* This UUID is not unique across hosts */
@ -52,10 +69,20 @@ void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN + 1], uu
memcpy(ret_uuid, hash_value, sizeof(uuid_t));
}
void rrdeng_metric_init(RRDDIM *rd)
{
struct page_cache *pg_cache;
struct rrdeng_metric_handle {
RRDDIM *rd;
struct rrdengine_instance *ctx;
uuid_t *rrdeng_uuid; // database engine metric UUID
struct pg_cache_page_index *page_index;
};
void rrdeng_metric_free(STORAGE_METRIC_HANDLE *db_metric_handle) {
freez(db_metric_handle);
}
STORAGE_METRIC_HANDLE *rrdeng_metric_init(RRDDIM *rd, STORAGE_INSTANCE *db_instance) {
struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance;
struct page_cache *pg_cache;
uuid_t legacy_uuid;
uuid_t multihost_legacy_uuid;
Pvoid_t *PValue;
@ -63,15 +90,10 @@ void rrdeng_metric_init(RRDDIM *rd)
int is_multihost_child = 0;
RRDHOST *host = rd->rrdset->rrdhost;
ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
if (unlikely(!ctx)) {
error("Failed to fetch multidb context");
return;
}
pg_cache = &ctx->pg_cache;
rrdeng_generate_legacy_uuid(rd->id, rd->rrdset->id, &legacy_uuid);
if (host != localhost && host->rrdeng_ctx == &multidb_ctx)
if (host != localhost && is_storage_engine_shared((STORAGE_INSTANCE *)ctx))
is_multihost_child = 1;
uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
@ -85,16 +107,16 @@ void rrdeng_metric_init(RRDDIM *rd)
* Drop legacy support, normal path */
uv_rwlock_rdlock(&pg_cache->metrics_index.lock);
PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &rd->state->metric_uuid, sizeof(uuid_t));
PValue = JudyHSGet(pg_cache->metrics_index.JudyHS_array, &rd->metric_uuid, sizeof(uuid_t));
if (likely(NULL != PValue)) {
page_index = *PValue;
}
uv_rwlock_rdunlock(&pg_cache->metrics_index.lock);
if (NULL == PValue) {
uv_rwlock_wrlock(&pg_cache->metrics_index.lock);
PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, &rd->state->metric_uuid, sizeof(uuid_t), PJE0);
PValue = JudyHSIns(&pg_cache->metrics_index.JudyHS_array, &rd->metric_uuid, sizeof(uuid_t), PJE0);
fatal_assert(NULL == *PValue); /* TODO: figure out concurrency model */
*PValue = page_index = create_page_index(&rd->state->metric_uuid);
*PValue = page_index = create_page_index(&rd->metric_uuid);
page_index->prev = pg_cache->metrics_index.last_page_index;
pg_cache->metrics_index.last_page_index = page_index;
uv_rwlock_wrunlock(&pg_cache->metrics_index.lock);
@ -105,52 +127,56 @@ void rrdeng_metric_init(RRDDIM *rd)
rrdeng_convert_legacy_uuid_to_multihost(rd->rrdset->rrdhost->machine_guid, &legacy_uuid,
&multihost_legacy_uuid);
int need_to_store = uuid_compare(rd->state->metric_uuid, multihost_legacy_uuid);
int need_to_store = uuid_compare(rd->metric_uuid, multihost_legacy_uuid);
uuid_copy(rd->state->metric_uuid, multihost_legacy_uuid);
uuid_copy(rd->metric_uuid, multihost_legacy_uuid);
if (unlikely(need_to_store))
(void)sql_store_dimension(&rd->state->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor,
if (unlikely(need_to_store && !ctx->tier))
(void)sql_store_dimension(&rd->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor,
rd->algorithm);
}
rd->state->rrdeng_uuid = &page_index->id;
rd->state->page_index = page_index;
struct rrdeng_metric_handle *mh = mallocz(sizeof(struct rrdeng_metric_handle));
mh->rd = rd;
mh->ctx = ctx;
mh->rrdeng_uuid = &page_index->id;
mh->page_index = page_index;
return (STORAGE_METRIC_HANDLE *)mh;
}
/*
* Gets a handle for storing metrics to the database.
* The handle must be released with rrdeng_store_metric_final().
*/
void rrdeng_store_metric_init(RRDDIM *rd)
{
STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle) {
struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle;
struct rrdeng_collect_handle *handle;
struct rrdengine_instance *ctx;
struct pg_cache_page_index *page_index;
ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
handle = callocz(1, sizeof(struct rrdeng_collect_handle));
handle->ctx = ctx;
handle->metric_handle = metric_handle;
handle->ctx = metric_handle->ctx;
handle->descr = NULL;
handle->unaligned_page = 0;
rd->state->handle = (STORAGE_COLLECT_HANDLE *)handle;
page_index = rd->state->page_index;
page_index = metric_handle->page_index;
uv_rwlock_wrlock(&page_index->lock);
++page_index->writers;
uv_rwlock_wrunlock(&page_index->lock);
return (STORAGE_COLLECT_HANDLE *)handle;
}
/* The page must be populated and referenced */
static int page_has_only_empty_metrics(struct rrdeng_page_descr *descr)
static int page_has_only_empty_metrics(struct rrdeng_page_descr *descr, size_t storage_size)
{
unsigned i;
uint8_t has_only_empty_metrics = 1;
storage_number *page;
page = descr->pg_cache_descr->page;
for (i = 0 ; i < descr->page_length / sizeof(storage_number); ++i) {
for (i = 0 ; i < descr->page_length / storage_size; ++i) {
if (SN_EMPTY_SLOT != page[i]) {
has_only_empty_metrics = 0;
break;
@ -159,26 +185,21 @@ static int page_has_only_empty_metrics(struct rrdeng_page_descr *descr)
return has_only_empty_metrics;
}
void rrdeng_store_metric_flush_current_page(RRDDIM *rd)
{
struct rrdeng_collect_handle *handle;
struct rrdengine_instance *ctx;
struct rrdeng_page_descr *descr;
void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle) {
struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
// struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle;
struct rrdengine_instance *ctx = handle->ctx;
struct rrdeng_page_descr *descr = handle->descr;
if (unlikely(!ctx)) return;
if (unlikely(!descr)) return;
handle = (struct rrdeng_collect_handle *)rd->state->handle;
ctx = handle->ctx;
if (unlikely(!ctx))
return;
descr = handle->descr;
if (unlikely(NULL == descr)) {
return;
}
if (likely(descr->page_length)) {
int page_is_empty;
rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1);
page_is_empty = page_has_only_empty_metrics(descr);
page_is_empty = page_has_only_empty_metrics(descr, ctx->storage_size);
if (page_is_empty) {
debug(D_RRDENGINE, "Page has empty metrics only, deleting:");
if (unlikely(debug_flags & D_RRDENGINE))
@ -195,20 +216,23 @@ void rrdeng_store_metric_flush_current_page(RRDDIM *rd)
handle->descr = NULL;
}
void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE n, SN_FLAGS flags)
void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE n,
NETDATA_DOUBLE min_value,
NETDATA_DOUBLE max_value,
uint16_t count,
uint16_t anomaly_count,
SN_FLAGS flags)
{
storage_number number = pack_storage_number(n, flags);
struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle;
struct rrdengine_instance *ctx = handle->ctx;
struct page_cache *pg_cache = &ctx->pg_cache;
struct rrdeng_page_descr *descr = handle->descr;
RRDDIM *rd = metric_handle->rd;
struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)rd->state->handle;
struct rrdengine_instance *ctx;
struct page_cache *pg_cache;
struct rrdeng_page_descr *descr;
storage_number *page;
void *page;
uint8_t must_flush_unaligned_page = 0, perfect_page_alignment = 0;
ctx = handle->ctx;
pg_cache = &ctx->pg_cache;
descr = handle->descr;
size_t storage_size = ctx->storage_size;
if (descr) {
/* Make alignment decisions */
@ -218,7 +242,7 @@ void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE n
perfect_page_alignment = 1;
}
/* is the metric far enough out of alignment with the others? */
if (unlikely(descr->page_length + sizeof(number) < rd->rrdset->rrddim_page_alignment)) {
if (unlikely(descr->page_length + storage_size < rd->rrdset->rrddim_page_alignment)) {
handle->unaligned_page = 1;
debug(D_RRDENGINE, "Metric page is not aligned with chart:");
if (unlikely(debug_flags & D_RRDENGINE))
@ -226,18 +250,18 @@ void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE n
}
if (unlikely(handle->unaligned_page &&
/* did the other metrics change page? */
rd->rrdset->rrddim_page_alignment <= sizeof(number))) {
rd->rrdset->rrddim_page_alignment <= storage_size)) {
debug(D_RRDENGINE, "Flushing unaligned metric page.");
must_flush_unaligned_page = 1;
handle->unaligned_page = 0;
}
}
if (unlikely(NULL == descr ||
descr->page_length + sizeof(number) > RRDENG_BLOCK_SIZE ||
descr->page_length + storage_size > RRDENG_BLOCK_SIZE ||
must_flush_unaligned_page)) {
rrdeng_store_metric_flush_current_page(rd);
rrdeng_store_metric_flush_current_page(collection_handle);
page = rrdeng_create_page(ctx, &rd->state->page_index->id, &descr);
page = rrdeng_create_page(ctx, &metric_handle->page_index->id, &descr);
fatal_assert(page);
handle->descr = descr;
@ -249,9 +273,37 @@ void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE n
perfect_page_alignment = 1;
}
}
page = descr->pg_cache_descr->page;
page[descr->page_length / sizeof(number)] = number;
pg_cache_atomic_set_pg_info(descr, point_in_time, descr->page_length + sizeof(number));
switch (descr->type) {
case PAGE_METRICS: {
((storage_number *)page)[descr->page_length / storage_size] = pack_storage_number(n, flags);
}
break;
case PAGE_TIER: {
storage_number_tier1_t number_tier1;
number_tier1.sum_value = (float)n;
number_tier1.min_value = (float)min_value;
number_tier1.max_value = (float)max_value;
number_tier1.anomaly_count = anomaly_count;
number_tier1.count = count;
((storage_number_tier1_t *)page)[descr->page_length / storage_size] = number_tier1;
}
break;
default: {
static bool logged = false;
if(!logged) {
error("DBENGINE: cannot store metric on unknown page type id %d", descr->type);
logged = true;
}
}
break;
}
pg_cache_atomic_set_pg_info(descr, point_in_time, descr->page_length + storage_size);
if (perfect_page_alignment)
rd->rrdset->rrddim_page_alignment = descr->page_length;
@ -271,9 +323,9 @@ void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE n
}
}
pg_cache_insert(ctx, rd->state->page_index, descr);
pg_cache_insert(ctx, metric_handle->page_index, descr);
} else {
pg_cache_add_new_metric_time(rd->state->page_index, descr);
pg_cache_add_new_metric_time(metric_handle->page_index, descr);
}
}
@ -281,15 +333,14 @@ void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE n
* Releases the database reference from the handle for storing metrics.
* Returns 1 if it's safe to delete the dimension.
*/
int rrdeng_store_metric_finalize(RRDDIM *rd)
{
struct rrdeng_collect_handle *handle;
struct pg_cache_page_index *page_index;
int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle;
struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)handle->metric_handle;
struct pg_cache_page_index *page_index = metric_handle->page_index;
uint8_t can_delete_metric = 0;
handle = (struct rrdeng_collect_handle *)rd->state->handle;
page_index = rd->state->page_index;
rrdeng_store_metric_flush_current_page(rd);
rrdeng_store_metric_flush_current_page(collection_handle);
uv_rwlock_wrlock(&page_index->lock);
if (!--page_index->writers && !page_index->page_count) {
can_delete_metric = 1;
@ -297,242 +348,52 @@ int rrdeng_store_metric_finalize(RRDDIM *rd)
uv_rwlock_wrunlock(&page_index->lock);
freez(handle);
return can_delete_metric;
}
/* Returns 1 if the data collection interval is well defined, 0 otherwise */
static int metrics_with_known_interval(struct rrdeng_page_descr *descr)
{
unsigned page_entries;
if (unlikely(INVALID_TIME == descr->start_time || INVALID_TIME == descr->end_time))
return 0;
page_entries = descr->page_length / sizeof(storage_number);
if (likely(page_entries > 1)) {
return 1;
}
return 0;
}
static inline uint32_t *pginfo_to_dt(struct rrdeng_page_info *page_info)
{
return (uint32_t *)&page_info->scratch[0];
}
static inline uint32_t *pginfo_to_points(struct rrdeng_page_info *page_info)
{
return (uint32_t *)&page_info->scratch[sizeof(uint32_t)];
}
/**
* Calculates the regions of different data collection intervals in a netdata chart in the time range
* [start_time,end_time]. This call takes the netdata chart read lock.
* @param st the netdata chart whose data collection interval boundaries are calculated.
* @param start_time inclusive starting time in usec
* @param end_time inclusive ending time in usec
* @param region_info_arrayp It allocates (*region_info_arrayp) and populates it with information of regions of a
* reference dimension that that have different data collection intervals and overlap with the time range
* [start_time,end_time]. The caller must free (*region_info_arrayp) with freez(). If region_info_arrayp is set
* to NULL nothing was allocated.
* @param max_intervalp is dereferenced and set to be the largest data collection interval of all regions.
* @return number of regions with different data collection intervals.
*/
unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t end_time,
struct rrdeng_region_info **region_info_arrayp, unsigned *max_intervalp, struct context_param *context_param_list)
{
struct pg_cache_page_index *page_index;
struct rrdengine_instance *ctx;
unsigned pages_nr;
RRDDIM *rd_iter, *rd;
struct rrdeng_page_info *page_info_array, *curr, *prev, *old_prev;
unsigned i, j, page_entries, region_points, page_points, regions, max_interval;
time_t now;
usec_t dt, current_position_time, max_time = 0, min_time, curr_time, first_valid_time_in_page;
struct rrdeng_region_info *region_info_array;
uint8_t is_first_region_initialized;
ctx = get_rrdeng_ctx_from_host(st->rrdhost);
regions = 1;
*max_intervalp = max_interval = 0;
region_info_array = NULL;
*region_info_arrayp = NULL;
page_info_array = NULL;
RRDDIM *temp_rd = context_param_list ? context_param_list->rd : NULL;
rrdset_rdlock(st);
for(rd_iter = temp_rd?temp_rd:st->dimensions, rd = NULL, min_time = (usec_t)-1 ; rd_iter ; rd_iter = rd_iter->next) {
/*
* Choose oldest dimension as reference. This is not equivalent to the union of all dimensions
* but it is a best effort approximation with a bias towards older metrics in a chart. It
* matches netdata behaviour in the sense that dimensions are generally aligned in a chart
* and older dimensions contain more information about the time range. It does not work well
* for metrics that have recently stopped being collected.
*/
curr_time = pg_cache_oldest_time_in_range(ctx, rd_iter->state->rrdeng_uuid,
start_time * USEC_PER_SEC, end_time * USEC_PER_SEC);
if (INVALID_TIME != curr_time && curr_time < min_time) {
rd = rd_iter;
min_time = curr_time;
}
}
rrdset_unlock(st);
if (NULL == rd) {
return 1;
}
pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
&page_info_array, &page_index);
if (pages_nr) {
/* conservative allocation, will reduce the size later if necessary */
region_info_array = mallocz(sizeof(*region_info_array) * pages_nr);
}
is_first_region_initialized = 0;
region_points = 0;
int is_out_of_order_reported = 0;
/* pages loop */
for (i = 0, curr = NULL, prev = NULL ; i < pages_nr ; ++i) {
old_prev = prev;
prev = curr;
curr = &page_info_array[i];
*pginfo_to_points(curr) = 0; /* initialize to invalid page */
*pginfo_to_dt(curr) = 0; /* no known data collection interval yet */
if (unlikely(INVALID_TIME == curr->start_time || INVALID_TIME == curr->end_time ||
curr->end_time < curr->start_time)) {
info("Ignoring page with invalid timestamps.");
prev = old_prev;
continue;
}
page_entries = curr->page_length / sizeof(storage_number);
fatal_assert(0 != page_entries);
if (likely(1 != page_entries)) {
dt = (curr->end_time - curr->start_time) / (page_entries - 1);
*pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(dt);
if (unlikely(0 == *pginfo_to_dt(curr)))
*pginfo_to_dt(curr) = 1;
} else {
dt = 0;
}
for (j = 0, page_points = 0 ; j < page_entries ; ++j) {
uint8_t is_metric_out_of_order, is_metric_earlier_than_range;
is_metric_earlier_than_range = 0;
is_metric_out_of_order = 0;
current_position_time = curr->start_time + j * dt;
now = current_position_time / USEC_PER_SEC;
if (now > end_time) { /* there will be no more pages in the time range */
break;
}
if (now < start_time)
is_metric_earlier_than_range = 1;
if (unlikely(current_position_time < max_time)) /* just went back in time */
is_metric_out_of_order = 1;
if (is_metric_earlier_than_range || unlikely(is_metric_out_of_order)) {
if (unlikely(is_metric_out_of_order))
is_out_of_order_reported++;
continue; /* next entry */
}
/* here is a valid metric */
++page_points;
region_info_array[regions - 1].points = ++region_points;
max_time = current_position_time;
if (1 == page_points)
first_valid_time_in_page = current_position_time;
if (unlikely(!is_first_region_initialized)) {
fatal_assert(1 == regions);
/* this is the first region */
region_info_array[0].start_time = current_position_time;
is_first_region_initialized = 1;
}
}
*pginfo_to_points(curr) = page_points;
if (0 == page_points) {
prev = old_prev;
continue;
}
if (unlikely(0 == *pginfo_to_dt(curr))) { /* unknown data collection interval */
fatal_assert(1 == page_points);
if (likely(NULL != prev)) { /* get interval from previous page */
*pginfo_to_dt(curr) = *pginfo_to_dt(prev);
} else { /* there is no previous page in the query */
struct rrdeng_page_info db_page_info;
/* go to database */
pg_cache_get_filtered_info_prev(ctx, page_index, curr->start_time,
metrics_with_known_interval, &db_page_info);
if (unlikely(db_page_info.start_time == INVALID_TIME || db_page_info.end_time == INVALID_TIME ||
0 == db_page_info.page_length)) { /* nothing in the database, default to update_every */
*pginfo_to_dt(curr) = rd->update_every;
} else {
unsigned db_entries;
usec_t db_dt;
db_entries = db_page_info.page_length / sizeof(storage_number);
db_dt = (db_page_info.end_time - db_page_info.start_time) / (db_entries - 1);
*pginfo_to_dt(curr) = ROUND_USEC_TO_SEC(db_dt);
if (unlikely(0 == *pginfo_to_dt(curr)))
*pginfo_to_dt(curr) = 1;
}
}
}
if (likely(prev) && unlikely(*pginfo_to_dt(curr) != *pginfo_to_dt(prev))) {
info("Data collection interval change detected in query: %"PRIu32" -> %"PRIu32,
*pginfo_to_dt(prev), *pginfo_to_dt(curr));
region_info_array[regions++ - 1].points -= page_points;
region_info_array[regions - 1].points = region_points = page_points;
region_info_array[regions - 1].start_time = first_valid_time_in_page;
}
if (*pginfo_to_dt(curr) > max_interval)
max_interval = *pginfo_to_dt(curr);
region_info_array[regions - 1].update_every = *pginfo_to_dt(curr);
}
if (page_info_array)
freez(page_info_array);
if (region_info_array) {
if (likely(is_first_region_initialized)) {
/* free unnecessary memory */
region_info_array = reallocz(region_info_array, sizeof(*region_info_array) * regions);
*region_info_arrayp = region_info_array;
*max_intervalp = max_interval;
} else {
/* empty result */
freez(region_info_array);
}
}
if (is_out_of_order_reported)
info("Ignored %d metrics with out of order timestamp in %u regions.", is_out_of_order_reported, regions);
return regions;
return can_delete_metric;
}
//static inline uint32_t *pginfo_to_dt(struct rrdeng_page_info *page_info)
//{
// return (uint32_t *)&page_info->scratch[0];
//}
//
//static inline uint32_t *pginfo_to_points(struct rrdeng_page_info *page_info)
//{
// return (uint32_t *)&page_info->scratch[sizeof(uint32_t)];
//}
//
/*
* Gets a handle for loading metrics from the database.
* The handle must be released with rrdeng_load_metric_final().
*/
void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_handle, time_t start_time, time_t end_time)
void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *rrdimm_handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type)
{
struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle;
struct rrdengine_instance *ctx = metric_handle->ctx;
RRDDIM *rd = metric_handle->rd;
// fprintf(stderr, "%s: %s/%s start time %ld, end time %ld\n", __FUNCTION__ , rd->rrdset->name, rd->name, start_time, end_time);
struct rrdeng_query_handle *handle;
struct rrdengine_instance *ctx;
unsigned pages_nr;
ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
rrdimm_handle->start_time = start_time;
rrdimm_handle->end_time = end_time;
handle = callocz(1, sizeof(struct rrdeng_query_handle));
handle->next_page_time = start_time;
handle->now = start_time;
handle->dt = rd->update_every * USEC_PER_SEC;
handle->dt_sec = rd->update_every;
handle->tier_query_fetch_type = tier_query_fetch_type;
// TODO we should store the dt of each page in each page
// this will produce wrong values for dt in case the user changes
// the update every of the charts or the tier grouping iterations
handle->dt_sec = get_tier_grouping(ctx->tier) * (time_t)rd->update_every;
handle->dt = handle->dt_sec * USEC_PER_SEC;
handle->position = 0;
handle->ctx = ctx;
handle->metric_handle = metric_handle;
handle->descr = NULL;
rrdimm_handle->handle = (STORAGE_QUERY_HANDLE *)handle;
pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
pages_nr = pg_cache_preload(ctx, metric_handle->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
NULL, &handle->page_index);
if (unlikely(NULL == handle->page_index || 0 == pages_nr))
// there are no metrics to load
@ -580,7 +441,7 @@ static int rrdeng_load_page_next(struct rrddim_query_handle *rrdimm_handle) {
if (unlikely(descr->start_time != page_end_time && next_page_time > descr->start_time)) {
// we're in the middle of the page somewhere
unsigned entries = page_length / sizeof(storage_number);
unsigned entries = page_length / ctx->storage_size;
position = ((uint64_t)(next_page_time - descr->start_time)) * (entries - 1) /
(page_end_time - descr->start_time);
}
@ -590,9 +451,14 @@ static int rrdeng_load_page_next(struct rrddim_query_handle *rrdimm_handle) {
handle->page_end_time = page_end_time;
handle->page_length = page_length;
handle->page = descr->pg_cache_descr->page;
usec_t entries = handle->entries = page_length / sizeof(storage_number);
usec_t entries = handle->entries = page_length / ctx->storage_size;
if (likely(entries > 1))
handle->dt = (page_end_time - descr->start_time) / (entries - 1);
else {
// TODO we should store the dt of each page in each page
// now we keep the dt of whatever was before
;
}
handle->dt_sec = (time_t)(handle->dt / USEC_PER_SEC);
handle->position = position;
@ -603,21 +469,21 @@ static int rrdeng_load_page_next(struct rrddim_query_handle *rrdimm_handle) {
// Returns the metric and sets its timestamp into current_time
// IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags)
// IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES
NETDATA_DOUBLE
rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *start_time, time_t *end_time, SN_FLAGS *flags) {
STORAGE_POINT rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle) {
struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle;
// struct rrdeng_metric_handle *metric_handle = handle->metric_handle;
STORAGE_POINT sp;
struct rrdeng_page_descr *descr = handle->descr;
unsigned position = handle->position + 1;
time_t now = handle->now + handle->dt_sec;
storage_number_tier1_t tier1_value;
if (unlikely(INVALID_TIME == handle->next_page_time)) {
handle->next_page_time = INVALID_TIME;
handle->now = now;
*start_time = now - handle->dt_sec;
*end_time = now;
*flags = SN_EMPTY_SLOT;
return NAN;
storage_point_empty(sp, now - handle->dt_sec, now);
return sp;
}
if (unlikely(!descr || position >= handle->entries)) {
@ -626,10 +492,8 @@ rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *start
// next calls will not load any more metrics
handle->next_page_time = INVALID_TIME;
handle->now = now;
*start_time = now - handle->dt_sec;
*end_time = now;
*flags = SN_EMPTY_SLOT;
return NAN;
storage_point_empty(sp, now - handle->dt_sec, now);
return sp;
}
descr = handle->descr;
@ -637,19 +501,51 @@ rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *start
now = (time_t)((descr->start_time + position * handle->dt) / USEC_PER_SEC);
}
storage_number n = handle->page[position];
sp.start_time = now - handle->dt_sec;
sp.end_time = now;
handle->position = position;
handle->now = now;
switch(descr->type) {
case PAGE_METRICS: {
storage_number n = handle->page[position];
sp.min = sp.max = sp.sum = unpack_storage_number(n);
sp.flags = n & SN_ALL_FLAGS;
sp.count = 1;
sp.anomaly_count = (n & SN_ANOMALY_BIT) ? 0 : 1;
}
break;
case PAGE_TIER: {
tier1_value = ((storage_number_tier1_t *)handle->page)[position];
sp.flags = tier1_value.anomaly_count ? 0 : SN_ANOMALY_BIT;
sp.count = tier1_value.count;
sp.anomaly_count = tier1_value.anomaly_count;
sp.min = tier1_value.min_value;
sp.max = tier1_value.max_value;
sp.sum = tier1_value.sum_value;
}
break;
// we don't know this page type
default: {
static bool logged = false;
if(!logged) {
error("DBENGINE: unknown page type %d found. Cannot decode it. Ignoring its metrics.", descr->type);
logged = true;
}
storage_point_empty(sp, sp.start_time, sp.end_time);
}
break;
}
if (unlikely(now >= rrdimm_handle->end_time)) {
// next calls will not load any more metrics
handle->next_page_time = INVALID_TIME;
}
*flags = n & SN_ALL_FLAGS;
*start_time = now - handle->dt_sec;
*end_time = now;
return unpack_storage_number(n);
return sp;
}
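// Illustrative sketch, not part of this patch: how a consumer is expected to
// drive the query interface above (in the generic code this happens through
// rd->tiers[tier]->query_ops). The helper name and the fetch argument are
// hypothetical.
static inline void example_walk_points(STORAGE_METRIC_HANDLE *db_metric_handle,
                                       time_t after, time_t before, TIER_QUERY_FETCH fetch) {
    struct rrddim_query_handle qh;
    rrdeng_load_metric_init(db_metric_handle, &qh, after, before, fetch);

    while(!rrdeng_load_metric_is_finished(&qh)) {
        STORAGE_POINT sp = rrdeng_load_metric_next(&qh);

        if(storage_point_is_unset(sp) || storage_point_is_empty(sp))
            continue;   // a gap in the database - time still advances

        // each point may aggregate many original samples; sum / count is the average
        NETDATA_DOUBLE average = sp.sum / (NETDATA_DOUBLE)sp.count;
        (void)average;
    }

    rrdeng_load_metric_finalize(&qh);
}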
int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle)
@ -679,31 +575,27 @@ void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle)
rrdimm_handle->handle = NULL;
}
time_t rrdeng_metric_latest_time(RRDDIM *rd)
{
struct pg_cache_page_index *page_index;
page_index = rd->state->page_index;
time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle;
struct pg_cache_page_index *page_index = metric_handle->page_index;
return page_index->latest_time / USEC_PER_SEC;
}
time_t rrdeng_metric_oldest_time(RRDDIM *rd)
{
struct pg_cache_page_index *page_index;
page_index = rd->state->page_index;
time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
struct rrdeng_metric_handle *metric_handle = (struct rrdeng_metric_handle *)db_metric_handle;
struct pg_cache_page_index *page_index = metric_handle->page_index;
return page_index->oldest_time / USEC_PER_SEC;
}
int rrdeng_metric_latest_time_by_uuid(uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t)
int rrdeng_metric_latest_time_by_uuid(uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t, int tier)
{
struct page_cache *pg_cache;
struct rrdengine_instance *ctx;
Pvoid_t *PValue;
struct pg_cache_page_index *page_index = NULL;
ctx = get_rrdeng_ctx_from_host(localhost);
ctx = get_rrdeng_ctx_from_host(localhost, tier);
if (unlikely(!ctx)) {
error("Failed to fetch multidb context");
return 1;
@ -736,6 +628,7 @@ void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrde
descr = pg_cache_create_descr();
descr->id = id; /* TODO: add page type: metric, log, something? */
descr->type = ctx->page_type;
page = dbengine_page_alloc(); /*TODO: add page size */
rrdeng_page_descr_mutex_lock(ctx, descr);
pg_cache_descr = descr->pg_cache_descr;
@ -899,8 +792,7 @@ void rrdeng_put_page(struct rrdengine_instance *ctx, void *handle)
* Returns 0 on success, negative on error
*/
int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb,
unsigned disk_space_mb)
{
unsigned disk_space_mb, int tier) {
struct rrdengine_instance *ctx;
int error;
uint32_t max_open_files;
@ -912,18 +804,24 @@ int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_p
if (rrdeng_reserved_file_descriptors > max_open_files) {
error(
"Exceeded the budget of available file descriptors (%u/%u), cannot create new dbengine instance.",
(unsigned)rrdeng_reserved_file_descriptors, (unsigned)max_open_files);
(unsigned)rrdeng_reserved_file_descriptors,
(unsigned)max_open_files);
rrd_stat_atomic_add(&global_fs_errors, 1);
rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
return UV_EMFILE;
}
if (NULL == ctxp) {
ctx = &multidb_ctx;
if(NULL == ctxp) {
ctx = multidb_ctx[tier];
memset(ctx, 0, sizeof(*ctx));
} else {
ctx->storage_size = (tier == 0) ? sizeof(storage_number) : sizeof(storage_number_tier1_t);
ctx->tier = tier;
ctx->page_type = !tier ? PAGE_METRICS : PAGE_TIER; // TODO: In the future it can be different page type per tier
}
else {
*ctxp = ctx = callocz(1, sizeof(*ctx));
ctx->storage_size = sizeof(storage_number);
}
ctx->global_compress_alg = RRD_LZ4;
if (page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB)
@ -977,7 +875,7 @@ error_after_rrdeng_worker:
finalize_rrd_files(ctx);
error_after_init_rrd_files:
free_page_cache(ctx);
if (ctx != &multidb_ctx) {
if (!is_storage_engine_shared((STORAGE_INSTANCE *)ctx)) {
freez(ctx);
*ctxp = NULL;
}
@ -1006,9 +904,9 @@ int rrdeng_exit(struct rrdengine_instance *ctx)
//metalog_exit(ctx->metalog_ctx);
free_page_cache(ctx);
if (ctx != &multidb_ctx) {
if(!is_storage_engine_shared((STORAGE_INSTANCE *)ctx))
freez(ctx);
}
rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE);
return 0;
}

View file

@ -19,7 +19,7 @@ extern int default_rrdeng_page_cache_mb;
extern int default_rrdeng_disk_quota_mb;
extern int default_multidb_disk_quota_mb;
extern uint8_t rrdeng_drop_metrics_under_page_cache_pressure;
extern struct rrdengine_instance multidb_ctx;
extern struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS];
struct rrdeng_region_info {
time_t start_time;
@ -39,29 +39,39 @@ extern void rrdeng_convert_legacy_uuid_to_multihost(char machine_guid[GUID_LEN +
uuid_t *ret_uuid);
extern void rrdeng_metric_init(RRDDIM *rd);
extern void rrdeng_store_metric_init(RRDDIM *rd);
extern void rrdeng_store_metric_flush_current_page(RRDDIM *rd);
extern void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE number, SN_FLAGS flags);
extern int rrdeng_store_metric_finalize(RRDDIM *rd);
extern unsigned
rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t end_time,
extern STORAGE_METRIC_HANDLE *rrdeng_metric_init(RRDDIM *rd, STORAGE_INSTANCE *db_instance);
extern void rrdeng_metric_free(STORAGE_METRIC_HANDLE *db_metric_handle);
extern STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle);
extern void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle);
extern void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE n,
NETDATA_DOUBLE min_value,
NETDATA_DOUBLE max_value,
uint16_t count,
uint16_t anomaly_count,
SN_FLAGS flags);
extern int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle);
extern unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t end_time,
struct rrdeng_region_info **region_info_arrayp, unsigned *max_intervalp, struct context_param *context_param_list);
extern void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_handle,
time_t start_time, time_t end_time);
extern NETDATA_DOUBLE rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *start_time, time_t *end_time, SN_FLAGS *flags);
extern void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *rrdimm_handle,
time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type);
extern STORAGE_POINT rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle);
extern int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle);
extern void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle);
extern time_t rrdeng_metric_latest_time(RRDDIM *rd);
extern time_t rrdeng_metric_oldest_time(RRDDIM *rd);
extern time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle);
extern time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle);
extern void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array);
/* must call once before using anything */
extern int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb,
unsigned disk_space_mb);
unsigned disk_space_mb, int tier);
extern int rrdeng_exit(struct rrdengine_instance *ctx);
extern void rrdeng_prepare_exit(struct rrdengine_instance *ctx);
extern int rrdeng_metric_latest_time_by_uuid(uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t);
extern int rrdeng_metric_latest_time_by_uuid(uuid_t *dim_uuid, time_t *first_entry_t, time_t *last_entry_t, int tier);
#endif /* NETDATA_RRDENGINEAPI_H */

View file

@ -401,7 +401,7 @@ static int rrdset_metric_correlations_ks2(RRDSET *st, DICTIONARY *results,
high_rrdr = rrd2rrdr(owa, st, points,
after, before, group,
group_time, options, NULL, context_param_list, group_options,
timeout);
timeout, 0);
if(!high_rrdr) {
info("Metric correlations: rrd2rrdr() failed for the highlighted window on chart '%s'.", st->name);
goto cleanup;
@ -427,7 +427,7 @@ static int rrdset_metric_correlations_ks2(RRDSET *st, DICTIONARY *results,
base_rrdr = rrd2rrdr(owa, st,high_points << shifts,
baseline_after, baseline_before, group,
group_time, options, NULL, context_param_list, group_options,
(int)(timeout - ((now_usec - started_usec) / USEC_PER_MS)));
(int)(timeout - ((now_usec - started_usec) / USEC_PER_MS)), 0);
if(!base_rrdr) {
info("Metric correlations: rrd2rrdr() failed for the baseline window on chart '%s'.", st->name);
goto cleanup;
@ -549,7 +549,7 @@ static int rrdset_metric_correlations_volume(RRDSET *st, DICTIONARY *results,
group, group_options, group_time, options,
NULL, NULL,
&stats->db_points, &stats->result_points,
&value_is_null, &base_anomaly_rate, 0);
&value_is_null, &base_anomaly_rate, 0, 0);
if(ret != HTTP_RESP_OK || value_is_null || !netdata_double_isnumber(baseline_average)) {
// this means no data for the baseline window, but we may have data for the highlighted one - assume zero
@ -565,7 +565,7 @@ static int rrdset_metric_correlations_volume(RRDSET *st, DICTIONARY *results,
group, group_options, group_time, options,
NULL, NULL,
&stats->db_points, &stats->result_points,
&value_is_null, &high_anomaly_rate, 0);
&value_is_null, &high_anomaly_rate, 0, 0);
if(ret != HTTP_RESP_OK || value_is_null || !netdata_double_isnumber(highlight_average)) {
// this means no data for the highlighted duration - so skip it
@ -590,7 +590,7 @@ static int rrdset_metric_correlations_volume(RRDSET *st, DICTIONARY *results,
group_time, options,
NULL, NULL,
&stats->db_points, &stats->result_points,
&value_is_null, NULL, 0);
&value_is_null, NULL, 0, 0);
if(ret != HTTP_RESP_OK || value_is_null || !netdata_double_isnumber(highlight_countif)) {
info("MC: highlighted countif query failed, but highlighted average worked - strange...");
@ -733,12 +733,12 @@ int metric_correlations(RRDHOST *host, BUFFER *wb, METRIC_CORRELATIONS_METHOD me
if(!points) points = 500;
rrdr_relative_window_to_absolute(&after, &before, default_rrd_update_every, points);
rrdr_relative_window_to_absolute(&after, &before);
if(baseline_before <= API_RELATIVE_TIME_MAX)
baseline_before += after;
rrdr_relative_window_to_absolute(&baseline_after, &baseline_before, default_rrd_update_every, points * 4);
rrdr_relative_window_to_absolute(&baseline_after, &baseline_before);
if (before <= after || baseline_before <= baseline_after) {
buffer_strcat(wb, "{\"error\": \"Invalid baseline or highlight ranges.\" }");

View file

@ -5,34 +5,151 @@
// ----------------------------------------------------------------------------
// RRDDIM legacy data collection functions
void rrddim_collect_init(RRDDIM *rd) {
rd->db[rd->rrdset->current_entry] = SN_EMPTY_SLOT;
rd->state->handle = calloc(1, sizeof(struct mem_collect_handle));
STORAGE_METRIC_HANDLE *rrddim_metric_init(RRDDIM *rd, STORAGE_INSTANCE *db_instance __maybe_unused) {
return (STORAGE_METRIC_HANDLE *)rd;
}
void rrddim_collect_store_metric(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE number, SN_FLAGS flags) {
(void)point_in_time;
void rrddim_metric_free(STORAGE_METRIC_HANDLE *db_metric_handle __maybe_unused) {
;
}
STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *db_metric_handle) {
RRDDIM *rd = (RRDDIM *)db_metric_handle;
rd->db[rd->rrdset->current_entry] = SN_EMPTY_SLOT;
struct mem_collect_handle *ch = calloc(1, sizeof(struct mem_collect_handle));
ch->rd = rd;
return (STORAGE_COLLECT_HANDLE *)ch;
}
void rrddim_collect_store_metric(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number,
NETDATA_DOUBLE min_value,
NETDATA_DOUBLE max_value,
uint16_t count,
uint16_t anomaly_count,
SN_FLAGS flags)
{
UNUSED(point_in_time);
UNUSED(min_value);
UNUSED(max_value);
UNUSED(count);
UNUSED(anomaly_count);
struct mem_collect_handle *ch = (struct mem_collect_handle *)collection_handle;
RRDDIM *rd = ch->rd;
rd->db[rd->rrdset->current_entry] = pack_storage_number(number, flags);
}
int rrddim_collect_finalize(RRDDIM *rd) {
free((struct mem_collect_handle*)rd->state->handle);
void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *collection_handle) {
struct mem_collect_handle *ch = (struct mem_collect_handle *)collection_handle;
RRDDIM *rd = ch->rd;
memset(rd->db, 0, rd->entries * sizeof(storage_number));
}
int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
free(collection_handle);
return 0;
}
// ----------------------------------------------------------------------------
// get the total duration in seconds of the round robin database
#define rrddim_duration(rd) (( (time_t)(rd)->rrdset->counter >= (time_t)(rd)->rrdset->entries ? (time_t)(rd)->rrdset->entries : (time_t)(rd)->rrdset->counter ) * (time_t)(rd)->rrdset->update_every)
// get the last slot updated in the round robin database
#define rrddim_last_slot(rd) ((size_t)(((rd)->rrdset->current_entry == 0) ? (rd)->rrdset->entries - 1 : (rd)->rrdset->current_entry - 1))
// return the slot that has the oldest value
#define rrddim_first_slot(rd) ((size_t)((rd)->rrdset->counter >= (size_t)(rd)->rrdset->entries ? (rd)->rrdset->current_entry : 0))
// get the slot of the round robin database, for the given timestamp (t)
// it always returns a valid slot, although it may not be the one for the time requested if that time is outside the round robin database
// only valid when not using dbengine
static inline size_t rrddim_time2slot(RRDDIM *rd, time_t t) {
size_t ret = 0;
time_t last_entry_t = rrddim_query_latest_time((STORAGE_METRIC_HANDLE *)rd);
time_t first_entry_t = rrddim_query_oldest_time((STORAGE_METRIC_HANDLE *)rd);
size_t entries = rd->rrdset->entries;
size_t first_slot = rrddim_first_slot(rd);
size_t last_slot = rrddim_last_slot(rd);
size_t update_every = rd->rrdset->update_every;
if(t >= last_entry_t) {
// the requested time is after the last entry we have
ret = last_slot;
}
else {
if(t <= first_entry_t) {
// the requested time is before the first entry we have
ret = first_slot;
}
else {
if(last_slot >= (size_t)((last_entry_t - t) / update_every))
ret = last_slot - ((last_entry_t - t) / update_every);
else
ret = last_slot - ((last_entry_t - t) / update_every) + entries;
}
}
if(unlikely(ret >= entries)) {
error("INTERNAL ERROR: rrddim_time2slot() on %s returns values outside entries", rd->name);
ret = entries - 1;
}
return ret;
}
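// Worked example (illustrative, not part of this patch): with entries = 5,
// update_every = 10, last_entry_t = 1000 and last_slot = 3, a request for
// t = 980 maps to slot 3 - (1000 - 980) / 10 = 1, i.e. two slots before the
// most recently written one; times outside [first_entry_t, last_entry_t] are
// clamped to first_slot and last_slot respectively.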
// get the timestamp of a specific slot in the round robin database
// only valid when not using dbengine
static inline time_t rrddim_slot2time(RRDDIM *rd, size_t slot) {
time_t ret;
time_t last_entry_t = rrddim_query_latest_time((STORAGE_METRIC_HANDLE *)rd);
time_t first_entry_t = rrddim_query_oldest_time((STORAGE_METRIC_HANDLE *)rd);
size_t entries = rd->rrdset->entries;
size_t last_slot = rrddim_last_slot(rd);
size_t update_every = rd->rrdset->update_every;
if(slot >= entries) {
error("INTERNAL ERROR: caller of rrddim_slot2time() gives invalid slot %zu", slot);
slot = entries - 1;
}
if(slot > last_slot)
ret = last_entry_t - (time_t)(update_every * (last_slot - slot + entries));
else
ret = last_entry_t - (time_t)(update_every * (last_slot - slot));
if(unlikely(ret < first_entry_t)) {
error("INTERNAL ERROR: rrddim_slot2time() on %s returns time too far in the past", rd->name);
ret = first_entry_t;
}
if(unlikely(ret > last_entry_t)) {
error("INTERNAL ERROR: rrddim_slot2time() on %s returns time into the future", rd->name);
ret = last_entry_t;
}
return ret;
}
// ----------------------------------------------------------------------------
// RRDDIM legacy database query functions
void rrddim_query_init(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time) {
void rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type) {
UNUSED(tier_query_fetch_type);
RRDDIM *rd = (RRDDIM *)db_metric_handle;
handle->rd = rd;
handle->start_time = start_time;
handle->end_time = end_time;
struct mem_query_handle* h = calloc(1, sizeof(struct mem_query_handle));
h->slot = rrdset_time2slot(rd->rrdset, start_time);
h->last_slot = rrdset_time2slot(rd->rrdset, end_time);
h->dt = rd->update_every;
h->slot = rrddim_time2slot(rd, start_time);
h->last_slot = rrddim_time2slot(rd, end_time);
h->dt = rd->rrdset->update_every;
h->next_timestamp = start_time;
h->slot_timestamp = rrdset_slot2time(rd->rrdset, h->slot);
h->last_timestamp = rrdset_slot2time(rd->rrdset, h->last_slot);
h->slot_timestamp = rrddim_slot2time(rd, h->slot);
h->last_timestamp = rrddim_slot2time(rd, h->last_slot);
// info("RRDDIM QUERY INIT: start %ld, end %ld, next %ld, first %ld, last %ld, dt %ld", start_time, end_time, h->next_timestamp, h->slot_timestamp, h->last_timestamp, h->dt);
@ -42,28 +159,30 @@ void rrddim_query_init(RRDDIM *rd, struct rrddim_query_handle *handle, time_t st
// Returns the metric and sets its timestamp into current_time
// IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags)
// IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES
NETDATA_DOUBLE
rrddim_query_next_metric(struct rrddim_query_handle *handle, time_t *start_time, time_t *end_time, SN_FLAGS *flags) {
STORAGE_POINT rrddim_query_next_metric(struct rrddim_query_handle *handle) {
RRDDIM *rd = handle->rd;
struct mem_query_handle* h = (struct mem_query_handle*)handle->handle;
size_t entries = rd->rrdset->entries;
size_t slot = h->slot;
STORAGE_POINT sp;
sp.count = 1;
time_t this_timestamp = h->next_timestamp;
h->next_timestamp += h->dt;
// set this timestamp for our caller
*start_time = this_timestamp - h->dt;
*end_time = this_timestamp;
sp.start_time = this_timestamp - h->dt;
sp.end_time = this_timestamp;
if(unlikely(this_timestamp < h->slot_timestamp)) {
*flags = SN_EMPTY_SLOT;
return NAN;
storage_point_empty(sp, sp.start_time, sp.end_time);
return sp;
}
if(unlikely(this_timestamp > h->last_timestamp)) {
*flags = SN_EMPTY_SLOT;
return NAN;
storage_point_empty(sp, sp.start_time, sp.end_time);
return sp;
}
storage_number n = rd->db[slot++];
@ -72,8 +191,11 @@ rrddim_query_next_metric(struct rrddim_query_handle *handle, time_t *start_time,
h->slot = slot;
h->slot_timestamp += h->dt;
*flags = (n & SN_ALL_FLAGS);
return unpack_storage_number(n);
sp.anomaly_count = (n & SN_ANOMALY_BIT) ? 0 : 1;
sp.flags = (n & SN_ALL_FLAGS);
sp.min = sp.max = sp.sum = unpack_storage_number(n);
return sp;
}
int rrddim_query_is_finished(struct rrddim_query_handle *handle) {
@ -89,10 +211,12 @@ void rrddim_query_finalize(struct rrddim_query_handle *handle) {
freez(handle->handle);
}
time_t rrddim_query_latest_time(RRDDIM *rd) {
return rrdset_last_entry_t_nolock(rd->rrdset);
time_t rrddim_query_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
RRDDIM *rd = (RRDDIM *)db_metric_handle;
return rd->rrdset->last_updated.tv_sec;
}
time_t rrddim_query_oldest_time(RRDDIM *rd) {
return rrdset_first_entry_t_nolock(rd->rrdset);
time_t rrddim_query_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) {
RRDDIM *rd = (RRDDIM *)db_metric_handle;
return (time_t)(rd->rrdset->last_updated.tv_sec - rrddim_duration(rd));
}

View file

@ -6,6 +6,7 @@
#include "database/rrd.h"
struct mem_collect_handle {
RRDDIM *rd;
long slot;
long entries;
};
@ -19,16 +20,24 @@ struct mem_query_handle {
size_t last_slot;
};
extern void rrddim_collect_init(RRDDIM *rd);
extern void rrddim_collect_store_metric(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE number, SN_FLAGS flags);
extern int rrddim_collect_finalize(RRDDIM *rd);
extern STORAGE_METRIC_HANDLE *rrddim_metric_init(RRDDIM *rd, STORAGE_INSTANCE *db_instance);
extern void rrddim_metric_free(STORAGE_METRIC_HANDLE *db_metric_handle);
extern void rrddim_query_init(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time);
extern NETDATA_DOUBLE
rrddim_query_next_metric(struct rrddim_query_handle *handle, time_t *start_time, time_t *end_time, SN_FLAGS *flags);
extern STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *db_metric_handle);
extern void rrddim_collect_store_metric(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number,
NETDATA_DOUBLE min_value,
NETDATA_DOUBLE max_value,
uint16_t count,
uint16_t anomaly_count,
SN_FLAGS flags);
extern void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *collection_handle);
extern int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *collection_handle);
extern void rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type);
extern STORAGE_POINT rrddim_query_next_metric(struct rrddim_query_handle *handle);
extern int rrddim_query_is_finished(struct rrddim_query_handle *handle);
extern void rrddim_query_finalize(struct rrddim_query_handle *handle);
extern time_t rrddim_query_latest_time(RRDDIM *rd);
extern time_t rrddim_query_oldest_time(RRDDIM *rd);
extern time_t rrddim_query_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle);
extern time_t rrddim_query_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle);
#endif

View file

@ -7,6 +7,11 @@
extern "C" {
#endif
// non-existing structs instead of voids
// to enable type checking at compile time
typedef struct storage_instance STORAGE_INSTANCE;
typedef struct storage_metric_handle STORAGE_METRIC_HANDLE;
// forward typedefs
typedef struct rrdhost RRDHOST;
typedef struct rrddim RRDDIM;
@ -23,9 +28,10 @@ typedef void *ml_host_t;
typedef void *ml_dimension_t;
// forward declarations
struct rrddim_volatile;
struct rrddim_tier;
struct rrdset_volatile;
struct context_param;
#ifdef ENABLE_DBENGINE
struct rrdeng_page_descr;
struct rrdengine_instance;
@ -34,6 +40,7 @@ struct pg_cache_page_index;
#include "daemon/common.h"
#include "web/api/queries/query.h"
#include "web/api/queries/rrdr.h"
#include "rrdvar.h"
#include "rrdsetvar.h"
#include "rrddimvar.h"
@ -43,6 +50,17 @@ struct pg_cache_page_index;
#include "aclk/aclk_rrdhost_state.h"
#include "sqlite/sqlite_health.h"
extern int storage_tiers;
extern int storage_tiers_grouping_iterations[RRD_STORAGE_TIERS];
typedef enum {
RRD_BACKFILL_NONE,
RRD_BACKFILL_FULL,
RRD_BACKFILL_NEW
} RRD_BACKFILL;
extern RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS];
enum {
CONTEXT_FLAGS_ARCHIVE = 0x01,
CONTEXT_FLAGS_CHART = 0x02,
@ -226,6 +244,8 @@ struct rrddim {
avl_t avl; // the binary index - this has to be first member!
uuid_t metric_uuid; // global UUID for this metric (unique_across hosts)
// ------------------------------------------------------------------------
// the dimension definition
@ -257,7 +277,13 @@ struct rrddim {
// this is actual date time we updated the last_collected_value
// THIS IS DIFFERENT FROM THE SAME MEMBER OF RRDSET
struct rrddim_volatile *state; // volatile state that is not persistently stored
#ifdef ENABLE_ACLK
int aclk_live_status;
#endif
ml_dimension_t ml_dimension;
struct rrddim_tier *tiers[RRD_STORAGE_TIERS]; // our tiers of databases
size_t collections_counter; // the number of times we added values to this rrdim
collected_number collected_value_max; // the absolute maximum of the collected value
@ -326,30 +352,71 @@ struct rrddim_query_handle {
RRDDIM *rd;
time_t start_time;
time_t end_time;
TIER_QUERY_FETCH tier_query_fetch_type;
STORAGE_QUERY_HANDLE* handle;
};
typedef struct storage_point {
NETDATA_DOUBLE min; // when count > 1, this is the minimum among them
NETDATA_DOUBLE max; // when count > 1, this is the maximum among them
NETDATA_DOUBLE sum; // the point sum - divided by count gives the average
// end_time - start_time = point duration
time_t start_time; // the time the point starts
time_t end_time; // the time the point ends
unsigned count; // the number of original points aggregated
unsigned anomaly_count; // the number of original points found anomalous
SN_FLAGS flags; // flags stored with the point
} STORAGE_POINT;
#define storage_point_unset(x) do { \
(x).min = (x).max = (x).sum = NAN; \
(x).count = 0; \
(x).anomaly_count = 0; \
(x).flags = SN_EMPTY_SLOT; \
(x).start_time = 0; \
(x).end_time = 0; \
} while(0)
#define storage_point_empty(x, start_t, end_t) do { \
(x).min = (x).max = (x).sum = NAN; \
(x).count = 1; \
(x).anomaly_count = 0; \
(x).flags = SN_EMPTY_SLOT; \
(x).start_time = start_t; \
(x).end_time = end_t; \
} while(0)
#define storage_point_is_unset(x) (!(x).count)
#define storage_point_is_empty(x) (!netdata_double_isnumber((x).sum))
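// Illustrative helpers, not part of this patch: how a STORAGE_POINT is meant
// to be read back by callers (the helper names below are hypothetical).
static inline NETDATA_DOUBLE storage_point_example_average(STORAGE_POINT sp) {
    // unset and empty points carry no value and should be treated as gaps
    if(storage_point_is_unset(sp) || storage_point_is_empty(sp))
        return NAN;
    return sp.sum / (NETDATA_DOUBLE)sp.count;
}

static inline NETDATA_DOUBLE storage_point_example_anomaly_rate(STORAGE_POINT sp) {
    // fraction of the original collected points that were flagged anomalous
    return sp.count ? (NETDATA_DOUBLE)sp.anomaly_count / (NETDATA_DOUBLE)sp.count : 0.0;
}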
// ------------------------------------------------------------------------
// function pointers that handle data collection
struct rrddim_collect_ops {
// an initialization function to run before starting collection
void (*init)(RRDDIM *rd);
STORAGE_COLLECT_HANDLE *(*init)(STORAGE_METRIC_HANDLE *db_metric_handle);
// run this to store each metric into the database
void (*store_metric)(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE number, SN_FLAGS flags);
void (*store_metric)(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number, NETDATA_DOUBLE min_value,
NETDATA_DOUBLE max_value, uint16_t count, uint16_t anomaly_count, SN_FLAGS flags);
// run this to flush / reset the current data collection sequence
void (*flush)(STORAGE_COLLECT_HANDLE *collection_handle);
// a finalization function to run after collection is over
// returns 1 if it's safe to delete the dimension
int (*finalize)(RRDDIM *rd);
int (*finalize)(STORAGE_COLLECT_HANDLE *collection_handle);
};
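// Illustrative sketch, not part of this patch: how one collected sample is
// handed to a tier-0 backend through the interface above (for a single raw
// sample min = max = sum = value and count = 1; higher tiers receive points
// aggregated from many such samples). The helper name is hypothetical.
static inline void example_store_raw_sample(struct rrddim_collect_ops *ops, STORAGE_COLLECT_HANDLE *ch,
                                            usec_t point_in_time, NETDATA_DOUBLE value, SN_FLAGS flags) {
    ops->store_metric(ch, point_in_time,
                      value,                               // the collected value
                      value, value,                        // min and max equal the value for one sample
                      1,                                   // one original point
                      (flags & SN_ANOMALY_BIT) ? 0 : 1,    // reversed anomaly bit: set means not anomalous
                      flags);
}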
// function pointers that handle database queries
struct rrddim_query_ops {
// run this before starting a series of next_metric() database queries
void (*init)(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time);
void (*init)(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type);
// run this to load each metric number from the database
NETDATA_DOUBLE (*next_metric)(struct rrddim_query_handle *handle, time_t *current_time, time_t *end_time, SN_FLAGS *flags);
STORAGE_POINT (*next_metric)(struct rrddim_query_handle *handle);
// run this to test if the series of next_metric() database queries is finished
int (*is_finished)(struct rrddim_query_handle *handle);
@ -358,29 +425,31 @@ struct rrddim_query_ops {
void (*finalize)(struct rrddim_query_handle *handle);
// get the timestamp of the last entry of this metric
time_t (*latest_time)(RRDDIM *rd);
time_t (*latest_time)(STORAGE_METRIC_HANDLE *db_metric_handle);
// get the timestamp of the first entry of this metric
time_t (*oldest_time)(RRDDIM *rd);
time_t (*oldest_time)(STORAGE_METRIC_HANDLE *db_metric_handle);
};
// ----------------------------------------------------------------------------
// volatile state per RRD dimension
struct rrddim_volatile {
#ifdef ENABLE_DBENGINE
uuid_t *rrdeng_uuid; // database engine metric UUID
struct pg_cache_page_index *page_index;
#endif
#ifdef ENABLE_ACLK
int aclk_live_status;
#endif
uuid_t metric_uuid; // global UUID for this metric (unique_across hosts)
STORAGE_COLLECT_HANDLE* handle;
// Storage tier data for every dimension
struct rrddim_tier {
int tier_grouping;
RRD_MEMORY_MODE mode; // the memory mode of this tier
RRD_BACKFILL backfill; // backfilling configuration
STORAGE_METRIC_HANDLE *db_metric_handle; // the metric handle inside the database
STORAGE_COLLECT_HANDLE *db_collection_handle; // the data collection handle
STORAGE_POINT virtual_point;
time_t next_point_time;
usec_t last_collected_ut;
struct rrddim_collect_ops collect_ops;
struct rrddim_query_ops query_ops;
ml_dimension_t ml_dimension;
};
extern void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, int tier, time_t now);
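// Illustrative sketch, not part of this patch: one possible way a higher tier
// could fold incoming tier-0 samples into its virtual_point and flush the
// aggregate when the tier interval completes. The helper name and the
// update_every parameter are hypothetical; the real aggregation lives in the
// generic data collection path.
static inline void example_aggregate_to_tier(struct rrddim_tier *t, usec_t now_ut,
                                             NETDATA_DOUBLE value, SN_FLAGS flags, int update_every) {
    STORAGE_POINT *vp = &t->virtual_point;

    if(!vp->count) {
        // first sample of this tier point
        vp->min = vp->max = vp->sum = value;
        vp->flags = flags;
    }
    else {
        if(value < vp->min) vp->min = value;
        if(value > vp->max) vp->max = value;
        vp->sum += value;
        vp->flags |= flags;
    }

    vp->count++;
    vp->anomaly_count += (flags & SN_ANOMALY_BIT) ? 0 : 1;   // reversed anomaly bit
    t->last_collected_ut = now_ut;

    if(t->db_collection_handle && (time_t)(now_ut / USEC_PER_SEC) >= t->next_point_time) {
        t->collect_ops.store_metric(t->db_collection_handle, now_ut,
                                    vp->sum, vp->min, vp->max,
                                    (uint16_t)vp->count, (uint16_t)vp->anomaly_count, vp->flags);
        storage_point_unset(*vp);
        t->next_point_time += t->tier_grouping * update_every;
    }
}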
// ----------------------------------------------------------------------------
// volatile state per chart
struct rrdset_volatile {
@ -854,9 +923,8 @@ struct rrdhost {
avl_tree_lock rrdfamily_root_index; // the host's chart families index
avl_tree_lock rrdvar_root_index; // the host's chart variables index
#ifdef ENABLE_DBENGINE
struct rrdengine_instance *rrdeng_ctx; // DB engine instance for this host
#endif
STORAGE_INSTANCE *storage_instance[RRD_STORAGE_TIERS]; // the database instances of the storage tiers
uuid_t host_uuid; // Global GUID for this host
uuid_t *node_id; // Cloud node_id
@ -900,6 +968,10 @@ extern netdata_rwlock_t rrd_rwlock;
// ----------------------------------------------------------------------------
extern bool is_storage_engine_shared(STORAGE_INSTANCE *engine);
// ----------------------------------------------------------------------------
extern size_t rrd_hosts_available;
extern time_t rrdhost_free_orphan_time;
@ -1067,28 +1139,49 @@ extern void rrdset_isnot_obsolete(RRDSET *st);
#define rrdset_is_available_for_exporting_and_alarms(st) (!rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions)
#define rrdset_is_archived(st) (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions)
// get the total duration in seconds of the round robin database
#define rrdset_duration(st) ((time_t)( (((st)->counter >= ((unsigned long)(st)->entries))?(unsigned long)(st)->entries:(st)->counter) * (st)->update_every ))
// get the timestamp of the last entry in the round robin database
static inline time_t rrdset_last_entry_t_nolock(RRDSET *st)
{
if (st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
RRDDIM *rd;
time_t last_entry_t = 0;
static inline time_t rrddim_last_entry_t(RRDDIM *rd) {
time_t latest = rd->tiers[0]->query_ops.latest_time(rd->tiers[0]->db_metric_handle);
rrddim_foreach_read(rd, st) {
last_entry_t = MAX(last_entry_t, rd->state->query_ops.latest_time(rd));
}
for(int tier = 1; tier < storage_tiers ;tier++) {
if(unlikely(!rd->tiers[tier])) continue;
return last_entry_t;
} else {
return (time_t)st->last_updated.tv_sec;
time_t t = rd->tiers[tier]->query_ops.latest_time(rd->tiers[tier]->db_metric_handle);
if(t > latest)
latest = t;
}
return latest;
}
static inline time_t rrdset_last_entry_t(RRDSET *st)
{
static inline time_t rrddim_first_entry_t(RRDDIM *rd) {
time_t oldest = 0;
for(int tier = 0; tier < storage_tiers ;tier++) {
if(unlikely(!rd->tiers[tier])) continue;
time_t t = rd->tiers[tier]->query_ops.oldest_time(rd->tiers[tier]->db_metric_handle);
if(t != 0 && (oldest == 0 || t < oldest))
oldest = t;
}
return oldest;
}
// get the timestamp of the last entry in the round robin database
static inline time_t rrdset_last_entry_t_nolock(RRDSET *st) {
RRDDIM *rd;
time_t last_entry_t = 0;
rrddim_foreach_read(rd, st) {
time_t t = rrddim_last_entry_t(rd);
if(t > last_entry_t) last_entry_t = t;
}
return last_entry_t;
}
static inline time_t rrdset_last_entry_t(RRDSET *st) {
time_t last_entry_t;
netdata_rwlock_rdlock(&st->rrdset_rwlock);
@ -1099,24 +1192,18 @@ static inline time_t rrdset_last_entry_t(RRDSET *st)
}
// get the timestamp of first entry in the round robin database
static inline time_t rrdset_first_entry_t_nolock(RRDSET *st)
{
if (st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
RRDDIM *rd;
time_t first_entry_t = LONG_MAX;
static inline time_t rrdset_first_entry_t_nolock(RRDSET *st) {
RRDDIM *rd;
time_t first_entry_t = LONG_MAX;
rrddim_foreach_read(rd, st) {
first_entry_t =
MIN(first_entry_t,
rd->state->query_ops.oldest_time(rd) > st->update_every ?
rd->state->query_ops.oldest_time(rd) - st->update_every : 0);
}
if (unlikely(LONG_MAX == first_entry_t)) return 0;
return first_entry_t;
} else {
return (time_t)(rrdset_last_entry_t_nolock(st) - rrdset_duration(st));
rrddim_foreach_read(rd, st) {
time_t t = rrddim_first_entry_t(rd);
if(t < first_entry_t)
first_entry_t = t;
}
if (unlikely(LONG_MAX == first_entry_t)) return 0;
return first_entry_t;
}
static inline time_t rrdset_first_entry_t(RRDSET *st)
@ -1130,105 +1217,8 @@ static inline time_t rrdset_first_entry_t(RRDSET *st)
return first_entry_t;
}
// get the timestamp of the last entry in the round robin database
static inline time_t rrddim_last_entry_t(RRDDIM *rd) {
if (rd->rrdset->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE)
return rd->state->query_ops.latest_time(rd);
return (time_t)rd->rrdset->last_updated.tv_sec;
}
static inline time_t rrddim_first_entry_t(RRDDIM *rd) {
if (rd->rrdset->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE)
return rd->state->query_ops.oldest_time(rd);
return (time_t)(rd->rrdset->last_updated.tv_sec - rrdset_duration(rd->rrdset));
}
time_t rrdhost_last_entry_t(RRDHOST *h);
// get the last slot updated in the round robin database
#define rrdset_last_slot(st) ((size_t)(((st)->current_entry == 0) ? (st)->entries - 1 : (st)->current_entry - 1))
// get the first / oldest slot updated in the round robin database
// #define rrdset_first_slot(st) ((size_t)( (((st)->counter >= ((unsigned long)(st)->entries)) ? (unsigned long)( ((unsigned long)(st)->current_entry > 0) ? ((unsigned long)(st)->current_entry) : ((unsigned long)(st)->entries) ) - 1 : 0) ))
// return the slot that has the oldest value
static inline size_t rrdset_first_slot(RRDSET *st) {
if(st->counter >= (size_t)st->entries) {
// the database has been rotated at least once
// the oldest entry is the one that will be next
// overwritten by data collection
return (size_t)st->current_entry;
}
// we do not have rotated the db yet
// so 0 is the first entry
return 0;
}
// get the slot of the round robin database, for the given timestamp (t)
// it always returns a valid slot, although it may not be the one for the time requested if that time is outside the round robin database
// only valid when not using dbengine
static inline size_t rrdset_time2slot(RRDSET *st, time_t t) {
size_t ret = 0;
time_t last_entry_t = rrdset_last_entry_t_nolock(st);
time_t first_entry_t = rrdset_first_entry_t_nolock(st);
if(t >= last_entry_t) {
// the requested time is after the last entry we have
ret = rrdset_last_slot(st);
}
else {
if(t <= first_entry_t) {
// the requested time is before the first entry we have
ret = rrdset_first_slot(st);
}
else {
if(rrdset_last_slot(st) >= (size_t)((last_entry_t - t) / st->update_every))
ret = rrdset_last_slot(st) - ((last_entry_t - t) / st->update_every);
else
ret = rrdset_last_slot(st) - ((last_entry_t - t) / st->update_every) + st->entries;
}
}
if(unlikely(ret >= (size_t)st->entries)) {
error("INTERNAL ERROR: rrdset_time2slot() on %s returns values outside entries", st->name);
ret = (size_t)(st->entries - 1);
}
return ret;
}
// get the timestamp of a specific slot in the round robin database
// only valid when not using dbengine
static inline time_t rrdset_slot2time(RRDSET *st, size_t slot) {
time_t ret;
time_t last_entry_t = rrdset_last_entry_t_nolock(st);
time_t first_entry_t = rrdset_first_entry_t_nolock(st);
if(slot >= (size_t)st->entries) {
error("INTERNAL ERROR: caller of rrdset_slot2time() gives invalid slot %zu", slot);
slot = (size_t)st->entries - 1;
}
if(slot > rrdset_last_slot(st))
ret = last_entry_t - (time_t)(st->update_every * (rrdset_last_slot(st) - slot + (size_t)st->entries));
else
ret = last_entry_t - (time_t)(st->update_every * (rrdset_last_slot(st) - slot));
if(unlikely(ret < first_entry_t)) {
error("INTERNAL ERROR: rrdset_slot2time() on %s returns time too far in the past", st->name);
ret = first_entry_t;
}
if(unlikely(ret > last_entry_t)) {
error("INTERNAL ERROR: rrdset_slot2time() on %s returns time into the future", st->name);
ret = last_entry_t;
}
return ret;
}
// ----------------------------------------------------------------------------
// RRD DIMENSION functions
@ -1318,6 +1308,8 @@ extern void set_host_properties(
const char *guid, const char *os, const char *tags, const char *tzone, const char *abbrev_tzone, int32_t utc_offset,
const char *program_name, const char *program_version);
extern int get_tier_grouping(int tier);
// ----------------------------------------------------------------------------
// RRD DB engine declarations

View file

@ -144,7 +144,7 @@ time_t calc_dimension_liveness(RRDDIM *rd, time_t now)
{
time_t last_updated = rd->last_collected_time.tv_sec;
int live;
if (rd->state->aclk_live_status == 1)
if (rd->aclk_live_status == 1)
live =
((now - last_updated) <
MIN(rrdset_free_obsolete_time, RRDSET_MINIMUM_DIM_OFFLINE_MULTIPLIER * rd->update_every));
@ -168,9 +168,16 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
rc += rrddim_set_algorithm(st, rd, algorithm);
rc += rrddim_set_multiplier(st, rd, multiplier);
rc += rrddim_set_divisor(st, rd, divisor);
if (rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) {
store_active_dimension(&rd->state->metric_uuid);
rd->state->collect_ops.init(rd);
store_active_dimension(&rd->metric_uuid);
for(int tier = 0; tier < storage_tiers ;tier++) {
if (rd->tiers[tier])
rd->tiers[tier]->db_collection_handle =
rd->tiers[tier]->collect_ops.init(rd->tiers[tier]->db_metric_handle);
}
rrddim_flag_clear(rd, RRDDIM_FLAG_ARCHIVED);
rrddimvar_create(rd, RRDVAR_TYPE_CALCULATED, NULL, NULL, &rd->last_stored_value, RRDVAR_OPTION_DEFAULT);
rrddimvar_create(rd, RRDVAR_TYPE_COLLECTED, NULL, "_raw", &rd->last_collected_value, RRDVAR_OPTION_DEFAULT);
@ -180,9 +187,10 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
rrdset_flag_set(st, RRDSET_FLAG_PENDING_FOREACH_ALARMS);
rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_FOREACH_ALARMS);
}
if (unlikely(rc)) {
debug(D_METADATALOG, "DIMENSION [%s] metadata updated", rd->id);
(void)sql_store_dimension(&rd->state->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor,
(void)sql_store_dimension(&rd->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor,
rd->algorithm);
#ifdef ENABLE_ACLK
queue_dimension_to_aclk(rd, calc_dimension_liveness(rd, now_realtime_sec()));
@ -246,23 +254,54 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
rd->rrd_memory_mode = memory_mode;
rd->state = callocz(1, sizeof(*rd->state));
#ifdef ENABLE_ACLK
rd->state->aclk_live_status = -1;
rd->aclk_live_status = -1;
#endif
(void) find_dimension_uuid(st, rd, &(rd->state->metric_uuid));
(void) find_dimension_uuid(st, rd, &(rd->metric_uuid));
STORAGE_ENGINE* eng = storage_engine_get(memory_mode);
rd->state->collect_ops = eng->api.collect_ops;
rd->state->query_ops = eng->api.query_ops;
// initialize the db tiers
{
size_t initialized = 0;
RRD_MEMORY_MODE wanted_mode = memory_mode;
for(int tier = 0; tier < storage_tiers ; tier++, wanted_mode = RRD_MEMORY_MODE_DBENGINE) {
STORAGE_ENGINE *eng = storage_engine_get(wanted_mode);
if(!eng) continue;
#ifdef ENABLE_DBENGINE
if(memory_mode == RRD_MEMORY_MODE_DBENGINE) {
rrdeng_metric_init(rd);
rd->tiers[tier] = callocz(1, sizeof(struct rrddim_tier));
rd->tiers[tier]->tier_grouping = get_tier_grouping(tier);
rd->tiers[tier]->mode = eng->id;
rd->tiers[tier]->collect_ops = eng->api.collect_ops;
rd->tiers[tier]->query_ops = eng->api.query_ops;
rd->tiers[tier]->db_metric_handle = eng->api.init(rd, host->storage_instance[tier]);
storage_point_unset(rd->tiers[tier]->virtual_point);
initialized++;
// internal_error(true, "TIER GROUPING of chart '%s', dimension '%s' for tier %d is set to %d", rd->rrdset->name, rd->name, tier, rd->tiers[tier]->tier_grouping);
}
if(!initialized)
error("Failed to initialize all db tiers for chart '%s', dimension '%s", st->name, rd->name);
if(!rd->tiers[0])
error("Failed to initialize the first db tier for chart '%s', dimension '%s", st->name, rd->name);
}
#endif
store_active_dimension(&rd->state->metric_uuid);
rd->state->collect_ops.init(rd);
store_active_dimension(&rd->metric_uuid);
// initialize data collection for all tiers
{
size_t initialized = 0;
for (int tier = 0; tier < storage_tiers; tier++) {
if (rd->tiers[tier]) {
rd->tiers[tier]->db_collection_handle = rd->tiers[tier]->collect_ops.init(rd->tiers[tier]->db_metric_handle);
initialized++;
}
}
if(!initialized)
error("Failed to initialize data collection for all db tiers for chart '%s', dimension '%s", st->name, rd->name);
}
// append this dimension
if(!st->dimensions)
st->dimensions = rd;
@ -318,10 +357,22 @@ void rrddim_free(RRDSET *st, RRDDIM *rd)
debug(D_RRD_CALLS, "rrddim_free() %s.%s", st->name, rd->name);
if (!rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) {
uint8_t can_delete_metric = rd->state->collect_ops.finalize(rd);
if (can_delete_metric && rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
size_t tiers_available = 0, tiers_said_yes = 0;
for(int tier = 0; tier < storage_tiers ;tier++) {
if(rd->tiers[tier]) {
tiers_available++;
if(rd->tiers[tier]->collect_ops.finalize(rd->tiers[tier]->db_collection_handle))
tiers_said_yes++;
rd->tiers[tier]->db_collection_handle = NULL;
}
}
if (tiers_available == tiers_said_yes && tiers_said_yes && rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
/* This metric has no data and no references */
delete_dimension_uuid(&rd->state->metric_uuid);
delete_dimension_uuid(&rd->metric_uuid);
}
}
@ -350,13 +401,20 @@ void rrddim_free(RRDSET *st, RRDDIM *rd)
// aclk_send_dimension_update(rd);
//#endif
freez((void *)rd->id);
freez((void *)rd->name);
freez(rd->state);
// this will free MEMORY_MODE_SAVE and MEMORY_MODE_MAP structures
rrddim_memory_file_free(rd);
for(int tier = 0; tier < storage_tiers ;tier++) {
if(!rd->tiers[tier]) continue;
STORAGE_ENGINE* eng = storage_engine_get(rd->tiers[tier]->mode);
if(eng)
eng->api.free(rd->tiers[tier]->db_metric_handle);
freez(rd->tiers[tier]);
rd->tiers[tier] = NULL;
}
if(rd->db) {
if(rd->rrd_memory_mode == RRD_MEMORY_MODE_RAM)
munmap(rd->db, rd->memsize);
@ -364,6 +422,8 @@ void rrddim_free(RRDSET *st, RRDDIM *rd)
freez(rd->db);
}
freez((void *)rd->id);
freez((void *)rd->name);
freez(rd);
}
@ -382,7 +442,7 @@ int rrddim_hide(RRDSET *st, const char *id) {
return 1;
}
if (!rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN))
(void)sql_set_dimension_option(&rd->state->metric_uuid, "hidden");
(void)sql_set_dimension_option(&rd->metric_uuid, "hidden");
rrddim_flag_set(rd, RRDDIM_FLAG_HIDDEN);
rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN);
@ -399,7 +459,7 @@ int rrddim_unhide(RRDSET *st, const char *id) {
return 1;
}
if (rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN))
(void)sql_set_dimension_option(&rd->state->metric_uuid, NULL);
(void)sql_set_dimension_option(&rd->metric_uuid, NULL);
rrddim_flag_clear(rd, RRDDIM_FLAG_HIDDEN);
rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN);

View file

@ -3,6 +3,26 @@
#define NETDATA_RRD_INTERNALS
#include "rrd.h"
int storage_tiers = 1;
int storage_tiers_grouping_iterations[RRD_STORAGE_TIERS] = { 1, 60, 60, 60, 60 };
RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS] = { RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW };
#if RRD_STORAGE_TIERS != 5
#error RRD_STORAGE_TIERS is not 5 - you need to update the grouping iterations per tier
#endif
int get_tier_grouping(int tier) {
if(unlikely(tier >= storage_tiers)) tier = storage_tiers - 1;
if(unlikely(tier < 0)) tier = 0;
int grouping = 1;
// first tier is always 1 iteration of whatever update every the chart has
for(int i = 1; i <= tier ;i++)
grouping *= storage_tiers_grouping_iterations[i];
return grouping;
}
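// Worked example (illustrative, not part of this patch): with the defaults
// above ({ 1, 60, 60, 60, 60 }) get_tier_grouping(1) == 60 and
// get_tier_grouping(2) == 60 * 60 == 3600, i.e. one tier-1 point aggregates
// 60 tier-0 collections and one tier-2 point aggregates 3600 of them.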
RRDHOST *localhost = NULL;
size_t rrd_hosts_available = 0;
netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;
@ -10,6 +30,18 @@ netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER;
time_t rrdset_free_obsolete_time = 3600;
time_t rrdhost_free_orphan_time = 3600;
bool is_storage_engine_shared(STORAGE_INSTANCE *engine) {
#ifdef ENABLE_DBENGINE
for(int tier = 0; tier < storage_tiers ;tier++) {
if (engine == (STORAGE_INSTANCE *)multidb_ctx[tier])
return true;
}
#endif
return false;
}
// ----------------------------------------------------------------------------
// RRDHOST index
@ -344,11 +376,28 @@ RRDHOST *rrdhost_create(const char *hostname,
if (ret != 0 && errno != EEXIST)
error("Host '%s': cannot create directory '%s'", host->hostname, dbenginepath);
else ret = 0; // succeed
if (is_legacy) // initialize legacy dbengine instance as needed
ret = rrdeng_init(host, &host->rrdeng_ctx, dbenginepath, default_rrdeng_page_cache_mb,
default_rrdeng_disk_quota_mb); // may fail here for legacy dbengine initialization
else
host->rrdeng_ctx = &multidb_ctx;
if (is_legacy) {
// initialize legacy dbengine instance as needed
ret = rrdeng_init(
host,
(struct rrdengine_instance **)&host->storage_instance[0],
dbenginepath,
default_rrdeng_page_cache_mb,
default_rrdeng_disk_quota_mb,
0); // may fail here for legacy dbengine initialization
if(ret == 0) {
// assign the rest of the shared storage instances to it
// to allow them to collect its metrics too
for(int tier = 1; tier < storage_tiers ; tier++)
host->storage_instance[tier] = (STORAGE_INSTANCE *)multidb_ctx[tier];
}
}
else {
for(int tier = 0; tier < storage_tiers ; tier++)
host->storage_instance[tier] = (STORAGE_INSTANCE *)multidb_ctx[tier];
}
if (ret) { // check legacy or multihost initialization success
error(
"Host '%s': cannot initialize host with machine guid '%s'. Failed to initialize DB engine at '%s'.",
@ -366,7 +415,9 @@ RRDHOST *rrdhost_create(const char *hostname,
}
else {
#ifdef ENABLE_DBENGINE
host->rrdeng_ctx = &multidb_ctx;
// the first tier is reserved for the non-dbengine modes
for(int tier = 1; tier < storage_tiers ; tier++)
host->storage_instance[tier] = (STORAGE_INSTANCE *)multidb_ctx[tier];
#endif
}
@ -673,7 +724,7 @@ restart_after_removal:
if (rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST)
#ifdef ENABLE_DBENGINE
/* don't delete multi-host DB host files */
&& !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && host->rrdeng_ctx == &multidb_ctx)
&& !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->storage_instance[0]))
#endif
)
rrdhost_delete_charts(host);
@ -690,6 +741,35 @@ restart_after_removal:
// RRDHOST global / startup initialization
int rrd_init(char *hostname, struct rrdhost_system_info *system_info) {
#ifdef ENABLE_DBENGINE
storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
if(storage_tiers < 1) {
error("At least 1 storage tier is required. Assuming 1.");
storage_tiers = 1;
config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
}
if(storage_tiers > RRD_STORAGE_TIERS) {
error("Up to %d storage tier are supported. Assuming %d.", RRD_STORAGE_TIERS, RRD_STORAGE_TIERS);
storage_tiers = RRD_STORAGE_TIERS;
config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
}
default_rrdeng_page_fetch_timeout = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page fetch timeout secs", PAGE_CACHE_FETCH_WAIT_TIMEOUT);
if (default_rrdeng_page_fetch_timeout < 1) {
info("'dbengine page fetch timeout secs' cannot be %d, using 1", default_rrdeng_page_fetch_timeout);
default_rrdeng_page_fetch_timeout = 1;
config_set_number(CONFIG_SECTION_DB, "dbengine page fetch timeout secs", default_rrdeng_page_fetch_timeout);
}
default_rrdeng_page_fetch_retries = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page fetch retries", MAX_PAGE_CACHE_FETCH_RETRIES);
if (default_rrdeng_page_fetch_retries < 1) {
info("\"dbengine page fetch retries\" found in netdata.conf cannot be %d, using 1", default_rrdeng_page_fetch_retries);
default_rrdeng_page_fetch_retries = 1;
config_set_number(CONFIG_SECTION_DB, "dbengine page fetch retries", default_rrdeng_page_fetch_retries);
}
#endif
rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time);
// Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short
// cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at
@ -713,7 +793,6 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info) {
}
health_init();
rrdpush_init();
debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname);
@ -746,24 +825,95 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info) {
}
#ifdef ENABLE_DBENGINE
int created_tiers = 0;
char dbenginepath[FILENAME_MAX + 1];
int ret;
snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", localhost->cache_dir);
ret = mkdir(dbenginepath, 0775);
if (ret != 0 && errno != EEXIST)
error("Host '%s': cannot create directory '%s'", localhost->hostname, dbenginepath);
else // Unconditionally create multihost db to support on demand host creation
ret = rrdeng_init(NULL, NULL, dbenginepath, default_rrdeng_page_cache_mb, default_multidb_disk_quota_mb);
if (ret) {
error(
"Host '%s' with machine guid '%s' failed to initialize multi-host DB engine instance at '%s'.",
localhost->hostname, localhost->machine_guid, localhost->cache_dir);
char dbengineconfig[200 + 1];
for(int tier = 0; tier < storage_tiers ;tier++) {
if(tier == 0)
snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", localhost->cache_dir);
else
snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine-tier%d", localhost->cache_dir, tier);
int ret = mkdir(dbenginepath, 0775);
if (ret != 0 && errno != EEXIST) {
error("DBENGINE on '%s': cannot create directory '%s'", localhost->hostname, dbenginepath);
break;
}
int page_cache_mb = default_rrdeng_page_cache_mb;
int disk_space_mb = default_multidb_disk_quota_mb;
int grouping_iterations = storage_tiers_grouping_iterations[tier];
RRD_BACKFILL backfill = storage_tiers_backfill[tier];
if(tier > 0) {
snprintfz(dbengineconfig, 200, "dbengine tier %d page cache size MB", tier);
page_cache_mb = config_get_number(CONFIG_SECTION_DB, dbengineconfig, page_cache_mb);
snprintfz(dbengineconfig, 200, "dbengine tier %d multihost disk space MB", tier);
disk_space_mb = config_get_number(CONFIG_SECTION_DB, dbengineconfig, disk_space_mb);
snprintfz(dbengineconfig, 200, "dbengine tier %d update every iterations", tier);
grouping_iterations = config_get_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations);
if(grouping_iterations < 2) {
grouping_iterations = 2;
config_set_number(CONFIG_SECTION_DB, dbengineconfig, grouping_iterations);
error("DBENGINE on '%s': 'dbegnine tier %d update every iterations' cannot be less than 2. Assuming 2.", localhost->hostname, tier);
}
snprintfz(dbengineconfig, 200, "dbengine tier %d backfill", tier);
const char *bf = config_get(CONFIG_SECTION_DB, dbengineconfig, backfill == RRD_BACKFILL_NEW ? "new" : backfill == RRD_BACKFILL_FULL ? "full" : "none");
if(strcmp(bf, "new") == 0) backfill = RRD_BACKFILL_NEW;
else if(strcmp(bf, "full") == 0) backfill = RRD_BACKFILL_FULL;
else if(strcmp(bf, "none") == 0) backfill = RRD_BACKFILL_NONE;
else {
error("DBENGINE: unknown backfill value '%s', assuming 'new'", bf);
config_set(CONFIG_SECTION_DB, dbengineconfig, "new");
backfill = RRD_BACKFILL_NEW;
}
}
storage_tiers_grouping_iterations[tier] = grouping_iterations;
storage_tiers_backfill[tier] = backfill;
if(tier > 0 && get_tier_grouping(tier) > 65535) {
storage_tiers_grouping_iterations[tier] = 1;
error("DBENGINE on '%s': dbengine tier %d gives aggregation of more than 65535 points of tier 0. Disabling tiers above %d", localhost->hostname, tier, tier);
break;
}
internal_error(true, "DBENGINE tier %d grouping iterations is set to %d", tier, storage_tiers_grouping_iterations[tier]);
ret = rrdeng_init(NULL, NULL, dbenginepath, page_cache_mb, disk_space_mb, tier);
if(ret != 0) {
error("DBENGINE on '%s': Failed to initialize multi-host database tier %d on path '%s'",
localhost->hostname, tier, dbenginepath);
break;
}
else
created_tiers++;
}
if(created_tiers && created_tiers < storage_tiers) {
error("DBENGINE on '%s': Managed to create %d tiers instead of %d. Continuing with %d available.",
localhost->hostname, created_tiers, storage_tiers, created_tiers);
storage_tiers = created_tiers;
}
else if(!created_tiers) {
error("DBENGINE on '%s', with machine guid '%s', failed to initialize databases at '%s'.",
localhost->hostname, localhost->machine_guid, localhost->cache_dir);
rrdhost_free(localhost);
localhost = NULL;
rrd_unlock();
fatal("Failed to initialize dbengine");
fatal("DBENGINE: Failed to be initialized.");
}
#else
storage_tiers = config_get_number(CONFIG_SECTION_DB, "storage tiers", 1);
if(storage_tiers != 1) {
error("DBENGINE is not available on '%s', so only 1 database tier can be supported.", localhost->hostname);
storage_tiers = 1;
config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers);
}
#endif
if (likely(system_info))
migrate_localhost(&localhost->host_uuid);
sql_aclk_sync_init();
@ -911,11 +1061,14 @@ void rrdhost_free(RRDHOST *host) {
// release its children resources
#ifdef ENABLE_DBENGINE
if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
if (host->rrdeng_ctx != &multidb_ctx)
rrdeng_prepare_exit(host->rrdeng_ctx);
for(int tier = 0; tier < storage_tiers ;tier++) {
if(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE &&
host->storage_instance[tier] &&
!is_storage_engine_shared(host->storage_instance[tier]))
rrdeng_prepare_exit((struct rrdengine_instance *)host->storage_instance[tier]);
}
#endif
while(host->rrdset_root)
rrdset_free(host->rrdset_root);
@ -947,8 +1100,12 @@ void rrdhost_free(RRDHOST *host) {
health_alarm_log_free(host);
#ifdef ENABLE_DBENGINE
if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && host->rrdeng_ctx != &multidb_ctx)
rrdeng_exit(host->rrdeng_ctx);
for(int tier = 0; tier < storage_tiers ;tier++) {
if(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE &&
host->storage_instance[tier] &&
!is_storage_engine_shared(host->storage_instance[tier]))
rrdeng_exit((struct rrdengine_instance *)host->storage_instance[tier]);
}
#endif
// ------------------------------------------------------------------------
@ -1267,7 +1424,7 @@ void rrdhost_cleanup_all(void) {
if (host != localhost && rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST) && !host->receiver
#ifdef ENABLE_DBENGINE
/* don't delete multi-host DB host files */
&& !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && host->rrdeng_ctx == &multidb_ctx)
&& !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->storage_instance[0]))
#endif
)
rrdhost_delete_charts(host);
@ -1323,11 +1480,24 @@ restart_after_removal:
if (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) {
rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE);
/* only a collector can mark a chart as obsolete, so we must remove the reference */
uint8_t can_delete_metric = rd->state->collect_ops.finalize(rd);
if (can_delete_metric) {
size_t tiers_available = 0, tiers_said_yes = 0;
for(int tier = 0; tier < storage_tiers ;tier++) {
if(rd->tiers[tier]) {
tiers_available++;
if(rd->tiers[tier]->collect_ops.finalize(rd->tiers[tier]->db_collection_handle))
tiers_said_yes++;
rd->tiers[tier]->db_collection_handle = NULL;
}
}
if (tiers_available == tiers_said_yes && tiers_said_yes) {
/* This metric has no data and no references */
delete_dimension_uuid(&rd->state->metric_uuid);
delete_dimension_uuid(&rd->metric_uuid);
rrddim_free(st, rd);
if (unlikely(!last)) {
rd = st->dimensions;

View file

@ -273,12 +273,13 @@ void rrdset_reset(RRDSET *st) {
rd->last_collected_time.tv_sec = 0;
rd->last_collected_time.tv_usec = 0;
rd->collections_counter = 0;
// memset(rd->values, 0, rd->entries * sizeof(storage_number));
#ifdef ENABLE_DBENGINE
if (RRD_MEMORY_MODE_DBENGINE == st->rrd_memory_mode && !rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) {
rrdeng_store_metric_flush_current_page(rd);
if(!rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) {
for(int tier = 0; tier < storage_tiers ;tier++) {
if(rd->tiers[tier])
rd->tiers[tier]->collect_ops.flush(rd->tiers[tier]->db_collection_handle);
}
}
#endif
}
}
@ -963,6 +964,105 @@ static inline usec_t rrdset_init_last_updated_time(RRDSET *st) {
return last_updated_ut;
}
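// next aggregation boundary for this tier: the first multiple of
// (update_every * tier_grouping) strictly after 'now'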
static inline time_t tier_next_point_time(RRDDIM *rd, struct rrddim_tier *t, time_t now) {
time_t loop = (time_t)rd->update_every * (time_t)t->tier_grouping;
return now + loop - ((now + loop) % loop);
}
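// merge one collected point into the tier's virtual (aggregated) point and
// flush it to the tier's database when the aggregation window completes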
void store_metric_at_tier(RRDDIM *rd, struct rrddim_tier *t, STORAGE_POINT sp, usec_t now_ut) {
if (unlikely(!t->next_point_time))
t->next_point_time = tier_next_point_time(rd, t, sp.end_time);
// merge the dates into our virtual point
if (unlikely(sp.start_time < t->virtual_point.start_time))
t->virtual_point.start_time = sp.start_time;
if (likely(sp.end_time > t->virtual_point.end_time))
t->virtual_point.end_time = sp.end_time;
// merge the values into our virtual point
if (likely(!storage_point_is_empty(sp))) {
// we aggregate only non NULLs into higher tiers
if (likely(!storage_point_is_unset(t->virtual_point))) {
// merge the collected point to our virtual one
t->virtual_point.sum += sp.sum;
t->virtual_point.min = MIN(t->virtual_point.min, sp.min);
t->virtual_point.max = MAX(t->virtual_point.max, sp.max);
t->virtual_point.count += sp.count;
t->virtual_point.anomaly_count += sp.anomaly_count;
t->virtual_point.flags |= sp.flags;
}
else {
// reset our virtual point to this one
t->virtual_point = sp;
}
}
if(unlikely(sp.end_time >= t->next_point_time)) {
if (likely(!storage_point_is_unset(t->virtual_point))) {
t->collect_ops.store_metric(
t->db_collection_handle,
now_ut,
t->virtual_point.sum,
t->virtual_point.min,
t->virtual_point.max,
t->virtual_point.count,
t->virtual_point.anomaly_count,
t->virtual_point.flags);
}
else {
t->collect_ops.store_metric(
t->db_collection_handle,
now_ut,
NAN,
NAN,
NAN,
0,
0,
SN_EMPTY_SLOT);
}
t->virtual_point.count = 0;
t->next_point_time = tier_next_point_time(rd, t, sp.end_time);
}
}
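// store the value on tier 0 and propagate it as a STORAGE_POINT to every other
// enabled tier, backfilling a tier from the lower tiers the first time it is collected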
static void store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags) {
// store the metric on tier 0
rd->tiers[0]->collect_ops.store_metric(rd->tiers[0]->db_collection_handle, point_end_time_ut, n, 0, 0, 1, 0, flags);
for(int tier = 1; tier < storage_tiers ;tier++) {
if(unlikely(!rd->tiers[tier])) continue;
struct rrddim_tier *t = rd->tiers[tier];
time_t now = (time_t)(point_end_time_ut / USEC_PER_SEC);
if(!t->last_collected_ut) {
// we have not collected this tier before
// let's fill any gap that may exist
rrdr_fill_tier_gap_from_smaller_tiers(rd, tier, now);
}
STORAGE_POINT sp = {
.start_time = now - rd->update_every,
.end_time = now,
.min = n,
.max = n,
.sum = n,
.count = 1,
.anomaly_count = (flags & SN_ANOMALY_BIT) ? 0 : 1,
.flags = flags
};
t->last_collected_ut = point_end_time_ut;
store_metric_at_tier(rd, t, sp, point_end_time_ut);
}
}
static inline size_t rrdset_done_interpolate(
RRDSET *st
, usec_t update_every_ut
@ -1086,8 +1186,8 @@ static inline size_t rrdset_done_interpolate(
if(unlikely(!store_this_entry)) {
(void) ml_is_anomalous(rd, 0, false);
rd->state->collect_ops.store_metric(rd, next_store_ut, NAN, SN_EMPTY_SLOT);
// rd->state->collect_ops.store_metric(rd, next_store_ut, NAN, 0, 0, 1, SN_EMPTY_SLOT, 0);
store_metric(rd, next_store_ut, NAN, SN_EMPTY_SLOT);
continue;
}
@ -1099,7 +1199,8 @@ static inline size_t rrdset_done_interpolate(
dim_storage_flags &= ~ ((uint32_t) SN_ANOMALY_BIT);
}
rd->state->collect_ops.store_metric(rd, next_store_ut, new_value, dim_storage_flags);
// rd->state->collect_ops.store_metric(rd, next_store_ut, new_value, 0, 0, 1, dim_storage_flags, 0);
store_metric(rd, next_store_ut, new_value, dim_storage_flags);
rd->last_stored_value = new_value;
}
else {
@ -1112,7 +1213,8 @@ static inline size_t rrdset_done_interpolate(
);
#endif
rd->state->collect_ops.store_metric(rd, next_store_ut, NAN, SN_EMPTY_SLOT);
// rd->state->collect_ops.store_metric(rd, next_store_ut, NAN, 0, 0, 1, SN_EMPTY_SLOT, 0);
store_metric(rd, next_store_ut, NAN, SN_EMPTY_SLOT);
rd->last_stored_value = NAN;
}
@ -1597,10 +1699,10 @@ after_first_database_work:
// it is now time to interpolate values on a second boundary
#ifdef NETDATA_INTERNAL_CHECKS
if(unlikely(now_collect_ut < next_store_ut)) {
if(unlikely(now_collect_ut < next_store_ut && st->counter_done > 1)) {
// this is collected in the same interpolation point
rrdset_debug(st, "THIS IS IN THE SAME INTERPOLATION POINT");
info("INTERNAL CHECK: host '%s', chart '%s' is collected in the same interpolation point: short by %llu microseconds", st->rrdhost->hostname, st->name, next_store_ut - now_collect_ut);
info("INTERNAL CHECK: host '%s', chart '%s' collection %zu is in the same interpolation point: short by %llu microseconds", st->rrdhost->hostname, st->name, st->counter_done, next_store_ut - now_collect_ut);
}
#endif
@ -1734,10 +1836,22 @@ after_second_database_work:
rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE);
/* only a collector can mark a chart as obsolete, so we must remove the reference */
uint8_t can_delete_metric = rd->state->collect_ops.finalize(rd);
if (can_delete_metric) {
size_t tiers_available = 0, tiers_said_yes = 0;
for(int tier = 0; tier < storage_tiers ;tier++) {
if(rd->tiers[tier]) {
tiers_available++;
if(rd->tiers[tier]->collect_ops.finalize(rd->tiers[tier]->db_collection_handle))
tiers_said_yes++;
rd->tiers[tier]->db_collection_handle = NULL;
}
}
if (tiers_available == tiers_said_yes && tiers_said_yes) {
/* This metric has no data and no references */
delete_dimension_uuid(&rd->state->metric_uuid);
delete_dimension_uuid(&rd->metric_uuid);
} else {
/* Do not delete this dimension */
#ifdef ENABLE_ACLK

View file

@ -611,7 +611,7 @@ void aclk_receive_chart_reset(struct aclk_database_worker_config *wc, struct acl
rrddim_foreach_read(rd, st)
{
rrddim_flag_clear(rd, RRDDIM_FLAG_ACLK);
rd->state->aclk_live_status = (rd->state->aclk_live_status == 0);
rd->aclk_live_status = (rd->aclk_live_status == 0);
}
rrdset_unlock(st);
}
@ -927,7 +927,7 @@ void aclk_update_retention(struct aclk_database_worker_config *wc)
#ifdef ENABLE_DBENGINE
if (memory_mode == RRD_MEMORY_MODE_DBENGINE)
rc =
rrdeng_metric_latest_time_by_uuid((uuid_t *)sqlite3_column_blob(res, 0), &first_entry_t, &last_entry_t);
rrdeng_metric_latest_time_by_uuid((uuid_t *)sqlite3_column_blob(res, 0), &first_entry_t, &last_entry_t, 0);
else
#endif
{
@ -1088,15 +1088,15 @@ void queue_dimension_to_aclk(RRDDIM *rd, time_t last_updated)
{
int live = !last_updated;
if (likely(rd->state->aclk_live_status == live))
if (likely(rd->aclk_live_status == live))
return;
time_t created_at = rd->state->query_ops.oldest_time(rd);
time_t created_at = rd->tiers[0]->query_ops.oldest_time(rd->tiers[0]->db_metric_handle);
if (unlikely(!created_at && rd->updated))
created_at = rd->last_collected_time.tv_sec;
rd->state->aclk_live_status = live;
rd->aclk_live_status = live;
struct aclk_database_worker_config *wc = rd->rrdset->rrdhost->dbsync_worker;
if (unlikely(!wc))
@ -1124,7 +1124,7 @@ void queue_dimension_to_aclk(RRDDIM *rd, time_t last_updated)
return;
struct aclk_chart_dimension_data *aclk_cd_data = mallocz(sizeof(*aclk_cd_data));
uuid_copy(aclk_cd_data->uuid, rd->state->metric_uuid);
uuid_copy(aclk_cd_data->uuid, rd->metric_uuid);
aclk_cd_data->payload = payload;
aclk_cd_data->payload_size = size;
aclk_cd_data->check_payload = 1;
@ -1139,7 +1139,7 @@ void queue_dimension_to_aclk(RRDDIM *rd, time_t last_updated)
if (unlikely(rc)) {
freez(aclk_cd_data->payload);
freez(aclk_cd_data);
rd->state->aclk_live_status = !live;
rd->aclk_live_status = !live;
}
return;
}
@ -1156,11 +1156,11 @@ void aclk_send_dimension_update(RRDDIM *rd)
time_t now = now_realtime_sec();
int live = ((now - rd->last_collected_time.tv_sec) < (RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER * rd->update_every));
if (!live || rd->state->aclk_live_status != live || !first_entry_t) {
if (!live || rd->aclk_live_status != live || !first_entry_t) {
(void)aclk_upd_dimension_event(
rd->rrdset->rrdhost->dbsync_worker,
claim_id,
&rd->state->metric_uuid,
&rd->metric_uuid,
rd->id,
rd->name,
rd->rrdset->id,
@ -1189,7 +1189,7 @@ void aclk_send_dimension_update(RRDDIM *rd)
first_entry_t,
last_entry_t,
now - last_entry_t);
rd->state->aclk_live_status = live;
rd->aclk_live_status = live;
}
freez(claim_id);

View file

@ -1369,8 +1369,10 @@ RRDHOST *sql_create_host_by_uuid(char *hostname)
host->system_info = callocz(1, sizeof(*host->system_info));
rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED);
#ifdef ENABLE_DBENGINE
host->rrdeng_ctx = &multidb_ctx;
for(int tier = 0; tier < storage_tiers ; tier++)
host->storage_instance[tier] = (STORAGE_INSTANCE *)multidb_ctx[tier];
#endif
failed:
@ -1538,7 +1540,7 @@ failed:
}
int find_dimension_first_last_t(char *machine_guid, char *chart_id, char *dim_id,
uuid_t *uuid, time_t *first_entry_t, time_t *last_entry_t, uuid_t *rrdeng_uuid)
uuid_t *uuid, time_t *first_entry_t, time_t *last_entry_t, uuid_t *rrdeng_uuid, int tier)
{
#ifdef ENABLE_DBENGINE
int rc;
@ -1546,13 +1548,13 @@ int find_dimension_first_last_t(char *machine_guid, char *chart_id, char *dim_id
uuid_t multihost_legacy_uuid;
time_t dim_first_entry_t, dim_last_entry_t;
rc = rrdeng_metric_latest_time_by_uuid(uuid, &dim_first_entry_t, &dim_last_entry_t);
rc = rrdeng_metric_latest_time_by_uuid(uuid, &dim_first_entry_t, &dim_last_entry_t, tier);
if (unlikely(rc)) {
rrdeng_generate_legacy_uuid(dim_id, chart_id, &legacy_uuid);
rc = rrdeng_metric_latest_time_by_uuid(&legacy_uuid, &dim_first_entry_t, &dim_last_entry_t);
rc = rrdeng_metric_latest_time_by_uuid(&legacy_uuid, &dim_first_entry_t, &dim_last_entry_t, tier);
if (likely(rc)) {
rrdeng_convert_legacy_uuid_to_multihost(machine_guid, &legacy_uuid, &multihost_legacy_uuid);
rc = rrdeng_metric_latest_time_by_uuid(&multihost_legacy_uuid, &dim_first_entry_t, &dim_last_entry_t);
rc = rrdeng_metric_latest_time_by_uuid(&multihost_legacy_uuid, &dim_first_entry_t, &dim_last_entry_t, tier);
if (likely(!rc))
uuid_copy(*rrdeng_uuid, multihost_legacy_uuid);
}
@ -1578,27 +1580,35 @@ int find_dimension_first_last_t(char *machine_guid, char *chart_id, char *dim_id
return 1;
#endif
}
#include "../storage_engine.h"
#ifdef ENABLE_DBENGINE
static RRDDIM *create_rrdim_entry(ONEWAYALLOC *owa, RRDSET *st, char *id, char *name, uuid_t *metric_uuid)
{
RRDDIM *rd = onewayalloc_callocz(owa, 1, sizeof(*rd));
rd->rrdset = st;
rd->update_every = st->update_every;
rd->last_stored_value = NAN;
rrddim_flag_set(rd, RRDDIM_FLAG_NONE);
rd->state = onewayalloc_mallocz(owa, sizeof(*rd->state));
rd->rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
rd->state->query_ops.init = rrdeng_load_metric_init;
rd->state->query_ops.next_metric = rrdeng_load_metric_next;
rd->state->query_ops.is_finished = rrdeng_load_metric_is_finished;
rd->state->query_ops.finalize = rrdeng_load_metric_finalize;
rd->state->query_ops.latest_time = rrdeng_metric_latest_time;
rd->state->query_ops.oldest_time = rrdeng_metric_oldest_time;
rd->state->rrdeng_uuid = onewayalloc_mallocz(owa, sizeof(uuid_t));
uuid_copy(*rd->state->rrdeng_uuid, *metric_uuid);
uuid_copy(rd->state->metric_uuid, *metric_uuid);
STORAGE_ENGINE *eng = storage_engine_get(RRD_MEMORY_MODE_DBENGINE);
uuid_copy(rd->metric_uuid, *metric_uuid);
rd->id = onewayalloc_strdupz(owa, id);
rd->name = onewayalloc_strdupz(owa, name);
for(int tier = 0; tier < storage_tiers ;tier++) {
rd->tiers[tier] = onewayalloc_callocz(owa, 1, sizeof(*rd->tiers[tier]));
rd->rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
rd->tiers[tier]->tier_grouping = get_tier_grouping(tier);
rd->tiers[tier]->mode = RRD_MEMORY_MODE_DBENGINE;
rd->tiers[tier]->query_ops.init = rrdeng_load_metric_init;
rd->tiers[tier]->query_ops.next_metric = rrdeng_load_metric_next;
rd->tiers[tier]->query_ops.is_finished = rrdeng_load_metric_is_finished;
rd->tiers[tier]->query_ops.finalize = rrdeng_load_metric_finalize;
rd->tiers[tier]->query_ops.latest_time = rrdeng_metric_latest_time;
rd->tiers[tier]->query_ops.oldest_time = rrdeng_metric_oldest_time;
rd->tiers[tier]->db_metric_handle = eng->api.init(rd, st->rrdhost->storage_instance[tier]);
}
return rd;
}
#endif
@ -1697,7 +1707,7 @@ void sql_build_context_param_list(ONEWAYALLOC *owa, struct context_param **para
if (unlikely(find_dimension_first_last_t(machine_guid, (char *)st->name, (char *)sqlite3_column_text(res, 1),
(uuid_t *)sqlite3_column_blob(res, 0), &(*param_list)->first_entry_t, &(*param_list)->last_entry_t,
&rrdeng_uuid)))
&rrdeng_uuid, 0)))
continue;
st->counter++;

View file

@ -9,6 +9,7 @@
#define im_collect_ops { \
.init = rrddim_collect_init,\
.store_metric = rrddim_collect_store_metric,\
.flush = rrddim_store_metric_flush,\
.finalize = rrddim_collect_finalize\
}
@ -26,6 +27,8 @@ static STORAGE_ENGINE engines[] = {
.id = RRD_MEMORY_MODE_NONE,
.name = RRD_MEMORY_MODE_NONE_NAME,
.api = {
.init = rrddim_metric_init,
.free = rrddim_metric_free,
.collect_ops = im_collect_ops,
.query_ops = im_query_ops
}
@ -34,6 +37,8 @@ static STORAGE_ENGINE engines[] = {
.id = RRD_MEMORY_MODE_RAM,
.name = RRD_MEMORY_MODE_RAM_NAME,
.api = {
.init = rrddim_metric_init,
.free = rrddim_metric_free,
.collect_ops = im_collect_ops,
.query_ops = im_query_ops
}
@ -42,6 +47,8 @@ static STORAGE_ENGINE engines[] = {
.id = RRD_MEMORY_MODE_MAP,
.name = RRD_MEMORY_MODE_MAP_NAME,
.api = {
.init = rrddim_metric_init,
.free = rrddim_metric_free,
.collect_ops = im_collect_ops,
.query_ops = im_query_ops
}
@ -50,6 +57,8 @@ static STORAGE_ENGINE engines[] = {
.id = RRD_MEMORY_MODE_SAVE,
.name = RRD_MEMORY_MODE_SAVE_NAME,
.api = {
.init = rrddim_metric_init,
.free = rrddim_metric_free,
.collect_ops = im_collect_ops,
.query_ops = im_query_ops
}
@ -58,6 +67,8 @@ static STORAGE_ENGINE engines[] = {
.id = RRD_MEMORY_MODE_ALLOC,
.name = RRD_MEMORY_MODE_ALLOC_NAME,
.api = {
.init = rrddim_metric_init,
.free = rrddim_metric_free,
.collect_ops = im_collect_ops,
.query_ops = im_query_ops
}
@ -67,9 +78,12 @@ static STORAGE_ENGINE engines[] = {
.id = RRD_MEMORY_MODE_DBENGINE,
.name = RRD_MEMORY_MODE_DBENGINE_NAME,
.api = {
.init = rrdeng_metric_init,
.free = rrdeng_metric_free,
.collect_ops = {
.init = rrdeng_store_metric_init,
.store_metric = rrdeng_store_metric_next,
.flush = rrdeng_store_metric_flush_current_page,
.finalize = rrdeng_store_metric_finalize
},
.query_ops = {

View file

@ -10,6 +10,8 @@ typedef struct storage_engine STORAGE_ENGINE;
// ------------------------------------------------------------------------
// function pointers for all APIs provided by a storage engine
typedef struct storage_engine_api {
STORAGE_METRIC_HANDLE *(*init)(RRDDIM *rd, STORAGE_INSTANCE *instance);
void (*free)(STORAGE_METRIC_HANDLE *);
struct rrddim_collect_ops collect_ops;
struct rrddim_query_ops query_ops;
} STORAGE_ENGINE_API;

View file

@ -1,150 +1,158 @@
<!--
title: "Change how long Netdata stores metrics"
description: "With a single configuration change, the Netdata Agent can store days, weeks, or months of metrics at its famous per-second granularity."
title: "Netdata Longer Metrics Retention"
description: ""
custom_edit_url: https://github.com/netdata/netdata/edit/master/docs/guides/longer-metrics-storage.md
-->
# Change how long Netdata stores metrics
# Netdata Longer Metrics Retention
Netdata helps you collect thousands of system and application metrics every second, but what about storing them for the
long term?
Metrics retention affects 3 aspects of the operation of a Netdata Agent:
Many people think Netdata can only store about an hour's worth of real-time metrics, but that's simply not true any
more. With the right settings, Netdata is quite capable of efficiently storing hours or days worth of historical,
per-second metrics without having to rely on an [exporting engine](/docs/export/external-databases.md).
1. The disk space required to store the metrics.
2. The memory the Netdata Agent will require to have that retention available for queries.
3. The CPU resources that will be required to query longer time-frames.
This guide gives two options for configuring Netdata to store more metrics. **We recommend the default [database
engine](#using-the-database-engine)**, but you can stick with or switch to the round-robin database if you prefer.
As retention increases, the resources required to support that retention increase too.
Let's get started.
Since Netdata Agents usually run at the edge, inside production systems, Netdata Agent **parents** should be considered. In a **parent - child** setup, the child (the Netdata Agent running on a production system) delegates all of its functions, including longer metrics retention and querying, to the parent node, which can dedicate more resources to this task. A single Netdata Agent parent can centralize multiple Netdata Agent children (dozens, hundreds, or even thousands, depending on its available resources).
## Using the database engine
The database engine uses RAM to store recent metrics while also using a "spill to disk" feature that takes advantage of
available disk space for long-term metrics storage. This feature of the database engine allows you to store a much
larger dataset than your system's available RAM.
## Ephemerality of metrics
The database engine is currently the default method of storing metrics, but if you're not sure which database you're
using, check out your `netdata.conf` file and look for the `[db].mode` setting:
The ephemerality of metrics plays an important role in retention. In environments where metrics stop being collected and new metrics are constantly being generated, we are interested in 2 parameters:
```conf
1. The **expected concurrent number of metrics** as an average for the lifetime of the database.
This affects mainly the storage requirements.
2. The **expected total number of unique metrics** for the lifetime of the database.
This affects mainly the memory requirements for having all these metrics indexed and available to be queried.
## Granularity of metrics
The granularity of metrics (the frequency at which they are collected and stored, i.e. their resolution) significantly affects retention.
Lowering the granularity from per second to every two seconds will double retention and halve the CPU requirements of the Netdata Agent, without affecting disk space or memory requirements.
## Which database mode to use
Netdata Agents support multiple database modes.
The default mode `[db].mode = dbengine` has been designed to scale for longer retentions.
The other available database modes are designed to minimize resource utilization and should usually be considered on **parent - child** setups, on the children's side.
So,
* On a single node setup, use `[db].mode = dbengine` to increase retention.
* On a **parent - child** setup, use `[db].mode = dbengine` on the parent to increase retention, and a more resource-efficient mode (like `save`, `ram` or `none`) on the child to minimize resource utilization.
To use `dbengine`, set this in `netdata.conf` (it is the default):
```
[db]
mode = dbengine
```
If `[db].mode` is set to anything but `dbengine`, change it and restart Netdata using the standard command for
restarting services on your system. You're now using the database engine!
## Tiering
What makes the database engine efficient? While it's structured like a traditional database, the database engine splits
data between RAM and disk. The database engine caches and indexes data on RAM to keep memory usage low, and then
compresses older metrics onto disk for long-term storage.
`dbengine` supports tiering. Tiering allows having up to 3 versions of the data:
When the Netdata dashboard queries for historical metrics, the database engine will use its cache, stored in RAM, to
return relevant metrics for visualization in charts.
1. Tier 0 is the high resolution data.
2. Tier 1 is the first tier that samples data every 60 data collections of Tier 0.
3. Tier 2 is the second tier that samples data every 3600 data collections of Tier 0 (60 of Tier 1).
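To make each tier's resolution concrete, here is a minimal standalone sketch mirroring the `get_tier_grouping()` helper added in this change; the per-second collection frequency and the three-tier setup are assumptions for illustration, while the `{ 1, 60, 60 }` iterations match the defaults used by the change:

```c
// Minimal sketch (not Netdata code): derive each tier's effective resolution
// from the chart's collection frequency and the per-tier grouping iterations.
#include <stdio.h>

int main(void) {
    int update_every = 1;                        // assume per-second collection on Tier 0
    int grouping_iterations[3] = { 1, 60, 60 };  // iterations of the previous tier, per tier

    int grouping = 1;
    for (int tier = 0; tier < 3; tier++) {
        grouping *= grouping_iterations[tier];
        printf("tier %d: one point every %d seconds\n", tier, update_every * grouping);
    }
    return 0; // prints 1, 60 and 3600 seconds for tiers 0, 1 and 2
}
```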
Now, given that the database engine uses _both_ RAM and disk, there are two other settings to consider: `page cache
size MB` and `dbengine multihost disk space MB`.
To enable tiering, set `[db].storage tiers` in `netdata.conf` (the default is 1, which enables only Tier 0):
```conf
```
[db]
page cache size MB = 32
dbengine multihost disk space MB = 256
mode = dbengine
storage tiers = 3
```
`[db].page cache size MB` sets the maximum amount of RAM the database engine will use for caching and indexing.
`[db].dbengine multihost disk space MB` sets the maximum disk space the database engine will use for storing
compressed metrics. The default settings retain about four day's worth of metrics on a system collecting 2,000 metrics
every second.
## Disk space requirements
[**See our database engine
calculator**](/docs/store/change-metrics-storage.md#calculate-the-system-resources-ram-disk-space-needed-to-store-metrics)
to help you correctly set `[db].dbengine multihost disk space MB` based on your needs. The calculator gives an accurate estimate
based on how many child nodes you have, how many metrics your Agent collects, and more.
Netdata Agents require about 1 byte on disk per database point on Tier 0, and about 4 times more per point on the higher tiers (Tier 1 and 2). Higher tiers need the extra space because every point stores `min`, `max`, `sum`, `count` and `anomaly rate` (5 values, but only about 4 times the storage, because `count` and `anomaly rate` are 16-bit integers). The `average` is calculated on the fly at query time as `sum / count`.
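For reference, the higher-tier point follows the `storage_number_tier1_t` layout added in this change; the sketch below only illustrates its uncompressed size and is not Netdata code:

```c
// Illustration only: the shape of a higher-tier point (see storage_number_tier1_t).
// Three 32-bit floats plus two 16-bit counters: 16 bytes uncompressed, i.e. 4x the
// 4-byte tier 0 storage_number, in line with the ~4x on-disk footprint noted above.
#include <stdint.h>
#include <stdio.h>

typedef struct {
    float    sum_value;
    float    min_value;
    float    max_value;
    uint16_t count;
    uint16_t anomaly_count;
} tier1_point_example_t;

int main(void) {
    printf("uncompressed point size: %zu bytes\n", sizeof(tier1_point_example_t)); // 16
    return 0;
}
```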
With the database engine active, you can back up your `/var/cache/netdata/dbengine/` folder to another location for
redundancy.
### Tier 0 - per second for a week
Now that you know how to switch to the database engine, let's cover the default round-robin database for those who
aren't ready to make the move.
For 2000 metrics, collected every second and retained for a week, Tier 0 needs: 1 byte x 2000 metrics x 3600 secs per hour x 24 hours per day x 7 days per week = 1100MB.
## Using the round-robin database
The setting to control this is in `netdata.conf`:
In previous versions, Netdata used a round-robin database to store 1 hour of per-second metrics.
To see if you're still using this database, or if you would like to switch to it, open your `netdata.conf` file and see
if `[db].mode` option is set to `save`.
```conf
```
[db]
mode = save
mode = dbengine
# per second data collection
update every = 1
# enable only Tier 0
storage tiers = 1
# Tier 0, per second data for a week
dbengine multihost disk space MB = 1100
```
If `[db].mode` is set to `save`, then you're using the round-robin database. If so, the `[db].retention` option is set to
`3600`, which is the equivalent to 3,600 seconds, or one hour.
By setting it to `1100` and restarting the Netdata Agent, this node will start maintaining about a week of data. But pay attention to the number of metrics: if you have more than 2000 metrics on a node, or you need more than a week of high-resolution metrics, you may need to adjust this setting accordingly.
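If you need to size this for a different number of metrics or retention, the calculation above can be wrapped in a tiny helper; `tier0_disk_mb` below is a hypothetical name, not a Netdata API, and it only applies the ~1 byte per point estimate, so real usage will vary with compression:

```c
#include <stdio.h>

// Hypothetical helper (illustration only): Tier 0 disk estimate at ~1 byte per point.
static double tier0_disk_mb(double metrics, double update_every_secs, double days) {
    double points_per_metric = (86400.0 / update_every_secs) * days;
    return metrics * points_per_metric / (1024.0 * 1024.0);
}

int main(void) {
    // 2000 metrics, per second, one week: ~1154 MiB (the guide rounds this to about 1100 MB)
    printf("%.0f MB\n", tier0_disk_mb(2000, 1, 7));
    return 0;
}
```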
To increase your historical metrics, you can increase `[db].retention` to the number of seconds you'd like to store:
### Tier 1 - per minute for a month
```conf
By default, Tier 1 samples the data every 60 points of Tier 0. If Tier 0 is per second, then Tier 1 is per minute.
Tier 1 needs 4 times more storage per point compared to Tier 0. So, for 2000 metrics, with per minute resolution, retained for a month, Tier 1 needs: 4 bytes x 2000 metrics x 60 minutes per hour x 24 hours per day x 30 days per month = 330MB.
Do this in `netdata.conf`:
```
[db]
# 2 hours = 2 * 60 * 60 = 7200 seconds
retention = 7200
# 4 hours = 4 * 60 * 60 = 14400 seconds
retention = 14400
# 24 hours = 24 * 60 * 60 = 86400 seconds
retention = 86400
mode = dbengine
# per second data collection
update every = 1
# enable only Tier 0 and Tier 1
storage tiers = 2
# Tier 0, per second data for a week
dbengine multihost disk space MB = 1100
# Tier 1, per minute data for a month
dbengine tier 1 multihost disk space MB = 330
```
And so on.
Once `netdata.conf` is edited, the Netdata Agent needs to be restarted for the changes to take effect.
Next, check to see how many metrics Netdata collects on your system, and how much RAM that uses. Visit the Netdata
dashboard and look at the bottom-right corner of the interface. You'll find a sentence similar to the following:
### Tier 2 - per hour for a year
> Every second, Netdata collects 1,938 metrics, presents them in 299 charts and monitors them with 81 alarms. Netdata is
> using 25 MB of memory on **netdata-linux** for 1 hour, 6 minutes and 36 seconds of real-time history.
By default, Tier 2 samples data every 3600 points of Tier 0 (60 points of Tier 1). If Tier 0 is per second, then Tier 2 is per hour.
On this desktop system, using a Ryzen 5 1600 and 16GB of RAM, the round-robin database uses 25 MB of RAM to store just over an hour's worth of data for nearly 2,000 metrics.
The storage requirements are the same as for Tier 1.
You should base this number on two things: How much history you need for your use case, and how much RAM you're willing
to dedicate to Netdata.
For 2000 metrics, with per hour resolution, retained for a year, Tier 2 needs: 4 bytes x 2000 metrics x 24 hours per day x 365 days per year = 67MB.
How much RAM will a longer retention use? Let's use a little math.
Do this in `netdata.conf`:
The round-robin database needs 4 bytes for every value Netdata collects. If Netdata collects metrics every second,
that's 4 bytes, per second, per metric.
```
[db]
mode = dbengine
# per second data collection
update every = 1
# enable Tier 0, Tier 1 and Tier 2
storage tiers = 3
# Tier 0, per second data for a week
dbengine multihost disk space MB = 1100
# Tier 1, per minute data for a month
dbengine tier 1 multihost disk space MB = 330
```text
4 bytes * X seconds * Y metrics = RAM usage in bytes
# Tier 2, per hour data for a year
dbengine tier 2 multihost disk space MB = 67
```
Let's assume your system collects 1,000 metrics per second.
```text
4 bytes * 3600 seconds * 1,000 metrics = 14400000 bytes = 14.4 MB RAM
```
With that formula, you can calculate the RAM usage for much larger history settings.
```conf
# 2 hours at 1,000 metrics per second
4 bytes * 7200 seconds * 1,000 metrics = 28800000 bytes = 28.8 MB RAM
# 2 hours at 2,000 metrics per second
4 bytes * 7200 seconds * 2,000 metrics = 57600000 bytes = 57.6 MB RAM
# 4 hours at 2,000 metrics per second
4 bytes * 14400 seconds * 2,000 metrics = 115200000 bytes = 115.2 MB RAM
# 24 hours at 1,000 metrics per second
4 bytes * 86400 seconds * 1,000 metrics = 345600000 bytes = 345.6 MB RAM
```
## What's next?
Now that you have either configured database engine or round-robin database engine to store more metrics, you'll
probably want to see it in action!
For more information about how to pan charts to view historical metrics, see our documentation on [using
charts](/web/README.md#using-charts).
And if you'd now like to reduce Netdata's resource usage, view our [performance
guide](/docs/guides/configure/performance.md) for our best practices on optimization.
Once `netdata.conf` is edited, the Netdata Agent needs to be restarted for the changes to take effect.

View file

@ -77,8 +77,8 @@ NETDATA_DOUBLE exporting_calculate_value_from_stored_data(
time_t before = instance->before;
// find the edges of the rrd database for this chart
time_t first_t = rd->state->query_ops.oldest_time(rd);
time_t last_t = rd->state->query_ops.latest_time(rd);
time_t first_t = rd->tiers[0]->query_ops.oldest_time(rd->tiers[0]->db_metric_handle);
time_t last_t = rd->tiers[0]->query_ops.latest_time(rd->tiers[0]->db_metric_handle);
time_t update_every = st->update_every;
struct rrddim_query_handle handle;
@ -124,23 +124,20 @@ NETDATA_DOUBLE exporting_calculate_value_from_stored_data(
size_t counter = 0;
NETDATA_DOUBLE sum = 0;
NETDATA_DOUBLE value;
for (rd->state->query_ops.init(rd, &handle, after, before); !rd->state->query_ops.is_finished(&handle);) {
time_t curr_t, end_t;
SN_FLAGS flags;
value = rd->state->query_ops.next_metric(&handle, &curr_t, &end_t, &flags);
for (rd->tiers[0]->query_ops.init(rd->tiers[0]->db_metric_handle, &handle, after, before, TIER_QUERY_FETCH_SUM); !rd->tiers[0]->query_ops.is_finished(&handle);) {
STORAGE_POINT sp = rd->tiers[0]->query_ops.next_metric(&handle);
if (unlikely(!netdata_double_isnumber(value))) {
if (unlikely(storage_point_is_empty(sp))) {
// not collected
continue;
}
sum += value;
counter++;
sum += sp.sum;
counter += sp.count;
}
rd->state->query_ops.finalize(&handle);
rd->tiers[0]->query_ops.finalize(&handle);
if (unlikely(!counter)) {
debug(
D_EXPORTING,

View file

@ -63,13 +63,13 @@ int setup_rrdhost()
rd->collections_counter++;
rd->next = NULL;
rd->state = calloc(1, sizeof(*rd->state));
rd->state->query_ops.oldest_time = __mock_rrddim_query_oldest_time;
rd->state->query_ops.latest_time = __mock_rrddim_query_latest_time;
rd->state->query_ops.init = __mock_rrddim_query_init;
rd->state->query_ops.is_finished = __mock_rrddim_query_is_finished;
rd->state->query_ops.next_metric = __mock_rrddim_query_next_metric;
rd->state->query_ops.finalize = __mock_rrddim_query_finalize;
rd->tiers[0] = calloc(1, sizeof(struct rrddim_tier));
rd->tiers[0]->query_ops.oldest_time = __mock_rrddim_query_oldest_time;
rd->tiers[0]->query_ops.latest_time = __mock_rrddim_query_latest_time;
rd->tiers[0]->query_ops.init = __mock_rrddim_query_init;
rd->tiers[0]->query_ops.is_finished = __mock_rrddim_query_is_finished;
rd->tiers[0]->query_ops.next_metric = __mock_rrddim_query_next_metric;
rd->tiers[0]->query_ops.finalize = __mock_rrddim_query_finalize;
return 0;
}
@ -79,7 +79,7 @@ int teardown_rrdhost()
RRDDIM *rd = localhost->rrdset_root->dimensions;
free((void *)rd->name);
free((void *)rd->id);
free(rd->state);
free(rd->tiers[0]);
free(rd);
RRDSET *st = localhost->rrdset_root;

View file

@ -196,26 +196,27 @@ void rrdset_update_heterogeneous_flag(RRDSET *st)
(void)st;
}
time_t __mock_rrddim_query_oldest_time(RRDDIM *rd)
time_t __mock_rrddim_query_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle)
{
(void)rd;
(void)db_metric_handle;
function_called();
return mock_type(time_t);
}
time_t __mock_rrddim_query_latest_time(RRDDIM *rd)
time_t __mock_rrddim_query_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle)
{
(void)rd;
(void)db_metric_handle;
function_called();
return mock_type(time_t);
}
void __mock_rrddim_query_init(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time)
void __mock_rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type)
{
(void)rd;
(void)db_metric_handle;
(void)handle;
(void)tier_query_fetch_type;
function_called();
check_expected(start_time);
@ -230,16 +231,14 @@ int __mock_rrddim_query_is_finished(struct rrddim_query_handle *handle)
return mock_type(int);
}
NETDATA_DOUBLE __mock_rrddim_query_next_metric(struct rrddim_query_handle *handle, time_t *start_time, time_t *end_time, SN_FLAGS *flags)
STORAGE_POINT __mock_rrddim_query_next_metric(struct rrddim_query_handle *handle)
{
(void)handle;
(void)start_time;
(void)end_time;
(void) flags;
function_called();
return mock_type(NETDATA_DOUBLE);
STORAGE_POINT sp = {};
return sp;
}
void __mock_rrddim_query_finalize(struct rrddim_query_handle *handle)

View file

@ -307,12 +307,10 @@ static void test_exporting_calculate_value_from_stored_data(void **state)
expect_function_call(__mock_rrddim_query_is_finished);
will_return(__mock_rrddim_query_is_finished, 0);
expect_function_call(__mock_rrddim_query_next_metric);
will_return(__mock_rrddim_query_next_metric, 27);
expect_function_call(__mock_rrddim_query_is_finished);
will_return(__mock_rrddim_query_is_finished, 0);
expect_function_call(__mock_rrddim_query_next_metric);
will_return(__mock_rrddim_query_next_metric, 45);
expect_function_call(__mock_rrddim_query_is_finished);
will_return(__mock_rrddim_query_is_finished, 1);

View file

@ -57,11 +57,11 @@ int __wrap_connect_to_one_of(
void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line);
void __rrdset_check_rdlock(RRDSET *st, const char *file, const char *function, const unsigned long line);
void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line);
time_t __mock_rrddim_query_oldest_time(RRDDIM *rd);
time_t __mock_rrddim_query_latest_time(RRDDIM *rd);
void __mock_rrddim_query_init(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time);
time_t __mock_rrddim_query_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle);
time_t __mock_rrddim_query_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle);
void __mock_rrddim_query_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type);
int __mock_rrddim_query_is_finished(struct rrddim_query_handle *handle);
NETDATA_DOUBLE __mock_rrddim_query_next_metric(struct rrddim_query_handle *handle, time_t *start_time, time_t *end_time, SN_FLAGS *flags);
STORAGE_POINT __mock_rrddim_query_next_metric(struct rrddim_query_handle *handle);
void __mock_rrddim_query_finalize(struct rrddim_query_handle *handle);
// -----------------------------------------------------------------------

View file

@ -859,7 +859,7 @@ void *health_main(void *ptr) {
0, rc->options,
&rc->db_after,&rc->db_before,
NULL, NULL,
&value_is_null, NULL, 0);
&value_is_null, NULL, 0, 0);
if (unlikely(ret != 200)) {
// database lookup failed

View file

@ -351,6 +351,8 @@ extern char *netdata_configured_host_prefix;
// BEWARE: Outside of the C code this also exists in alarm-notify.sh
#define DEFAULT_CLOUD_BASE_URL "https://app.netdata.cloud"
#define RRD_STORAGE_TIERS 5
# ifdef __cplusplus
}
# endif

View file

@ -82,7 +82,7 @@ static inline void debug_dummy(void) {}
#ifdef NETDATA_INTERNAL_CHECKS
#define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("INTERNAL ERROR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
#else
#define debug(type, args...) debug_dummy()
#define internal_error(args...) debug_dummy()

View file

@ -166,6 +166,14 @@ void onewayalloc_freez(ONEWAYALLOC *owa __maybe_unused, const void *ptr __maybe_
return;
}
void *onewayalloc_doublesize(ONEWAYALLOC *owa, const void *src, size_t oldsize) {
size_t newsize = oldsize * 2;
void *dst = onewayalloc_mallocz(owa, newsize);
memcpy(dst, src, oldsize);
onewayalloc_freez(owa, src);
return dst;
}
void onewayalloc_destroy(ONEWAYALLOC *owa) {
if(!owa) return;

View file

@ -14,4 +14,6 @@ extern char *onewayalloc_strdupz(ONEWAYALLOC *owa, const char *s);
extern void *onewayalloc_memdupz(ONEWAYALLOC *owa, const void *src, size_t size);
extern void onewayalloc_freez(ONEWAYALLOC *owa, const void *ptr);
extern void *onewayalloc_doublesize(ONEWAYALLOC *owa, const void *src, size_t oldsize);
#endif // ONEWAYALLOC_H

View file

@ -63,6 +63,15 @@ typedef long long collected_number;
#endif
typedef uint32_t storage_number;
typedef struct storage_number_tier1 {
float sum_value;
float min_value;
float max_value;
uint16_t count;
uint16_t anomaly_count;
} storage_number_tier1_t;
#define STORAGE_NUMBER_FORMAT "%u"
typedef enum {

View file

@ -12,13 +12,13 @@ namespace ml {
class RrdDimension {
public:
RrdDimension(RRDDIM *RD) : RD(RD), Ops(&RD->state->query_ops) { }
RrdDimension(RRDDIM *RD) : RD(RD), Ops(&RD->tiers[0]->query_ops) { }
RRDDIM *getRD() const { return RD; }
time_t latestTime() { return Ops->latest_time(RD); }
time_t latestTime() { return Ops->latest_time(RD->tiers[0]->db_metric_handle); }
time_t oldestTime() { return Ops->oldest_time(RD); }
time_t oldestTime() { return Ops->oldest_time(RD->tiers[0]->db_metric_handle); }
unsigned updateEvery() const { return RD->update_every; }

View file

@ -8,19 +8,19 @@ namespace ml {
class Query {
public:
Query(RRDDIM *RD) : RD(RD) {
Ops = &RD->state->query_ops;
Ops = &RD->tiers[0]->query_ops;
}
time_t latestTime() {
return Ops->latest_time(RD);
return Ops->latest_time(RD->tiers[0]->db_metric_handle);
}
time_t oldestTime() {
return Ops->oldest_time(RD);
return Ops->oldest_time(RD->tiers[0]->db_metric_handle);
}
void init(time_t AfterT, time_t BeforeT) {
Ops->init(RD, &Handle, AfterT, BeforeT);
Ops->init(RD->tiers[0]->db_metric_handle, &Handle, AfterT, BeforeT, TIER_QUERY_FETCH_SUM);
}
bool isFinished() {
@ -28,10 +28,8 @@ public:
}
std::pair<time_t, CalculatedNumber> nextMetric() {
time_t CurrT, EndT;
SN_FLAGS Flags;
auto Value = (CalculatedNumber)Ops->next_metric(&Handle, &CurrT, &EndT, &Flags);
return { CurrT, Value };
STORAGE_POINT sp = Ops->next_metric(&Handle);
return { sp.start_time, sp.sum / sp.count };
}
~Query() {

View file

@ -80,12 +80,12 @@ void ml_new_dimension(RRDDIM *RD) {
return;
Dimension *D = new Dimension(RD);
RD->state->ml_dimension = static_cast<ml_dimension_t>(D);
RD->ml_dimension = static_cast<ml_dimension_t>(D);
H->addDimension(D);
}
void ml_delete_dimension(RRDDIM *RD) {
Dimension *D = static_cast<Dimension *>(RD->state->ml_dimension);
Dimension *D = static_cast<Dimension *>(RD->ml_dimension);
if (!D)
return;
@ -95,7 +95,7 @@ void ml_delete_dimension(RRDDIM *RD) {
else
H->removeDimension(D);
RD->state->ml_dimension = nullptr;
RD->ml_dimension = nullptr;
}
char *ml_get_host_info(RRDHOST *RH) {
@ -125,7 +125,7 @@ char *ml_get_host_runtime_info(RRDHOST *RH) {
}
bool ml_is_anomalous(RRDDIM *RD, double Value, bool Exists) {
Dimension *D = static_cast<Dimension *>(RD->state->ml_dimension);
Dimension *D = static_cast<Dimension *>(RD->ml_dimension);
if (!D)
return false;
@ -210,7 +210,7 @@ void ml_process_rrdr(RRDR *R, int MaxAnomalyRates) {
void ml_dimension_update_name(RRDSET *RS, RRDDIM *RD, const char *Name) {
(void) RS;
Dimension *D = static_cast<Dimension *>(RD->state->ml_dimension);
Dimension *D = static_cast<Dimension *>(RD->ml_dimension);
if (!D)
return;

View file

@ -1111,7 +1111,7 @@ int web_client_api_request_v1_badge(RRDHOST *host, struct web_client *w, char *u
points, after, before, group, group_options, 0, options,
NULL, &latest_timestamp,
NULL, NULL,
&value_is_null, NULL, 0);
&value_is_null, NULL, 0, 0);
// if the value cannot be calculated, show empty badge
if (ret != HTTP_RESP_OK) {

View file

@ -90,7 +90,7 @@ void rrdr_json_wrapper_begin(RRDR *r, BUFFER *wb, uint32_t format, RRDR_OPTIONS
, kq, kq, sq, web_client_api_request_v1_data_group_to_string(group_method), sq
, kq, kq, sq);
web_client_api_request_v1_data_options_to_string(wb, options);
web_client_api_request_v1_data_options_to_string(wb, r->internal.query_options);
buffer_sprintf(wb, "%s,\n %sdimension_names%s: [", sq, kq, kq);
@ -343,12 +343,21 @@ void rrdr_json_wrapper_begin(RRDR *r, BUFFER *wb, uint32_t format, RRDR_OPTIONS
rrdr_buffer_print_format(wb, format);
buffer_sprintf(wb, "%s,\n"
" %sdb_points_per_tier%s: [ "
, sq
, kq, kq
);
for(int tier = 0; tier < storage_tiers ; tier++)
buffer_sprintf(wb, "%s%zu", tier>0?", ":"", r->internal.tier_points_read[tier]);
buffer_strcat(wb, " ]");
if((options & RRDR_OPTION_CUSTOM_VARS) && (options & RRDR_OPTION_JSON_WRAP)) {
buffer_sprintf(wb, "%s,\n %schart_variables%s: ", sq, kq, kq);
buffer_sprintf(wb, ",\n %schart_variables%s: ", kq, kq);
health_api_v1_chart_custom_variables2json(r->st, wb);
}
else
buffer_sprintf(wb, "%s", sq);
buffer_sprintf(wb, ",\n %sresult%s: ", kq, kq);

View file

@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "web/api/web_api_v1.h"
#include "database/storage_engine.h"
static inline void free_single_rrdrim(ONEWAYALLOC *owa, RRDDIM *temp_rd, int archive_mode)
{
@ -18,7 +19,18 @@ static inline void free_single_rrdrim(ONEWAYALLOC *owa, RRDDIM *temp_rd, int arc
}
}
onewayalloc_freez(owa, temp_rd->state);
for(int tier = 0; tier < storage_tiers ;tier++) {
if(!temp_rd->tiers[tier]) continue;
if(archive_mode) {
STORAGE_ENGINE *eng = storage_engine_get(temp_rd->tiers[tier]->mode);
if (eng)
eng->api.free(temp_rd->tiers[tier]->db_metric_handle);
}
onewayalloc_freez(owa, temp_rd->tiers[tier]);
}
onewayalloc_freez(owa, temp_rd);
}
@ -89,7 +101,12 @@ void build_context_param_list(ONEWAYALLOC *owa, struct context_param **param_lis
RRDDIM *rd = onewayalloc_memdupz(owa, rd1, sizeof(RRDDIM));
rd->id = onewayalloc_strdupz(owa, rd1->id);
rd->name = onewayalloc_strdupz(owa, rd1->name);
rd->state = onewayalloc_memdupz(owa, rd1->state, sizeof(*rd->state));
for(int tier = 0; tier < storage_tiers ;tier++) {
if(rd1->tiers[tier])
rd->tiers[tier] = onewayalloc_memdupz(owa, rd1->tiers[tier], sizeof(*rd->tiers[tier]));
else
rd->tiers[tier] = NULL;
}
rd->next = (*param_list)->rd;
(*param_list)->rd = rd;
}
@ -168,6 +185,7 @@ int rrdset2value_api_v1(
, int *value_is_null
, uint8_t *anomaly_rate
, int timeout
, int tier
) {
int ret = HTTP_RESP_INTERNAL_SERVER_ERROR;
@ -175,7 +193,7 @@ int rrdset2value_api_v1(
RRDR *r = rrd2rrdr(owa, st, points, after, before,
group_method, group_time, options, dimensions, NULL,
group_options, timeout);
group_options, timeout, tier);
if(!r) {
if(value_is_null) *value_is_null = 1;
@ -232,6 +250,7 @@ int rrdset2anything_api_v1(
, long group_time
, uint32_t options
, time_t *latest_timestamp
, int tier
)
{
BUFFER *wb = query_params->wb;
@ -250,7 +269,7 @@ int rrdset2anything_api_v1(
dimensions ? buffer_tostring(dimensions) : NULL,
query_params->context_param_list,
group_options,
query_params->timeout);
query_params->timeout, tier);
if(!r) {
buffer_strcat(wb, "Cannot generate output with these parameters on this chart.");
return HTTP_RESP_INTERNAL_SERVER_ERROR;

View file

@ -78,6 +78,7 @@ extern int rrdset2anything_api_v1(
, long group_time
, uint32_t options
, time_t *latest_timestamp
, int tier
);
extern int rrdset2value_api_v1(
@ -99,6 +100,7 @@ extern int rrdset2value_api_v1(
, int *value_is_null
, uint8_t *anomaly_rate
, int timeout
, int tier
);
extern void build_context_param_list(ONEWAYALLOC *owa, struct context_param **param_list, RRDSET *st);

View file

@ -3,11 +3,7 @@
#include "value.h"
inline NETDATA_DOUBLE
rrdr2value(RRDR *r, long i, RRDR_OPTIONS options, int *all_values_are_null, uint8_t *anomaly_rate, RRDDIM *temp_rd) {
if (r->st_needs_lock)
rrdset_check_rdlock(r->st);
inline NETDATA_DOUBLE rrdr2value(RRDR *r, long i, RRDR_OPTIONS options, int *all_values_are_null, uint8_t *anomaly_rate, RRDDIM *temp_rd) {
long c;
RRDDIM *d;

View file

@ -11,7 +11,7 @@ struct grouping_average {
};
void grouping_create_average(RRDR *r, const char *options __maybe_unused) {
r->internal.grouping_data = callocz(1, sizeof(struct grouping_average));
r->internal.grouping_data = onewayalloc_callocz(r->internal.owa, 1, sizeof(struct grouping_average));
}
// resets when switches dimensions
@ -23,7 +23,7 @@ void grouping_reset_average(RRDR *r) {
}
void grouping_free_average(RRDR *r) {
freez(r->internal.grouping_data);
onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
r->internal.grouping_data = NULL;
}

View file

@ -37,7 +37,7 @@ static size_t countif_greaterequal(NETDATA_DOUBLE v, NETDATA_DOUBLE target) {
}
void grouping_create_countif(RRDR *r, const char *options __maybe_unused) {
struct grouping_countif *g = callocz(1, sizeof(struct grouping_countif));
struct grouping_countif *g = onewayalloc_callocz(r->internal.owa, 1, sizeof(struct grouping_countif));
r->internal.grouping_data = g;
if(options && *options) {
@ -106,7 +106,7 @@ void grouping_reset_countif(RRDR *r) {
}
void grouping_free_countif(RRDR *r) {
freez(r->internal.grouping_data);
onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
r->internal.grouping_data = NULL;
}

View file

@ -37,16 +37,16 @@ static inline NETDATA_DOUBLE window(RRDR *r, struct grouping_des *g) {
NETDATA_DOUBLE points;
if(r->group == 1) {
// provide a running DES
points = r->internal.points_wanted;
points = (NETDATA_DOUBLE)r->internal.points_wanted;
}
else {
// provide a SES with flush points
points = r->group;
points = (NETDATA_DOUBLE)r->group;
}
// https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
// A commonly used value for alpha is 2 / (N + 1)
return (points > max_window_size) ? max_window_size : points;
return (points > (NETDATA_DOUBLE)max_window_size) ? (NETDATA_DOUBLE)max_window_size : points;
}
static inline void set_alpha(RRDR *r, struct grouping_des *g) {
@ -70,7 +70,7 @@ static inline void set_beta(RRDR *r, struct grouping_des *g) {
}
void grouping_create_des(RRDR *r, const char *options __maybe_unused) {
struct grouping_des *g = (struct grouping_des *)mallocz(sizeof(struct grouping_des));
struct grouping_des *g = (struct grouping_des *)onewayalloc_mallocz(r->internal.owa, sizeof(struct grouping_des));
set_alpha(r, g);
set_beta(r, g);
g->level = 0.0;
@ -92,7 +92,7 @@ void grouping_reset_des(RRDR *r) {
}
void grouping_free_des(RRDR *r) {
freez(r->internal.grouping_data);
onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
r->internal.grouping_data = NULL;
}

View file

@ -12,7 +12,7 @@ struct grouping_incremental_sum {
};
void grouping_create_incremental_sum(RRDR *r, const char *options __maybe_unused) {
r->internal.grouping_data = callocz(1, sizeof(struct grouping_incremental_sum));
r->internal.grouping_data = onewayalloc_callocz(r->internal.owa, 1, sizeof(struct grouping_incremental_sum));
}
// resets when switches dimensions
@ -25,7 +25,7 @@ void grouping_reset_incremental_sum(RRDR *r) {
}
void grouping_free_incremental_sum(RRDR *r) {
freez(r->internal.grouping_data);
onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
r->internal.grouping_data = NULL;
}

View file

@ -11,7 +11,7 @@ struct grouping_max {
};
void grouping_create_max(RRDR *r, const char *options __maybe_unused) {
r->internal.grouping_data = callocz(1, sizeof(struct grouping_max));
r->internal.grouping_data = onewayalloc_callocz(r->internal.owa, 1, sizeof(struct grouping_max));
}
// resets when switches dimensions
@ -23,7 +23,7 @@ void grouping_reset_max(RRDR *r) {
}
void grouping_free_max(RRDR *r) {
freez(r->internal.grouping_data);
onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
r->internal.grouping_data = NULL;
}

View file

@@ -10,14 +10,15 @@ struct grouping_median {
     size_t series_size;
     size_t next_pos;
-    NETDATA_DOUBLE series[];
+    NETDATA_DOUBLE *series;
 };
 void grouping_create_median(RRDR *r, const char *options __maybe_unused) {
     long entries = r->group;
     if(entries < 0) entries = 0;
-    struct grouping_median *g = (struct grouping_median *)callocz(1, sizeof(struct grouping_median) + entries * sizeof(NETDATA_DOUBLE));
+    struct grouping_median *g = (struct grouping_median *)onewayalloc_callocz(r->internal.owa, 1, sizeof(struct grouping_median));
+    g->series = onewayalloc_mallocz(r->internal.owa, entries * sizeof(NETDATA_DOUBLE));
     g->series_size = (size_t)entries;
     r->internal.grouping_data = g;
@@ -31,7 +32,10 @@ void grouping_reset_median(RRDR *r) {
 }
 void grouping_free_median(RRDR *r) {
-    freez(r->internal.grouping_data);
+    struct grouping_median *g = (struct grouping_median *)r->internal.grouping_data;
+    if(g) onewayalloc_freez(r->internal.owa, g->series);
+    onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
     r->internal.grouping_data = NULL;
 }
@@ -39,7 +43,8 @@ void grouping_add_median(RRDR *r, NETDATA_DOUBLE value) {
     struct grouping_median *g = (struct grouping_median *)r->internal.grouping_data;
     if(unlikely(g->next_pos >= g->series_size)) {
-        error("INTERNAL ERROR: median buffer overflow on chart '%s' - next_pos = %zu, series_size = %zu, r->group = %ld.", r->st->name, g->next_pos, g->series_size, r->group);
+        g->series = onewayalloc_doublesize( r->internal.owa, g->series, g->series_size * sizeof(NETDATA_DOUBLE));
+        g->series_size *= 2;
     }
     else
         g->series[g->next_pos++] = (NETDATA_DOUBLE)value;
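Instead of reporting an internal error when the median window fills up, the series buffer now grows: onewayalloc_doublesize() doubles it inside the query arena. The growth logic is the familiar doubling pattern; a plain-realloc equivalent (illustrative, not the netdata helper):

```c
#include <stdlib.h>
#include <stdio.h>

// Append to a buffer that doubles whenever it runs out of room,
// mirroring the grow-on-overflow path added to grouping_add_median().
static void series_add(double **series, size_t *size, size_t *next_pos, double value) {
    if(*next_pos >= *size) {
        *size *= 2;
        double *grown = realloc(*series, *size * sizeof(double));
        if(!grown) exit(EXIT_FAILURE);
        *series = grown;
    }
    (*series)[(*next_pos)++] = value;
}

int main(void) {
    size_t size = 4, next_pos = 0;
    double *series = malloc(size * sizeof(double));

    for(int i = 0; i < 20; i++)
        series_add(&series, &size, &next_pos, i * 1.5);

    printf("stored %zu values, capacity %zu\n", next_pos, size);
    free(series);
    return 0;
}
```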


@@ -11,7 +11,7 @@ struct grouping_min {
 };
 void grouping_create_min(RRDR *r, const char *options __maybe_unused) {
-    r->internal.grouping_data = callocz(1, sizeof(struct grouping_min));
+    r->internal.grouping_data = onewayalloc_callocz(r->internal.owa, 1, sizeof(struct grouping_min));
 }
 // resets when switches dimensions
@@ -23,7 +23,7 @@ void grouping_reset_min(RRDR *r) {
 }
 void grouping_free_min(RRDR *r) {
-    freez(r->internal.grouping_data);
+    onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
     r->internal.grouping_data = NULL;
 }

File diff suppressed because it is too large


@@ -3,6 +3,10 @@
 #ifndef NETDATA_API_DATA_QUERY_H
 #define NETDATA_API_DATA_QUERY_H
+#ifdef __cplusplus
+extern "C" {
+#endif
 typedef enum rrdr_grouping {
     RRDR_GROUPING_UNDEFINED = 0,
     RRDR_GROUPING_AVERAGE,
@@ -23,4 +27,8 @@ extern void web_client_api_v1_init_grouping(void);
 extern RRDR_GROUPING web_client_api_request_v1_data_group(const char *name, RRDR_GROUPING def);
 extern const char *web_client_api_request_v1_data_group_to_string(RRDR_GROUPING group);
+#ifdef __cplusplus
+}
+#endif
 #endif //NETDATA_API_DATA_QUERY_H


@@ -58,35 +58,12 @@ static void rrdr_dump(RRDR *r)
 }
 */
-inline static void rrdr_lock_rrdset(RRDR *r) {
-    if(unlikely(!r)) {
-        error("NULL value given!");
-        return;
-    }
-    rrdset_rdlock(r->st);
-    r->has_st_lock = 1;
-}
-inline static void rrdr_unlock_rrdset(RRDR *r) {
-    if(unlikely(!r)) {
-        error("NULL value given!");
-        return;
-    }
-    if(likely(r->has_st_lock)) {
-        r->has_st_lock = 0;
-        rrdset_unlock(r->st);
-    }
-}
 inline void rrdr_free(ONEWAYALLOC *owa, RRDR *r) {
     if(unlikely(!r)) return;
-    rrdr_unlock_rrdset(r);
+    if(likely(r->st_locked_by_rrdr_create))
+        rrdset_unlock(r->st);
     onewayalloc_freez(owa, r->t);
     onewayalloc_freez(owa, r->v);
     onewayalloc_freez(owa, r->o);
@@ -95,39 +72,51 @@ inline void rrdr_free(ONEWAYALLOC *owa, RRDR *r) {
     onewayalloc_freez(owa, r);
 }
-RRDR *rrdr_create(ONEWAYALLOC *owa, struct rrdset *st, long n, struct context_param *context_param_list)
-{
-    if (unlikely(!st)) {
-        error("NULL value given!");
-        return NULL;
-    }
+RRDR *rrdr_create_for_x_dimensions(ONEWAYALLOC *owa, int dimensions, long points) {
     RRDR *r = onewayalloc_callocz(owa, 1, sizeof(RRDR));
-    r->st = st;
+    r->internal.owa = owa;
+    r->d = dimensions;
+    r->n = points;
+    r->t = onewayalloc_callocz(owa, points, sizeof(time_t));
+    r->v = onewayalloc_mallocz(owa, points * dimensions * sizeof(NETDATA_DOUBLE));
+    r->o = onewayalloc_mallocz(owa, points * dimensions * sizeof(RRDR_VALUE_FLAGS));
+    r->ar = onewayalloc_mallocz(owa, points * dimensions * sizeof(uint8_t));
+    r->od = onewayalloc_mallocz(owa, dimensions * sizeof(RRDR_DIMENSION_FLAGS));
+    r->group = 1;
+    r->update_every = 1;
+    return r;
+}
+RRDR *rrdr_create(ONEWAYALLOC *owa, struct rrdset *st, long n, struct context_param *context_param_list) {
+    if (unlikely(!st)) return NULL;
+    bool st_locked_by_rrdr_create = false;
     if (!context_param_list || !(context_param_list->flags & CONTEXT_FLAGS_ARCHIVE)) {
-        rrdr_lock_rrdset(r);
-        r->st_needs_lock = 1;
+        rrdset_rdlock(st);
+        st_locked_by_rrdr_create = true;
     }
+    // count the number of dimensions
+    int dimensions = 0;
     RRDDIM *temp_rd = context_param_list ? context_param_list->rd : NULL;
     RRDDIM *rd;
     if (temp_rd) {
         RRDDIM *t = temp_rd;
         while (t) {
-            r->d++;
+            dimensions++;
             t = t->next;
         }
     } else
-        rrddim_foreach_read(rd, st) r->d++;
+        rrddim_foreach_read(rd, st) dimensions++;
-    r->n = n;
-    r->t = onewayalloc_callocz(owa, (size_t)n, sizeof(time_t));
-    r->v = onewayalloc_mallocz(owa, n * r->d * sizeof(NETDATA_DOUBLE));
-    r->o = onewayalloc_mallocz(owa, n * r->d * sizeof(RRDR_VALUE_FLAGS));
-    r->ar = onewayalloc_mallocz(owa, n * r->d * sizeof(uint8_t));
-    r->od = onewayalloc_mallocz(owa, r->d * sizeof(RRDR_DIMENSION_FLAGS));
+    // create the rrdr
+    RRDR *r = rrdr_create_for_x_dimensions(owa, dimensions, n);
+    r->st = st;
+    r->st_locked_by_rrdr_create = st_locked_by_rrdr_create;
     // set the hidden flag on hidden dimensions
     int c;
@@ -138,8 +127,5 @@ RRDR *rrdr_create(ONEWAYALLOC *owa, struct rrdset *st, long n, struct context_pa
         r->od[c] = RRDR_DIMENSION_DEFAULT;
     }
-    r->group = 1;
-    r->update_every = 1;
     return r;
 }
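rrdr_create() is now a thin wrapper: it counts the dimensions, takes the chart read lock itself, and delegates the allocations to the new rrdr_create_for_x_dimensions(), so an RRDR can also be built without a chart attached. The value/options/anomaly buffers it allocates are flat "n points x d dimensions" arrays; one common way to index such a flat buffer is row-major per point, roughly as sketched below (illustrative only, not netdata code):

```c
#include <stdio.h>
#include <stdlib.h>

// Row-major indexing into a flat "n points x d dimensions" buffer,
// the kind of layout used for the RRDR value/option/anomaly arrays.
int main(void) {
    long n = 3;      // points (rows)
    int  d = 2;      // dimensions (columns)
    double *v = calloc((size_t)(n * d), sizeof(double));

    // store a value for point 1, dimension 0
    v[1 * d + 0] = 42.0;

    for(long point = 0; point < n; point++)
        for(int dim = 0; dim < d; dim++)
            printf("point %ld, dim %d -> %g\n", point, dim, v[point * d + dim]);

    free(v);
    return 0;
}
```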


@@ -4,6 +4,18 @@
 #define NETDATA_QUERIES_RRDR_H
 #include "libnetdata/libnetdata.h"
 #include "web/api/queries/query.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+typedef enum tier_query_fetch {
+    TIER_QUERY_FETCH_SUM,
+    TIER_QUERY_FETCH_MIN,
+    TIER_QUERY_FETCH_MAX,
+    TIER_QUERY_FETCH_AVERAGE
+} TIER_QUERY_FETCH;
 typedef enum rrdr_options {
     RRDR_OPTION_NONZERO = 0x00000001, // don't output dimensions with just zero values
@@ -24,12 +36,14 @@ typedef enum rrdr_options {
     RRDR_OPTION_MATCH_NAMES = 0x00008000, // when filtering dimensions, match only names
     RRDR_OPTION_CUSTOM_VARS = 0x00010000, // when wrapping response in a JSON, return custom variables in response
     RRDR_OPTION_NATURAL_POINTS = 0x00020000, // return the natural points of the database
-    RRDR_OPTION_ANOMALY_BIT = 0x00040000, // Return the anomaly bit stored in each collected_number
-    RRDR_OPTION_RETURN_RAW = 0x00080000, // Return raw data for aggregating across multiple nodes
-    RRDR_OPTION_RETURN_JWAR = 0x00100000, // Return anomaly rates in jsonwrap
+    RRDR_OPTION_VIRTUAL_POINTS = 0x00040000, // return virtual points
+    RRDR_OPTION_ANOMALY_BIT = 0x00080000, // Return the anomaly bit stored in each collected_number
+    RRDR_OPTION_RETURN_RAW = 0x00100000, // Return raw data for aggregating across multiple nodes
+    RRDR_OPTION_RETURN_JWAR = 0x00200000, // Return anomaly rates in jsonwrap
+    RRDR_OPTION_SELECTED_TIER = 0x00400000, // Use the selected tier for the query
     // internal ones - not to be exposed to the API
-    RRDR_OPTION_INTERNAL_AR = 0x10000000, // internal use only, to let the formatters we want to render the anomaly rate
+    RRDR_OPTION_INTERNAL_AR = 0x10000000, // internal use only, to let the formatters we want to render the anomaly rate
 } RRDR_OPTIONS;
 typedef enum rrdr_value_flag {
@@ -67,7 +81,7 @@ typedef struct rrdresult {
     RRDR_DIMENSION_FLAGS *od; // the options for the dimensions
     time_t *t; // array of n timestamps
-    NETDATA_DOUBLE *v; // array n x d values
+    NETDATA_DOUBLE *v; // array n x d values
     RRDR_VALUE_FLAGS *o; // array n x d options for each value returned
     uint8_t *ar; // array n x d of anomaly rates (0 - 200)
@@ -80,11 +94,13 @@ typedef struct rrdresult {
     time_t before;
     time_t after;
-    int has_st_lock; // if st is read locked by us
-    uint8_t st_needs_lock; // if ST should be locked
+    bool st_locked_by_rrdr_create; // if st is read locked by us
     // internal rrd2rrdr() members below this point
     struct {
+        int query_tier; // the selected tier
+        RRDR_OPTIONS query_options; // RRDR_OPTION_* (as run by the query)
         long points_wanted;
         long resampling_group;
         NETDATA_DOUBLE resampling_divisor;
@@ -96,12 +112,15 @@ typedef struct rrdresult {
         NETDATA_DOUBLE (*grouping_flush)(struct rrdresult *r, RRDR_VALUE_FLAGS *rrdr_value_options_ptr);
         void *grouping_data;
+        TIER_QUERY_FETCH tier_query_fetch;
 #ifdef NETDATA_INTERNAL_CHECKS
         const char *log;
 #endif
         size_t db_points_read;
         size_t result_points_generated;
+        size_t tier_points_read[RRD_STORAGE_TIERS];
+        ONEWAYALLOC *owa;
     } internal;
 } RRDR;
@@ -110,6 +129,7 @@ typedef struct rrdresult {
 #include "database/rrd.h"
 extern void rrdr_free(ONEWAYALLOC *owa, RRDR *r);
 extern RRDR *rrdr_create(ONEWAYALLOC *owa, struct rrdset *st, long n, struct context_param *context_param_list);
+extern RRDR *rrdr_create_for_x_dimensions(ONEWAYALLOC *owa, int dimensions, long points);
 #include "../web_api_v1.h"
 #include "web/api/queries/query.h"
@@ -118,10 +138,12 @@ extern RRDR *rrd2rrdr(
     ONEWAYALLOC *owa,
     RRDSET *st, long points_wanted, long long after_wanted, long long before_wanted,
     RRDR_GROUPING group_method, long resampling_time_requested, RRDR_OPTIONS options, const char *dimensions,
-    struct context_param *context_param_list, const char *group_options, int timeout);
+    struct context_param *context_param_list, const char *group_options, int timeout, int tier);
-extern int rrdr_relative_window_to_absolute(long long *after, long long *before, int update_every, long points);
+extern int rrdr_relative_window_to_absolute(long long *after, long long *before);
 #include "query.h"
+#ifdef __cplusplus
+}
+#endif
 #endif //NETDATA_QUERIES_RRDR_H
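TIER_QUERY_FETCH tells the query engine which aggregate to read from a higher tier, since tiers above tier 0 hold aggregated points rather than raw samples. As a rough, self-contained illustration (the struct below is hypothetical and not the dbengine page layout), selecting a value per fetch type could look like this:

```c
#include <stdio.h>
#include <stdint.h>

// Hypothetical aggregated point as a higher tier might store it;
// the real on-disk layout is not part of this header.
struct tier_point {
    double sum, min, max;
    uint32_t count;
};

typedef enum { FETCH_SUM, FETCH_MIN, FETCH_MAX, FETCH_AVERAGE } fetch_type;

static double tier_point_value(const struct tier_point *p, fetch_type fetch) {
    switch(fetch) {
        case FETCH_SUM:     return p->sum;
        case FETCH_MIN:     return p->min;
        case FETCH_MAX:     return p->max;
        case FETCH_AVERAGE:
        default:            return p->count ? p->sum / (double)p->count : 0.0;
    }
}

int main(void) {
    struct tier_point p = { .sum = 30.0, .min = 2.0, .max = 9.0, .count = 5 };
    printf("average of the aggregated point: %f\n", tier_point_value(&p, FETCH_AVERAGE));
    return 0;
}
```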


@@ -31,14 +31,14 @@ static inline NETDATA_DOUBLE window(RRDR *r, struct grouping_ses *g) {
     NETDATA_DOUBLE points;
     if(r->group == 1) {
         // provide a running DES
-        points = r->internal.points_wanted;
+        points = (NETDATA_DOUBLE)r->internal.points_wanted;
     }
     else {
         // provide a SES with flush points
-        points = r->group;
+        points = (NETDATA_DOUBLE)r->group;
     }
-    return (points > max_window_size) ? max_window_size : points;
+    return (points > (NETDATA_DOUBLE)max_window_size) ? (NETDATA_DOUBLE)max_window_size : points;
 }
 static inline void set_alpha(RRDR *r, struct grouping_ses *g) {
@@ -49,7 +49,7 @@ static inline void set_alpha(RRDR *r, struct grouping_ses *g) {
 }
 void grouping_create_ses(RRDR *r, const char *options __maybe_unused) {
-    struct grouping_ses *g = (struct grouping_ses *)callocz(1, sizeof(struct grouping_ses));
+    struct grouping_ses *g = (struct grouping_ses *)onewayalloc_callocz(r->internal.owa, 1, sizeof(struct grouping_ses));
     set_alpha(r, g);
     g->level = 0.0;
     r->internal.grouping_data = g;
@@ -64,7 +64,7 @@ void grouping_reset_ses(RRDR *r) {
 }
 void grouping_free_ses(RRDR *r) {
-    freez(r->internal.grouping_data);
+    onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
     r->internal.grouping_data = NULL;
 }


@@ -15,7 +15,7 @@ struct grouping_stddev {
 };
 void grouping_create_stddev(RRDR *r, const char *options __maybe_unused) {
-    r->internal.grouping_data = callocz(1, sizeof(struct grouping_stddev));
+    r->internal.grouping_data = onewayalloc_callocz(r->internal.owa, 1, sizeof(struct grouping_stddev));
 }
 // resets when switches dimensions
@@ -26,7 +26,7 @@ void grouping_reset_stddev(RRDR *r) {
 }
 void grouping_free_stddev(RRDR *r) {
-    freez(r->internal.grouping_data);
+    onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
     r->internal.grouping_data = NULL;
 }
@@ -55,7 +55,7 @@ static inline NETDATA_DOUBLE mean(struct grouping_stddev *g) {
 }
 static inline NETDATA_DOUBLE variance(struct grouping_stddev *g) {
-    return ( (g->count > 1) ? g->m_newS/(g->count - 1) : 0.0 );
+    return ( (g->count > 1) ? g->m_newS/(NETDATA_DOUBLE)(g->count - 1) : 0.0 );
 }
 static inline NETDATA_DOUBLE stddev(struct grouping_stddev *g) {
     return sqrtndd(variance(g));
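The m_newS bookkeeping that variance() divides by (count - 1) looks like Welford's online algorithm for running mean and variance; the change above only makes the divisor's conversion to NETDATA_DOUBLE explicit. A standalone version of the same computation, assuming that reading of the code:

```c
#include <stdio.h>
#include <math.h>

// Welford-style running mean / sample variance, analogous to the
// running-sum-of-squared-differences kept by the stddev grouping.
int main(void) {
    const double values[] = { 4, 7, 13, 16 };
    double mean = 0.0, s = 0.0;
    size_t count = 0;

    for(size_t i = 0; i < sizeof(values) / sizeof(values[0]); i++) {
        count++;
        double delta = values[i] - mean;
        mean += delta / (double)count;
        s += delta * (values[i] - mean);
    }

    double variance = (count > 1) ? s / (double)(count - 1) : 0.0;
    printf("mean %f, sample variance %f, stddev %f\n", mean, variance, sqrt(variance));
    return 0;
}
```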


@@ -11,7 +11,7 @@ struct grouping_sum {
 };
 void grouping_create_sum(RRDR *r, const char *options __maybe_unused) {
-    r->internal.grouping_data = callocz(1, sizeof(struct grouping_sum));
+    r->internal.grouping_data = onewayalloc_callocz(r->internal.owa, 1, sizeof(struct grouping_sum));
 }
 // resets when switches dimensions
@@ -23,7 +23,7 @@ void grouping_reset_sum(RRDR *r) {
 }
 void grouping_free_sum(RRDR *r) {
-    freez(r->internal.grouping_data);
+    onewayalloc_freez(r->internal.owa, r->internal.grouping_data);
     r->internal.grouping_data = NULL;
 }


@@ -36,9 +36,11 @@ static struct {
         , {"match-names" , 0 , RRDR_OPTION_MATCH_NAMES}
         , {"showcustomvars" , 0 , RRDR_OPTION_CUSTOM_VARS}
         , {"anomaly-bit" , 0 , RRDR_OPTION_ANOMALY_BIT}
+        , {"selected-tier" , 0 , RRDR_OPTION_SELECTED_TIER}
         , {"raw" , 0 , RRDR_OPTION_RETURN_RAW}
         , {"jw-anomaly-rates" , 0 , RRDR_OPTION_RETURN_JWAR}
         , {"natural-points" , 0 , RRDR_OPTION_NATURAL_POINTS}
+        , {"virtual-points" , 0 , RRDR_OPTION_VIRTUAL_POINTS}
         , {NULL , 0 , 0}
 };
@@ -436,7 +438,7 @@ inline int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, c
     char *chart_label_key = NULL;
     char *chart_labels_filter = NULL;
     char *group_options = NULL;
+    int tier = 0;
     int group = RRDR_GROUPING_AVERAGE;
     int show_dimensions = 0;
     uint32_t format = DATASOURCE_JSON;
@@ -520,6 +522,11 @@ inline int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, c
         else if(!strcmp(name, "max_anomaly_rates")) {
             max_anomaly_rates_str = value;
         }
+        else if(!strcmp(name, "tier")) {
+            tier = str2i(value);
+            if(tier >= 0 && tier < storage_tiers)
+                options |= RRDR_OPTION_SELECTED_TIER;
+        }
     }
     // validate the google parameters given
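A note on how the new parameter is used: with this hunk, `/api/v1/data` accepts `tier=N`; when the value is a valid tier index (`0 <= tier < storage_tiers`) the handler also raises `RRDR_OPTION_SELECTED_TIER`, so the query should be answered from that tier instead of letting the query engine pick one. For example, a request such as `/api/v1/data?chart=system.cpu&after=-86400&points=300&tier=1` (the chart name is only an example) would read from tier 1, assuming at least two tiers are configured.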
@@ -678,7 +685,7 @@ inline int web_client_api_request_v1_data(RRDHOST *host, struct web_client *w, c
             .wb = w->response.data};
     ret = rrdset2anything_api_v1(owa, st, &query_params, dimensions, format,
-                                 points, after, before, group, group_options, group_time, options, &last_timestamp_in_data);
+                                 points, after, before, group, group_options, group_time, options, &last_timestamp_in_data, tier);
     free_context_param_list(owa, &context_param_list);