0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-09 15:47:53 +00:00
netdata_netdata/src/libnetdata/clocks/clocks.c
vkalintiris e99da8b64b
Run the agent as a Windows service. ()
* Run the agent as a Windows service.

This commit contains the boilerplate code for running the agent as a
Windows service.

We start the agent's main as a separate thread, although this is not
strictly required based on my experiments. We need similar logic for
calling netdata's exit function when someone wants to stop the agent.

However, at this point we need to resolve the issue of gaps when
running the agent as a service. It seems that sleeping for one second
with `sleep(1)`, actually sleeps for 2 to 4 seconds on my setup.

Once we resolve this, the work that remains concerns packaging, i.e.,
installing the binaries at the proper places so that the relevant
DLLs are found.

To test this PR you need to:
  - Build the agent: ./packaging/utils/compile-on-windows.sh
  - Install the files: `ninja -C build/ install`
  - Copy the main binary: `cp ./build/netdata /usr/bin/`
  - (Only once) Create the netdata service: `sc.exe config Netdata binPath="C:\msys64\usr\bin\netdata"`
  - Start the service: `sc.exe start Netdata`

A couple of notes:
  - The health and the spawn client have been disabled for the time
    being. They will be re-enabled once we finish the agent-as-service
    issue and the packaging.
  - Last time I checked, the agent crashes after a while when using
    dbengine. In order to have something that works correctly, you
    should specify memory-mode ram in your netdata.conf.

* Add windows version for sleep_usec_with_now

* Split install prefix from runtime prefix

These paths are always the same for non-Windows
systems. On Windows, RFS is the top-level
installation path.

With the current setup, Netdata will be installed
at C:\msys64\opt\netdata at packaging time. However,
the layout of the application means that when the
agent starts, it'll look as if everything was installed
at /.

* Do not use mold linker on Windows.

* Use modern UI for installer.

* Make the service delayed-auto

* Use mutexes instead of spinlocks.

* Update service handling logic.

* Add proper ifdefs for spinlock implementation.

* Initialize analytics spinlock

* Add a macro to build the agent as regular cli tool.

* Add makensis dependency

* Let installer know it's installing Netdata.

* Disable pluginsd on Windows

When pluginsd is enabled, the agent freezes approximately
20% of the time during startup.

* Add service description.

* Return pthread_join result

* Print tag when we fail to join a thread.

* Do not use mutexes instead of spinlocks.

* Assorted changes to service/main code.

* Rework service functions.

With the current implementation we are not getting any
MUTEX_LOCK errors and thread joining succeeds.

The only case where joining fails is the parallel initialization
of dbengine threads, which we can easily avoid by serializing
the initialization step.

* Rework main functions

This will allow someone to run the agent either as a service
or as a command-line tool.

* Change runtime prefix only when building for packaging.

* Install binaries and dlls.

* Make netdata claiming through UI work correctly.

* Fix netdata path
2024-07-02 12:19:20 +03:00

508 lines
17 KiB
C

// SPDX-License-Identifier: GPL-3.0-or-later
#include "../libnetdata.h"
// Clock ids used by the now_boottime_*() and now_monotonic_*() helpers.
// Both start as CLOCK_MONOTONIC for compatibility; call clocks_init() once
// so that the probes can replace them with the clocks this system supports.
static clockid_t clock_boottime_to_use = CLOCK_MONOTONIC;
static clockid_t clock_monotonic_to_use = CLOCK_MONOTONIC;
// the default clock resolution is 1ms (used until clocks_init() runs, and
// whenever clock_getres() fails or reports an out-of-range value)
#define DEFAULT_CLOCK_RESOLUTION_UT ((usec_t)0 * USEC_PER_SEC + (usec_t)1 * USEC_PER_MS)
// the max clock resolution is 10ms (coarser values reported by clock_getres()
// are rejected in favor of the default)
#define MAX_CLOCK_RESOLUTION_UT ((usec_t)0 * USEC_PER_SEC + (usec_t)10 * USEC_PER_MS)
// Clock resolutions in microseconds; refreshed by clocks_init().
usec_t clock_monotonic_resolution = DEFAULT_CLOCK_RESOLUTION_UT;
usec_t clock_realtime_resolution = DEFAULT_CLOCK_RESOLUTION_UT;
#ifndef HAVE_CLOCK_GETTIME
// Fallback used when the platform has no clock_gettime().
// The requested clock id is ignored: the time always comes from gettimeofday().
// Returns 0 on success, -1 (after logging) when gettimeofday() fails.
inline int clock_gettime(clockid_t clk_id __maybe_unused, struct timespec *ts) {
    struct timeval wall;

    int rc = gettimeofday(&wall, NULL);
    if(unlikely(rc == -1)) {
        netdata_log_error("gettimeofday() failed.");
        return -1;
    }

    // gettimeofday() reports microseconds; scale the sub-second part to nanoseconds
    ts->tv_nsec = (long)((wall.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC);
    ts->tv_sec = wall.tv_sec;

    return 0;
}
#endif
// Similar to CLOCK_MONOTONIC, but provides access to a raw hardware-based time that is not subject to NTP adjustments
// or the incremental adjustments performed by adjtime(3). This clock does not count time that the system is suspended
static void test_clock_monotonic_raw(void) {
#ifdef CLOCK_MONOTONIC_RAW
struct timespec ts;
if(clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == -1 && errno == EINVAL)
clock_monotonic_to_use = CLOCK_MONOTONIC;
else
clock_monotonic_to_use = CLOCK_MONOTONIC_RAW;
#else
clock_monotonic_to_use = CLOCK_MONOTONIC;
#endif
}
// A binary built with CLOCK_BOOTTIME defined may run on a Linux kernel older than 2.6.39, where
// clock_gettime(2) fails with EINVAL for that clock. In that case fall back to the monotonic clock
// already selected in clock_monotonic_to_use; otherwise use CLOCK_BOOTTIME.
static void test_clock_boottime(void) {
    struct timespec probe;

    if(clock_gettime(CLOCK_BOOTTIME, &probe) != -1 || errno != EINVAL) {
        clock_boottime_to_use = CLOCK_BOOTTIME;
        return;
    }

    clock_boottime_to_use = clock_monotonic_to_use;
}
// Returns the resolution of `clock` in microseconds, as reported by clock_getres().
//
// - sub-microsecond resolutions are rounded up to 1 microsecond
// - a failure of clock_getres(), a resolution of zero, or a resolution coarser than
//   MAX_CLOCK_RESOLUTION_UT is logged and replaced by DEFAULT_CLOCK_RESOLUTION_UT
//
// The result is never zero: callers use it as a divisor (e.g. `x % clock_realtime_resolution`).
static usec_t get_clock_resolution(clockid_t clock) {
    struct timespec ts = { 0 };

    if(clock_getres(clock, &ts) != 0) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "clock_getres(%d) failed, using defaults for clock resolution.", (int)clock);
        return DEFAULT_CLOCK_RESOLUTION_UT;
    }

    usec_t ret = (usec_t)ts.tv_sec * USEC_PER_SEC + (usec_t)ts.tv_nsec / NSEC_PER_USEC;

    // finer than 1 microsecond: report the smallest value we can represent
    if(!ret && ts.tv_nsec > 0 && ts.tv_nsec < (long int)NSEC_PER_USEC)
        return (usec_t)1;

    // a zero resolution is meaningless and would cause a division by zero in the heartbeat code
    if(!ret) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "clock_getres(%d) returned a zero resolution, using defaults for clock resolution.", (int)clock);
        return DEFAULT_CLOCK_RESOLUTION_UT;
    }

    if(ret > MAX_CLOCK_RESOLUTION_UT) {
        nd_log(NDLS_DAEMON, NDLP_ERR, "clock_getres(%d) returned %"PRIu64" usec is out of range, using defaults for clock resolution.", (int)clock, ret);
        return DEFAULT_CLOCK_RESOLUTION_UT;
    }

    return ret;
}
// perform any initializations required for clocks
void clocks_init(void) {
// monotonic raw has to be tested before boottime
test_clock_monotonic_raw();
// boottime has to be tested after monotonic coarse
test_clock_boottime();
clock_monotonic_resolution = get_clock_resolution(clock_monotonic_to_use);
clock_realtime_resolution = get_clock_resolution(CLOCK_REALTIME);
}
// Reads clock `clk_id` and returns its whole seconds.
// Returns 0 (after logging) when clock_gettime() fails.
inline time_t now_sec(clockid_t clk_id) {
    struct timespec now_ts;

    int rc = clock_gettime(clk_id, &now_ts);
    if(unlikely(rc == -1)) {
        netdata_log_error("clock_gettime(%ld, &timespec) failed.", (long int)clk_id);
        return 0;
    }

    return now_ts.tv_sec;
}
// Reads clock `clk_id` and returns its value in microseconds.
// Returns 0 (after logging) when clock_gettime() fails.
inline usec_t now_usec(clockid_t clk_id) {
    struct timespec now_ts;

    int rc = clock_gettime(clk_id, &now_ts);
    if(unlikely(rc == -1)) {
        netdata_log_error("clock_gettime(%ld, &timespec) failed.", (long int)clk_id);
        return 0;
    }

    usec_t seconds_ut = (usec_t)now_ts.tv_sec * USEC_PER_SEC;
    usec_t fraction_ut = (usec_t)(now_ts.tv_nsec % NSEC_PER_SEC) / NSEC_PER_USEC;

    return seconds_ut + fraction_ut;
}
// Reads clock `clk_id` into `tv` with microsecond granularity.
// Returns 0 on success. On failure it logs, zeroes `tv` and returns -1.
inline int now_timeval(clockid_t clk_id, struct timeval *tv) {
    struct timespec now_ts;

    if(unlikely(clock_gettime(clk_id, &now_ts) == -1)) {
        netdata_log_error("clock_gettime(%ld, &timespec) failed.", (long int)clk_id);
        tv->tv_usec = 0;
        tv->tv_sec = 0;
        return -1;
    }

    // nanoseconds -> microseconds for the sub-second part
    tv->tv_usec = (suseconds_t)((now_ts.tv_nsec % NSEC_PER_SEC) / NSEC_PER_USEC);
    tv->tv_sec = now_ts.tv_sec;

    return 0;
}
// CLOCK_REALTIME in whole seconds (0 if the clock cannot be read).
inline time_t now_realtime_sec(void) {
    time_t seconds = now_sec(CLOCK_REALTIME);
    return seconds;
}
inline msec_t now_realtime_msec(void) {
return now_usec(CLOCK_REALTIME) / USEC_PER_MS;
}
// CLOCK_REALTIME in microseconds (0 if the clock cannot be read).
inline usec_t now_realtime_usec(void) {
    usec_t now_ut = now_usec(CLOCK_REALTIME);
    return now_ut;
}
// Fills `tv` from CLOCK_REALTIME; returns 0 on success, -1 on failure.
inline int now_realtime_timeval(struct timeval *tv) {
    int rc = now_timeval(CLOCK_REALTIME, tv);
    return rc;
}
// Whole seconds of the monotonic clock currently held in clock_monotonic_to_use.
inline time_t now_monotonic_sec(void) {
    time_t seconds = now_sec(clock_monotonic_to_use);
    return seconds;
}
// Microseconds of the monotonic clock currently held in clock_monotonic_to_use.
inline usec_t now_monotonic_usec(void) {
    usec_t now_ut = now_usec(clock_monotonic_to_use);
    return now_ut;
}
// Fills `tv` from the monotonic clock held in clock_monotonic_to_use;
// returns 0 on success, -1 on failure.
inline int now_monotonic_timeval(struct timeval *tv) {
    int rc = now_timeval(clock_monotonic_to_use, tv);
    return rc;
}
// Whole seconds of CLOCK_MONOTONIC; this always reads CLOCK_MONOTONIC itself,
// not the clock stored in clock_monotonic_to_use.
inline time_t now_monotonic_high_precision_sec(void) {
    time_t seconds = now_sec(CLOCK_MONOTONIC);
    return seconds;
}
// Microseconds of CLOCK_MONOTONIC; this always reads CLOCK_MONOTONIC itself,
// not the clock stored in clock_monotonic_to_use.
inline usec_t now_monotonic_high_precision_usec(void) {
    usec_t now_ut = now_usec(CLOCK_MONOTONIC);
    return now_ut;
}
// Fills `tv` from CLOCK_MONOTONIC (not the clock stored in clock_monotonic_to_use);
// returns 0 on success, -1 on failure.
inline int now_monotonic_high_precision_timeval(struct timeval *tv) {
    int rc = now_timeval(CLOCK_MONOTONIC, tv);
    return rc;
}
// Whole seconds of the boottime clock currently held in clock_boottime_to_use.
inline time_t now_boottime_sec(void) {
    time_t seconds = now_sec(clock_boottime_to_use);
    return seconds;
}
// Microseconds of the boottime clock currently held in clock_boottime_to_use.
inline usec_t now_boottime_usec(void) {
    usec_t now_ut = now_usec(clock_boottime_to_use);
    return now_ut;
}
// Fills `tv` from the boottime clock held in clock_boottime_to_use;
// returns 0 on success, -1 on failure.
inline int now_boottime_timeval(struct timeval *tv) {
    int rc = now_timeval(clock_boottime_to_use, tv);
    return rc;
}
inline usec_t timeval_usec(struct timeval *tv) {
return (usec_t)tv->tv_sec * USEC_PER_SEC + (tv->tv_usec % USEC_PER_SEC);
}
// Converts a struct timeval to a single millisecond count
// (the sub-millisecond part of tv_usec is truncated).
inline msec_t timeval_msec(struct timeval *tv) {
    // microseconds -> milliseconds must divide by USEC_PER_MS; the previous divisor
    // (MSEC_PER_SEC) was the wrong unit for this conversion
    return (msec_t)tv->tv_sec * MSEC_PER_SEC + ((tv->tv_usec % USEC_PER_SEC) / USEC_PER_MS);
}
inline susec_t dt_usec_signed(struct timeval *now, struct timeval *old) {
usec_t ts1 = timeval_usec(now);
usec_t ts2 = timeval_usec(old);
if(likely(ts1 >= ts2)) return (susec_t)(ts1 - ts2);
return -((susec_t)(ts2 - ts1));
}
// Absolute difference between two timevals, in microseconds
// (the order of the arguments does not affect the result).
inline usec_t dt_usec(struct timeval *now, struct timeval *old) {
    usec_t now_ut = timeval_usec(now);
    usec_t old_ut = timeval_usec(old);

    if(now_ut > old_ut)
        return now_ut - old_ut;

    return old_ut - now_ut;
}
#ifdef __linux__
// Sleeps until the absolute CLOCK_REALTIME timestamp `usec` (microseconds).
//
// clock_nanosleep() is restarted for as long as it is interrupted by signals (EINTR).
// Any other failure is logged (once per error class for the lifetime of the process)
// and the function falls back to a single relative sleep for the time still remaining
// until `usec`. If `usec` is already in the past, it returns immediately.
void sleep_to_absolute_time(usec_t usec) {
    static int einval_printed = 0, enotsup_printed = 0, eunknown_printed = 0;
    clockid_t clock = CLOCK_REALTIME;

    struct timespec req = {
        .tv_sec = (time_t)(usec / USEC_PER_SEC),
        // tv_nsec is a long; suseconds_t is not guaranteed to hold nanosecond values
        .tv_nsec = (long)((usec % USEC_PER_SEC) * NSEC_PER_USEC)
    };

    errno = 0;
    int ret;

    // clock_nanosleep() returns the error number directly (it does not set errno)
    while((ret = clock_nanosleep(clock, TIMER_ABSTIME, &req, NULL)) == EINTR)
        errno = 0;

    if(ret == 0)
        return;

    if (ret == EINVAL) {
        if (!einval_printed) {
            einval_printed++;
            netdata_log_error("Invalid time given to clock_nanosleep(): clockid = %d, tv_sec = %lld, tv_nsec = %ld",
                              clock,
                              (long long)req.tv_sec,
                              req.tv_nsec);
        }
    } else if (ret == ENOTSUP) {
        if (!enotsup_printed) {
            enotsup_printed++;
            netdata_log_error("Invalid clock id given to clock_nanosleep(): clockid = %d, tv_sec = %lld, tv_nsec = %ld",
                              clock,
                              (long long)req.tv_sec,
                              req.tv_nsec);
        }
    } else {
        if (!eunknown_printed) {
            eunknown_printed++;
            netdata_log_error("Unknown return value %d from clock_nanosleep(): clockid = %d, tv_sec = %lld, tv_nsec = %ld",
                              ret,
                              clock,
                              (long long)req.tv_sec,
                              req.tv_nsec);
        }
    }

    // Fallback: `usec` is an absolute timestamp, so it must be converted to the remaining
    // relative duration before it is handed to sleep_usec(). Retrying clock_nanosleep()
    // would fail the same way, so we do not loop.
    usec_t now_ut = now_realtime_usec();
    if(now_ut && usec > now_ut)
        sleep_usec(usec - now_ut);
}
#endif
// Number of heartbeats whose wake-up alignment is tracked; heartbeats that
// receive an id at or above this value are not recorded.
#define HEARTBEAT_ALIGNMENT_STATISTICS_SIZE 10
// Protects heartbeat_alignment_id while heartbeat_init() hands out ids.
netdata_mutex_t heartbeat_alignment_mutex = NETDATA_MUTEX_INITIALIZER;
// Next statistics id to assign; incremented on every heartbeat_init().
static size_t heartbeat_alignment_id = 0;
// Per-heartbeat counters, updated by heartbeat_next().
struct heartbeat_thread_statistics {
// number of heartbeat_next() wake-ups recorded
size_t sequence;
// accumulated (now - next) microseconds over those wake-ups
usec_t dt;
};
// One slot per tracked heartbeat, indexed by heartbeat_t.statistics_id.
static struct heartbeat_thread_statistics heartbeat_alignment_values[HEARTBEAT_ALIGNMENT_STATISTICS_SIZE] = { 0 };
// Summarizes how much the accumulated wake-up delay of each tracked heartbeat grew
// since the previous call of this function.
//
// Only heartbeats whose sequence changed since the previous call are counted.
// Each output pointer may be NULL; min, max and average are in microseconds and
// are 0 when no heartbeat advanced.
//
// The shared table is copied without taking heartbeat_alignment_mutex, and the
// previous snapshot is kept in a function-level static.
void heartbeat_statistics(usec_t *min_ptr, usec_t *max_ptr, usec_t *average_ptr, size_t *count_ptr) {
    static struct heartbeat_thread_statistics previous[HEARTBEAT_ALIGNMENT_STATISTICS_SIZE] = { 0 };
    struct heartbeat_thread_statistics snapshot[HEARTBEAT_ALIGNMENT_STATISTICS_SIZE];

    memcpy(snapshot, heartbeat_alignment_values, sizeof(snapshot));

    usec_t lowest = 0, highest = 0, sum = 0;
    size_t samples = 0;

    for(size_t slot = 0; slot < HEARTBEAT_ALIGNMENT_STATISTICS_SIZE; slot++) {
        // no wake-ups recorded for this heartbeat since the last call
        if(snapshot[slot].sequence == previous[slot].sequence)
            continue;

        usec_t delta = snapshot[slot].dt - previous[slot].dt;

        if(samples == 0 || delta < lowest)
            lowest = delta;

        if(samples == 0 || delta > highest)
            highest = delta;

        sum += delta;
        samples++;
    }

    usec_t mean = samples ? sum / samples : 0;

    if(min_ptr)
        *min_ptr = lowest;

    if(max_ptr)
        *max_ptr = highest;

    if(average_ptr)
        *average_ptr = mean;

    if(count_ptr)
        *count_ptr = samples;

    // remember this snapshot as the baseline for the next call
    memcpy(previous, snapshot, sizeof(previous));
}
// Prepares a heartbeat for use with heartbeat_next().
//
// - clears the last wake-up time
// - picks a wake-up offset ("randomness") between 250ms and 500ms, derived from the
//   current realtime clock and rounded down to the realtime clock resolution
// - assigns the next statistics id and, if it is within the tracked range, resets its slot
inline void heartbeat_init(heartbeat_t *hb) {
    hb->realtime = 0ULL;

    usec_t jitter = (usec_t)(now_realtime_usec() * clock_realtime_resolution) % (250 * USEC_PER_MS);
    hb->randomness = (usec_t)250 * USEC_PER_MS + jitter;
    hb->randomness -= (hb->randomness % clock_realtime_resolution);

    // ids are handed out under the mutex so that concurrent initializations get distinct ones
    netdata_mutex_lock(&heartbeat_alignment_mutex);
    hb->statistics_id = heartbeat_alignment_id++;
    netdata_mutex_unlock(&heartbeat_alignment_mutex);

    if(hb->statistics_id >= HEARTBEAT_ALIGNMENT_STATISTICS_SIZE)
        return;

    heartbeat_alignment_values[hb->statistics_id].dt = 0;
    heartbeat_alignment_values[hb->statistics_id].sequence = 0;
}
// Waits for the next heartbeat and returns the time elapsed since the previous one.
//
// The wake-up target is computed from the realtime clock: the next multiple of `tick`
// after now, plus the heartbeat's `randomness` offset, rounded up to the realtime clock
// resolution. The thread then sleeps for the relative difference to that target.
//
// hb   - heartbeat state prepared by heartbeat_init()
// tick - heartbeat period in microseconds
//
// Returns the realtime microseconds between this wake-up and the previous one for `hb`,
// or 0 on the first call.
//
// NOTE(review): a tick of 0 or 1 makes `now % tick` / `% (tick / 2)` divide by zero —
// confirm that callers never pass such values.
usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) {
// the offset chosen at init time must stay within the first half of the tick
if(unlikely(hb->randomness > tick / 2)) {
// TODO: The heartbeat tick should be specified at the heartbeat_init() function
usec_t tmp = (now_realtime_usec() * clock_realtime_resolution) % (tick / 2);
nd_log_limit_static_global_var(erl, 10, 0);
nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
"heartbeat randomness of %"PRIu64" is too big for a tick of %"PRIu64" - setting it to %"PRIu64"",
hb->randomness, tick, tmp);
hb->randomness = tmp;
}
usec_t dt;
usec_t now = now_realtime_usec();
// start of the next tick, shifted by this heartbeat's offset
usec_t next = now - (now % tick) + tick + hb->randomness;
// align the next time we want to the clock resolution
if(next % clock_realtime_resolution)
next = next - (next % clock_realtime_resolution) + clock_realtime_resolution;
// sleep_usec_with_now() has a loop to guarantee we will sleep for at least the requested time.
// According the specs, when we sleep for a relative time, clock adjustments should not affect the duration
// we sleep.
sleep_usec_with_now(next - now, now);
now = now_realtime_usec();
dt = now - hb->realtime;
// record how far from the target we woke up (only for tracked heartbeats)
// NOTE(review): when now < next this subtraction presumably wraps around (usec_t looks unsigned) — confirm
if(hb->statistics_id < HEARTBEAT_ALIGNMENT_STATISTICS_SIZE) {
heartbeat_alignment_values[hb->statistics_id].dt += now - next;
heartbeat_alignment_values[hb->statistics_id].sequence++;
}
if(unlikely(now < next)) {
// errno is cleared before logging
errno = 0;
nd_log_limit_static_global_var(erl, 10, 0);
nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
"heartbeat clock: woke up %"PRIu64" microseconds earlier than expected "
"(can be due to the CLOCK_REALTIME set to the past).",
next - now);
}
else if(unlikely(now - next > tick / 2)) {
// errno is cleared before logging
errno = 0;
nd_log_limit_static_global_var(erl, 10, 0);
nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
"heartbeat clock: woke up %"PRIu64" microseconds later than expected "
"(can be due to system load or the CLOCK_REALTIME set to the future).",
now - next);
}
if(unlikely(!hb->realtime)) {
// the first time return zero
dt = 0;
}
// remember this wake-up for the next call
hb->realtime = now;
return dt;
}
#ifdef OS_WINDOWS
#include "windows.h"
// Windows implementation: sleeps for `usec` microseconds counted from `started_ut`
// (a realtime timestamp in microseconds; pass 0 to use the current time).
//
// Sleep() works in milliseconds, so the loop stops once less than one millisecond
// remains. The timer period is set to 1 for the duration of the sleep via
// timeBeginPeriod()/timeEndPeriod().
void sleep_usec_with_now(usec_t usec, usec_t started_ut)
{
    usec_t begin_ut = started_ut ? started_ut : now_realtime_usec();
    usec_t deadline_ut = begin_ut + usec;

    timeBeginPeriod(1);

    for (usec_t left_ut = usec; left_ut >= 1000; ) {
        DWORD chunk_ms = (DWORD) (left_ut / USEC_PER_MS);
        Sleep(chunk_ms);

        // re-check against the deadline, as Sleep() may return early or late
        usec_t current_ut = now_realtime_usec();
        if (current_ut >= deadline_ut)
            break;

        left_ut = deadline_ut - current_ut;
    }

    timeEndPeriod(1);
}
#else
// Sleeps for `usec` microseconds counted from `started_ut`
// (a realtime timestamp in microseconds; pass 0 to use the current time).
//
// nanosleep() is restarted when it is interrupted by a signal. On every restart the
// remaining time is clamped so that the sleep never extends past started_ut + usec,
// and the loop ends as soon as that deadline has been reached.
// Any other nanosleep() failure is logged and ends the sleep.
void sleep_usec_with_now(usec_t usec, usec_t started_ut) {
    // we expect microseconds (1.000.000 per second)
    // but timespec is nanoseconds (1.000.000.000 per second)
    struct timespec rem = { 0, 0 }, req = {
        .tv_sec = (time_t) (usec / USEC_PER_SEC),
        // tv_nsec is a long; suseconds_t is not guaranteed to hold nanosecond values
        .tv_nsec = (long) ((usec % USEC_PER_SEC) * NSEC_PER_USEC)
    };

    // make sure errno is not EINTR
    errno = 0;

    if(!started_ut)
        started_ut = now_realtime_usec();

    usec_t end_ut = started_ut + usec;

    while (nanosleep(&req, &rem) != 0) {
        if (likely(errno == EINTR && (rem.tv_sec || rem.tv_nsec))) {
            req = rem;
            rem = (struct timespec){ 0, 0 };

            // break an infinite loop
            errno = 0;

            usec_t now_ut = now_realtime_usec();
            if(now_ut >= end_ut)
                break;

            // what nanosleep() says is left, converted from nanoseconds to microseconds
            usec_t remaining_ut = (usec_t)req.tv_sec * USEC_PER_SEC + (usec_t)req.tv_nsec / NSEC_PER_USEC;

            // what is really left until the deadline; never sleep longer than this
            usec_t max_remaining_ut = end_ut - now_ut;

            if(remaining_ut > max_remaining_ut) {
                req = (struct timespec){
                    .tv_sec = (time_t) (max_remaining_ut / USEC_PER_SEC),
                    .tv_nsec = (long) ((max_remaining_ut % USEC_PER_SEC) * NSEC_PER_USEC)
                };
            }
        }
        else {
            netdata_log_error("Cannot nanosleep() for %"PRIu64" microseconds.", usec);
            break;
        }
    }
}
#endif
// Returns the uptime in milliseconds as read from the boottime clock.
// When CLOCK_BOOTTIME_IS_AVAILABLE is not defined, it logs an error and returns 0.
static inline collected_number uptime_from_boottime(void) {
#ifdef CLOCK_BOOTTIME_IS_AVAILABLE
    usec_t boottime_ut = now_boottime_usec();
    return (collected_number)(boottime_ut / USEC_PER_MS);
#else
    netdata_log_error("uptime cannot be read from CLOCK_BOOTTIME on this system.");
    return 0;
#endif
}
// procfile handle for the uptime file; opened on first use and kept for later calls
static procfile *read_proc_uptime_ff = NULL;

// Returns the uptime in milliseconds, parsed from the first word of the first line
// of `filename`. Returns 0 when the file cannot be opened or read, or when it has
// no lines or no words (the last two cases are logged).
static inline collected_number read_proc_uptime(char *filename) {
    if(unlikely(read_proc_uptime_ff == NULL)) {
        read_proc_uptime_ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT);

        if(unlikely(read_proc_uptime_ff == NULL))
            return 0;
    }

    read_proc_uptime_ff = procfile_readall(read_proc_uptime_ff);
    if(unlikely(read_proc_uptime_ff == NULL))
        return 0;

    if(unlikely(procfile_lines(read_proc_uptime_ff) < 1)) {
        netdata_log_error("/proc/uptime has no lines.");
        return 0;
    }

    if(unlikely(procfile_linewords(read_proc_uptime_ff, 0) < 1)) {
        netdata_log_error("/proc/uptime has less than 1 word in it.");
        return 0;
    }

    // the parsed value is multiplied by 1000 to get milliseconds
    return (collected_number)(strtondd(procfile_lineword(read_proc_uptime_ff, 0, 0), NULL) * 1000.0);
}
// Returns the system uptime in milliseconds.
//
// On the first successful call it reads the uptime both from the boottime clock and
// from `filename`, then remembers which source to use:
// - the boottime clock, when it is non-zero and within 1000 ms of the file value
// - otherwise the file, when its value is non-zero
// If neither source works, it logs an error and returns 1; the selection is retried
// on the next call.
inline collected_number uptime_msec(char *filename){
    static int use_boottime = -1;

    if(unlikely(use_boottime == -1)) {
        collected_number uptime_boottime = uptime_from_boottime();
        collected_number uptime_proc = read_proc_uptime(filename);

        long long delta = (long long)uptime_boottime - (long long)uptime_proc;
        if(delta < 0) delta = -delta;

        if(delta <= 1000 && uptime_boottime != 0) {
            // the file is not needed any more; clear the handle so that it does not dangle
            procfile_close(read_proc_uptime_ff);
            read_proc_uptime_ff = NULL;

            netdata_log_info("Using now_boottime_usec() for uptime (dt is %lld ms)", delta);
            use_boottime = 1;
        }
        else if(uptime_proc != 0) {
            netdata_log_info("Using /proc/uptime for uptime (dt is %lld ms)", delta);
            use_boottime = 0;
        }
        else {
            netdata_log_error("Cannot find any way to read uptime on this system.");
            return 1;
        }
    }

    collected_number uptime;
    if(use_boottime)
        uptime = uptime_from_boottime();
    else
        uptime = read_proc_uptime(filename);

    return uptime;
}