0
0
Fork 0
mirror of https://github.com/netdata/netdata.git synced 2025-04-16 18:37:50 +00:00

Display uptime for processes ()

* Get process uptime

* Calculate target uptime

* Update charts

* Show collected data

* Fix chart names

* Update the documentation

* Fix a flag value

* Add an explanation note for the 'carried over uptime' chart

* Move the functions for getting uptime to libnetdata

* Rename the function for getting uptime

* Remove redundant code

* Fix starttime calculation

* More accurate definition for the carried over uptime

* fix group starttime calculation

* Fix typo
This commit is contained in:
Vladimir Kobal 2019-08-29 20:35:05 +03:00 committed by Paul Emm. Katsoulakis
parent 1907c1486f
commit c79112e853
6 changed files with 195 additions and 68 deletions
collectors
libnetdata/clocks
web/gui

View file

@ -47,6 +47,11 @@ Each of these sections provides the same number of charts:
- Threads Running
- Processes Running
- Pipes Open
- Carried Over Uptime (since the Netdata restart)
- Minimum Uptime
- Average Uptime
- Maximum Uptime
- Swap Memory
- Swap Memory Used
- Major Page Faults (i.e. swap activity)

View file

@ -260,6 +260,12 @@ struct target {
kernel_uint_t openeventpolls;
kernel_uint_t openother;
kernel_uint_t starttime;
kernel_uint_t collected_starttime;
kernel_uint_t uptime_min;
kernel_uint_t uptime_sum;
kernel_uint_t uptime_max;
unsigned int processes; // how many processes have been merged to this
int exposed; // if set, we have sent this to netdata
int hidden; // if set, we set the hidden flag on the dimension
@ -345,7 +351,7 @@ struct pid_stat {
// int64_t nice;
int32_t num_threads;
// int64_t itrealvalue;
// kernel_uint_t starttime;
kernel_uint_t collected_starttime;
// kernel_uint_t vsize;
// kernel_uint_t rss;
// kernel_uint_t rsslim;
@ -419,6 +425,8 @@ struct pid_stat {
usec_t io_collected_usec;
usec_t last_io_collected_usec;
kernel_uint_t uptime;
char *fds_dirname; // the full directory name in /proc/PID/fd
char *stat_filename;
@ -433,6 +441,8 @@ struct pid_stat {
size_t pagesize;
kernel_uint_t global_uptime;
// log each problem once per process
// log flood protection flags (log_thrown)
#define PID_LOG_IO 0x00000001
@ -1421,7 +1431,8 @@ static inline int read_proc_pid_stat(struct pid_stat *p, void *ptr) {
// p->nice = str2kernel_uint_t(procfile_lineword(ff, 0, 18));
p->num_threads = (int32_t)str2uint32_t(procfile_lineword(ff, 0, 19));
// p->itrealvalue = str2kernel_uint_t(procfile_lineword(ff, 0, 20));
// p->starttime = str2kernel_uint_t(procfile_lineword(ff, 0, 21));
p->collected_starttime = str2kernel_uint_t(procfile_lineword(ff, 0, 21)) / system_hz;
p->uptime = (global_uptime > p->collected_starttime)?(global_uptime - p->collected_starttime):0;
// p->vsize = str2kernel_uint_t(procfile_lineword(ff, 0, 22));
// p->rss = str2kernel_uint_t(procfile_lineword(ff, 0, 23));
// p->rsslim = str2kernel_uint_t(procfile_lineword(ff, 0, 24));
@ -1490,6 +1501,8 @@ cleanup:
return 0;
}
// ----------------------------------------------------------------------------
static inline int read_proc_pid_io(struct pid_stat *p, void *ptr) {
(void)ptr;
#ifdef __FreeBSD__
@ -2634,6 +2647,12 @@ static int collect_data_for_all_processes(void) {
collect_data_for_pid(pid, &procbase[i]);
}
#else
static char uptime_filename[FILENAME_MAX + 1] = "";
if(*uptime_filename == '\0')
snprintfz(uptime_filename, FILENAME_MAX, "%s/proc/uptime", netdata_configured_host_prefix);
global_uptime = (kernel_uint_t)(uptime_msec(uptime_filename) / MSEC_PER_SEC);
char dirname[FILENAME_MAX + 1];
snprintfz(dirname, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix);
@ -2879,6 +2898,11 @@ static size_t zero_all_targets(struct target *root) {
w->openother = 0;
}
w->collected_starttime = 0;
w->uptime_min = 0;
w->uptime_sum = 0;
w->uptime_max = 0;
if(unlikely(w->root_pid)) {
struct pid_on_target *pid_on_target_to_free, *pid_on_target = w->root_pid;
@ -3032,6 +3056,11 @@ static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p,
w->processes++;
w->num_threads += p->num_threads;
if(!w->collected_starttime || p->collected_starttime < w->collected_starttime) w->collected_starttime = p->collected_starttime;
if(!w->uptime_min || p->uptime < w->uptime_min) w->uptime_min = p->uptime;
w->uptime_sum += p->uptime;
if(!w->uptime_max || w->uptime_max < p->uptime) w->uptime_max = p->uptime;
if(unlikely(debug_enabled || w->debug_enabled)) {
debug_log_int("aggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt);
@ -3042,6 +3071,19 @@ static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p,
}
}
// Walk the target list starting at `root` and refresh the carried-over
// start time (starttime) of every target from its collected_starttime:
//  - collected_starttime == 0  -> starttime is reset to 0
//  - otherwise                 -> starttime is initialised when it is 0, or
//                                 lowered when the collected value is smaller
static inline void post_aggregate_targets(struct target *root) {
    struct target *t = root;

    while(t) {
        if(!t->collected_starttime) {
            // nothing was collected for this target, drop the carried-over value
            t->starttime = 0;
        }
        else if(!t->starttime || t->collected_starttime < t->starttime) {
            // keep the earliest start time seen so far
            t->starttime = t->collected_starttime;
        }

        t = t->next;
    }
}
static void calculate_netdata_statistics(void) {
apply_apps_groups_targets_inheritance();
@ -3102,6 +3144,10 @@ static void calculate_netdata_statistics(void) {
aggregate_pid_fds_on_targets(p);
}
post_aggregate_targets(apps_groups_root_target);
post_aggregate_targets(users_root_target);
post_aggregate_targets(groups_root_target);
cleanup_exited_pids();
}
@ -3457,6 +3503,36 @@ static void send_collected_data_to_netdata(struct target *root, const char *type
}
send_END();
#ifndef __FreeBSD__
send_BEGIN(type, "uptime", dt);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed && w->processes))
send_SET(w->name, (global_uptime > w->starttime)?(global_uptime - w->starttime):0);
}
send_END();
send_BEGIN(type, "uptime_min", dt);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed && w->processes))
send_SET(w->name, w->uptime_min);
}
send_END();
send_BEGIN(type, "uptime_avg", dt);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed && w->processes))
send_SET(w->name, w->processes?(w->uptime_sum / w->processes):0);
}
send_END();
send_BEGIN(type, "uptime_max", dt);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed && w->processes))
send_SET(w->name, w->uptime_max);
}
send_END();
#endif
send_BEGIN(type, "mem", dt);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed && w->processes))
@ -3615,6 +3691,32 @@ static void send_charts_updates_to_netdata(struct target *root, const char *type
fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
}
#ifndef __FreeBSD__
fprintf(stdout, "CHART %s.uptime '' '%s Carried Over Uptime' 'seconds' processes %s.uptime line 20008 %d\n", type, title, type, update_every);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed))
fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
}
fprintf(stdout, "CHART %s.uptime_min '' '%s Minimum Uptime' 'seconds' processes %s.uptime_min line 20009 %d\n", type, title, type, update_every);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed))
fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
}
fprintf(stdout, "CHART %s.uptime_avg '' '%s Average Uptime' 'seconds' processes %s.uptime_avg line 20010 %d\n", type, title, type, update_every);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed))
fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
}
fprintf(stdout, "CHART %s.uptime_max '' '%s Maximum Uptime' 'seconds' processes %s.uptime_max line 20011 %d\n", type, title, type, update_every);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed))
fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
}
#endif
fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (%d%% = %d core%s)' 'percentage' cpu %s.cpu_user stacked 20020 %d\n", type, title, (processors * 100), processors, (processors>1)?"s":"", type, update_every);
for (w = root; w ; w = w->next) {
if(unlikely(w->exposed))

View file

@ -2,76 +2,17 @@
#include "plugin_proc.h"
// Uptime in milliseconds, computed from now_boottime_usec().
// When CLOCK_BOOTTIME_IS_AVAILABLE is not defined, an error is logged
// and 0 is returned instead.
static inline collected_number uptime_from_boottime(void) {
#ifndef CLOCK_BOOTTIME_IS_AVAILABLE
    error("uptime cannot be read from CLOCK_BOOTTIME on this system.");
    return 0;
#else
    // microseconds -> milliseconds
    collected_number boottime_ms = now_boottime_usec() / 1000;
    return boottime_ms;
#endif
}
// procfile handle for the uptime file; opened on the first call and reused
static procfile *read_proc_uptime_ff = NULL;

// Read the first word of the first line of the uptime file and return it
// multiplied by 1000. The file name defaults to <host prefix>/proc/uptime
// and can be overridden through the "plugin:proc:/proc/uptime" config section.
// Returns 0 when the file cannot be opened or read, or has no usable content.
static inline collected_number read_proc_uptime(void) {
    if(unlikely(read_proc_uptime_ff == NULL)) {
        char default_path[FILENAME_MAX + 1];
        snprintfz(default_path, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/uptime");

        char *configured_path = config_get("plugin:proc:/proc/uptime", "filename to monitor", default_path);
        read_proc_uptime_ff = procfile_open(configured_path, " \t", PROCFILE_FLAG_DEFAULT);
        if(unlikely(read_proc_uptime_ff == NULL))
            return 0;
    }

    read_proc_uptime_ff = procfile_readall(read_proc_uptime_ff);
    if(unlikely(read_proc_uptime_ff == NULL))
        return 0;

    if(unlikely(procfile_lines(read_proc_uptime_ff) < 1)) {
        error("/proc/uptime has no lines.");
        return 0;
    }

    if(unlikely(procfile_linewords(read_proc_uptime_ff, 0) < 1)) {
        error("/proc/uptime has less than 1 word in it.");
        return 0;
    }

    // the value is scaled by 1000 before being truncated to an integer
    long double first_word = strtold(procfile_lineword(read_proc_uptime_ff, 0, 0), NULL);
    return (collected_number)(first_word * 1000.0);
}
int do_proc_uptime(int update_every, usec_t dt) {
(void)dt;
static int use_boottime = -1;
static char *uptime_filename = NULL;
if(!uptime_filename) {
char filename[FILENAME_MAX + 1];
snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/uptime");
if(unlikely(use_boottime == -1)) {
collected_number uptime_boottime = uptime_from_boottime();
collected_number uptime_proc = read_proc_uptime();
long long delta = (long long)uptime_boottime - (long long)uptime_proc;
if(delta < 0) delta = -delta;
if(delta <= 1000 && uptime_boottime != 0) {
procfile_close(read_proc_uptime_ff);
info("Using now_boottime_usec() for uptime (dt is %lld ms)", delta);
use_boottime = 1;
}
else if(uptime_proc != 0) {
info("Using /proc/uptime for uptime (dt is %lld ms)", delta);
use_boottime = 0;
}
else {
error("Cannot find any way to read uptime on this system.");
return 1;
}
uptime_filename = config_get("plugin:proc:/proc/uptime", "filename to monitor", filename);
}
collected_number uptime;
if(use_boottime)
uptime = uptime_from_boottime();
else
uptime = read_proc_uptime();
// --------------------------------------------------------------------
static RRDSET *st = NULL;
static RRDDIM *rd = NULL;
@ -97,7 +38,7 @@ int do_proc_uptime(int update_every, usec_t dt) {
else
rrdset_next(st);
rrddim_set_by_pointer(st, rd, uptime);
rrddim_set_by_pointer(st, rd, uptime_msec(uptime_filename));
rrdset_done(st);

View file

@ -210,3 +210,68 @@ int sleep_usec(usec_t usec) {
return ret;
#endif
}
// Uptime in milliseconds, computed from now_boottime_usec().
// When CLOCK_BOOTTIME_IS_AVAILABLE is not defined, an error is logged
// and 0 is returned instead.
static inline collected_number uptime_from_boottime(void) {
#ifndef CLOCK_BOOTTIME_IS_AVAILABLE
    error("uptime cannot be read from CLOCK_BOOTTIME on this system.");
    return 0;
#else
    // microseconds -> milliseconds
    collected_number boottime_ms = now_boottime_usec() / 1000;
    return boottime_ms;
#endif
}
// procfile handle for the uptime file; opened on the first call and reused
static procfile *read_proc_uptime_ff = NULL;

// Read the first word of the first line of `filename` and return it
// multiplied by 1000.
// Returns 0 when the file cannot be opened or read, or has no usable content.
// Note: `filename` is only used while the handle is not yet open.
static inline collected_number read_proc_uptime(char *filename) {
    if(unlikely(read_proc_uptime_ff == NULL)) {
        read_proc_uptime_ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT);
        if(unlikely(read_proc_uptime_ff == NULL))
            return 0;
    }

    read_proc_uptime_ff = procfile_readall(read_proc_uptime_ff);
    if(unlikely(read_proc_uptime_ff == NULL))
        return 0;

    if(unlikely(procfile_lines(read_proc_uptime_ff) < 1)) {
        error("/proc/uptime has no lines.");
        return 0;
    }

    if(unlikely(procfile_linewords(read_proc_uptime_ff, 0) < 1)) {
        error("/proc/uptime has less than 1 word in it.");
        return 0;
    }

    // the value is scaled by 1000 before being truncated to an integer
    long double first_word = strtold(procfile_lineword(read_proc_uptime_ff, 0, 0), NULL);
    return (collected_number)(first_word * 1000.0);
}
inline collected_number uptime_msec(char *filename){
static int use_boottime = -1;
if(unlikely(use_boottime == -1)) {
collected_number uptime_boottime = uptime_from_boottime();
collected_number uptime_proc = read_proc_uptime(filename);
long long delta = (long long)uptime_boottime - (long long)uptime_proc;
if(delta < 0) delta = -delta;
if(delta <= 1000 && uptime_boottime != 0) {
procfile_close(read_proc_uptime_ff);
info("Using now_boottime_usec() for uptime (dt is %lld ms)", delta);
use_boottime = 1;
}
else if(uptime_proc != 0) {
info("Using /proc/uptime for uptime (dt is %lld ms)", delta);
use_boottime = 0;
}
else {
error("Cannot find any way to read uptime on this system.");
return 1;
}
}
collected_number uptime;
if(use_boottime)
uptime = uptime_from_boottime();
else
uptime = read_proc_uptime(filename);
return uptime;
}

View file

@ -136,4 +136,6 @@ extern int sleep_usec(usec_t usec);
*/
void test_clock_boottime(void);
extern collected_number uptime_msec(char *filename);
#endif /* NETDATA_CLOCKS_H */

View file

@ -985,6 +985,10 @@ netdataDashboard.context = {
height: 2.0
},
'apps.uptime': {
info: 'Carried over process group uptime since the Netdata restart. The period of time within which at least one process in the group was running.'
},
// ------------------------------------------------------------------------
// USERS
@ -1008,6 +1012,10 @@ netdataDashboard.context = {
height: 2.0
},
'users.uptime': {
info: 'Carried over process group uptime since the Netdata restart. The period of time within which at least one process in the group was running.'
},
// ------------------------------------------------------------------------
// GROUPS
@ -1020,7 +1028,7 @@ netdataDashboard.context = {
},
'groups.vmem': {
info: 'Virtual memory allocated per user group. Please check <a href="https://github.com/netdata/netdata/tree/master/daemon#virtual-memory" target="_blank">this article</a> for more information.'
info: 'Virtual memory allocated per user group since the Netdata restart. Please check <a href="https://github.com/netdata/netdata/tree/master/daemon#virtual-memory" target="_blank">this article</a> for more information.'
},
'groups.preads': {
@ -1031,6 +1039,10 @@ netdataDashboard.context = {
height: 2.0
},
'groups.uptime': {
info: 'Carried over process group uptime. The period of time within which at least one process in the group was running.'
},
// ------------------------------------------------------------------------
// NETWORK QoS