diff options
Diffstat (limited to 'tools/tracing')
44 files changed, 12460 insertions, 0 deletions
diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile new file mode 100644 index 000000000000..95e485f12d97 --- /dev/null +++ b/tools/tracing/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../scripts/Makefile.include + +all: latency rtla + +clean: latency_clean rtla_clean + +install: latency_install rtla_install + +latency: + $(call descend,latency) + +latency_install: + $(call descend,latency,install) + +latency_clean: + $(call descend,latency,clean) + +rtla: + $(call descend,rtla) + +rtla_install: + $(call descend,rtla,install) + +rtla_clean: + $(call descend,rtla,clean) + +.PHONY: all install clean latency latency_install latency_clean \ + rtla rtla_install rtla_clean diff --git a/tools/tracing/latency/.gitignore b/tools/tracing/latency/.gitignore new file mode 100644 index 000000000000..2bb8e60f7fdd --- /dev/null +++ b/tools/tracing/latency/.gitignore @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +latency-collector +fixdep +feature +FEATURE-DUMP diff --git a/tools/tracing/latency/Build b/tools/tracing/latency/Build new file mode 100644 index 000000000000..0ce65ea72bf9 --- /dev/null +++ b/tools/tracing/latency/Build @@ -0,0 +1 @@ +latency-collector-y += latency-collector.o diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile new file mode 100644 index 000000000000..257a56b1899f --- /dev/null +++ b/tools/tracing/latency/Makefile @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: GPL-2.0-only + +ifeq ($(srctree),) + srctree := $(patsubst %/,%,$(dir $(CURDIR))) + srctree := $(patsubst %/,%,$(dir $(srctree))) + srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +include $(srctree)/tools/scripts/Makefile.include + +# O is an alias for OUTPUT +OUTPUT := $(O) + +ifeq ($(OUTPUT),) + OUTPUT := $(CURDIR) +else + # subdir is used by the ../Makefile in $(call descend,) + ifneq ($(subdir),) + OUTPUT := $(OUTPUT)/$(subdir) + endif +endif + +ifneq ($(patsubst %/,,$(lastword $(OUTPUT))),) + OUTPUT := $(OUTPUT)/ 
+endif + +LATENCY-COLLECTOR := $(OUTPUT)latency-collector +LATENCY-COLLECTOR_IN := $(LATENCY-COLLECTOR)-in.o + +export CC := gcc +export LD := ld +export AR := ar +export PKG_CONFIG := pkg-config + +FEATURE_TESTS := libtraceevent +FEATURE_TESTS += libtracefs +FEATURE_DISPLAY := libtraceevent +FEATURE_DISPLAY += libtracefs + +all: $(LATENCY-COLLECTOR) + +include $(srctree)/tools/build/Makefile.include + +# check for dependencies only on required targets +NON_CONFIG_TARGETS := clean install + +config := 1 +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) + config := 0 +endif +endif + +ifeq ($(config),1) + include $(srctree)/tools/build/Makefile.feature + include Makefile.config +endif + +CFLAGS += $(INCLUDES) $(LIB_INCLUDES) + +export CFLAGS OUTPUT srctree + +$(LATENCY-COLLECTOR): $(LATENCY-COLLECTOR_IN) + $(QUIET_LINK)$(CC) $(LDFLAGS) -o $(LATENCY-COLLECTOR) $(LATENCY-COLLECTOR_IN) $(EXTLIBS) + +latency-collector.%: fixdep FORCE + make -f $(srctree)/tools/build/Makefile.build dir=. $@ + +$(LATENCY-COLLECTOR_IN): fixdep FORCE + make $(build)=latency-collector + +INSTALL := install +MKDIR := mkdir +STRIP := strip +BINDIR := /usr/bin + +install: + @$(MKDIR) -p $(DESTDIR)$(BINDIR) + $(call QUIET_INSTALL,latency-collector)$(INSTALL) $(LATENCY-COLLECTOR) -m 755 $(DESTDIR)$(BINDIR) + @$(STRIP) $(DESTDIR)$(BINDIR)/latency-collector + +clean: + $(call QUIET_CLEAN, latency-collector) + $(Q)find . 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)@rm -f latency-collector fixdep FEATURE-DUMP + $(Q)rm -rf feature +.PHONY: FORCE clean install diff --git a/tools/tracing/latency/Makefile.config b/tools/tracing/latency/Makefile.config new file mode 100644 index 000000000000..6efa13e3ca93 --- /dev/null +++ b/tools/tracing/latency/Makefile.config @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0-only + +include $(srctree)/tools/scripts/utilities.mak + +STOP_ERROR := + +ifndef ($(NO_LIBTRACEEVENT),1) + ifeq ($(call get-executable,$(PKG_CONFIG)),) + $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it) + endif +endif + +define lib_setup + $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) + $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) + $(eval EXTLIBS += $(shell sh -c "$(PKG_CONFIG) --libs-only-l lib$(1)")) +endef + +$(call feature_check,libtraceevent) +ifeq ($(feature-libtraceevent), 1) + $(call detected,CONFIG_LIBTRACEEVENT) + $(call lib_setup,traceevent) +else + STOP_ERROR := 1 + $(info libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel) +endif + +$(call feature_check,libtracefs) +ifeq ($(feature-libtracefs), 1) + $(call detected,CONFIG_LIBTRACEFS) + $(call lib_setup,tracefs) +else + STOP_ERROR := 1 + $(info libtracefs is missing. Please install libtracefs-dev/libtracefs-devel) +endif + +ifeq ($(STOP_ERROR),1) + $(error Please, check the errors above.) 
+endif diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c new file mode 100644 index 000000000000..ef97916e3873 --- /dev/null +++ b/tools/tracing/latency/latency-collector.c @@ -0,0 +1,2108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017, 2018, 2019, 2021 BMW Car IT GmbH + * Author: Viktor Rosendahl (viktor.rosendahl@bmw.de) + */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 200809L + +#include <ctype.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <sched.h> +#include <linux/unistd.h> +#include <signal.h> +#include <sys/inotify.h> +#include <unistd.h> +#include <pthread.h> +#include <tracefs.h> + +static const char *prg_name; +static const char *prg_unknown = "unknown program name"; + +static int fd_stdout; + +static int sched_policy; +static bool sched_policy_set; + +static int sched_pri; +static bool sched_pri_set; + +static bool trace_enable = true; +static bool setup_ftrace = true; +static bool use_random_sleep; + +#define TRACE_OPTS \ + C(FUNC_TR, "function-trace"), \ + C(DISP_GR, "display-graph"), \ + C(NR, NULL) + +#undef C +#define C(a, b) OPTIDX_##a + +enum traceopt { + TRACE_OPTS +}; + +#undef C +#define C(a, b) b + +static const char *const optstr[] = { + TRACE_OPTS +}; + +enum errhandling { + ERR_EXIT = 0, + ERR_WARN, + ERR_CLEANUP, +}; + +static bool use_options[OPTIDX_NR]; + +static char inotify_buffer[655360]; + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define bool2str(x) (x ? 
"true":"false") + +#define DEFAULT_NR_PRINTER_THREADS (3) +static unsigned int nr_threads = DEFAULT_NR_PRINTER_THREADS; + +#define DEFAULT_TABLE_SIZE (2) +static unsigned int table_startsize = DEFAULT_TABLE_SIZE; + +static int verbosity; + +#define verbose_sizechange() (verbosity >= 1) +#define verbose_lostevent() (verbosity >= 2) +#define verbose_ftrace() (verbosity >= 1) + +#define was_changed(ORIG, CUR) (strcmp(ORIG, CUR) != 0) +#define needs_change(CUR, WANTED) (strcmp(CUR, WANTED) != 0) + +static const char *debug_tracefile; +static const char *debug_tracefile_dflt; +static const char *debug_maxlat; +static const char *debug_maxlat_dflt; +static const char * const DEBUG_NOFILE = "[file not found]"; + +static const char * const TR_MAXLAT = "tracing_max_latency"; +static const char * const TR_THRESH = "tracing_thresh"; +static const char * const TR_CURRENT = "current_tracer"; +static const char * const TR_OPTIONS = "trace_options"; + +static const char * const NOP_TRACER = "nop"; + +static const char * const OPT_NO_PREFIX = "no"; + +#define DFLT_THRESHOLD_US "0" +static const char *threshold = DFLT_THRESHOLD_US; + +#define DEV_URANDOM "/dev/urandom" +#define RT_DEFAULT_PRI (99) +#define DEFAULT_PRI (0) + +#define USEC_PER_MSEC (1000L) +#define NSEC_PER_USEC (1000L) +#define NSEC_PER_MSEC (USEC_PER_MSEC * NSEC_PER_USEC) + +#define MSEC_PER_SEC (1000L) +#define USEC_PER_SEC (USEC_PER_MSEC * MSEC_PER_SEC) +#define NSEC_PER_SEC (NSEC_PER_MSEC * MSEC_PER_SEC) + +#define SLEEP_TIME_MS_DEFAULT (1000L) +#define TRY_PRINTMUTEX_MS (1000) + +static long sleep_time = (USEC_PER_MSEC * SLEEP_TIME_MS_DEFAULT); + +static const char * const queue_full_warning = +"Could not queue trace for printing. It is likely that events happen faster\n" +"than what they can be printed. Probably partly because of random sleeping\n"; + +static const char * const no_tracer_msg = +"Could not find any tracers! 
Running this program as root may help!\n"; + +static const char * const no_latency_tr_msg = +"No latency tracers are supported by your kernel!\n"; + +struct policy { + const char *name; + int policy; + int default_pri; +}; + +static const struct policy policies[] = { + { "other", SCHED_OTHER, DEFAULT_PRI }, + { "batch", SCHED_BATCH, DEFAULT_PRI }, + { "idle", SCHED_IDLE, DEFAULT_PRI }, + { "rr", SCHED_RR, RT_DEFAULT_PRI }, + { "fifo", SCHED_FIFO, RT_DEFAULT_PRI }, + { NULL, 0, DEFAULT_PRI } +}; + +/* + * The default tracer will be the first on this list that is supported by the + * currently running Linux kernel. + */ +static const char * const relevant_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + "wakeup", + "wakeup_rt", + "wakeup_dl", + NULL +}; + +/* This is the list of tracers for which random sleep makes sense */ +static const char * const random_tracers[] = { + "preemptirqsoff", + "preemptoff", + "irqsoff", + NULL +}; + +static const char *current_tracer; +static bool force_tracer; + +struct ftrace_state { + char *tracer; + char *thresh; + bool opt[OPTIDX_NR]; + bool opt_valid[OPTIDX_NR]; + pthread_mutex_t mutex; +}; + +struct entry { + int ticket; + int ticket_completed_ref; +}; + +struct print_state { + int ticket_counter; + int ticket_completed; + pthread_mutex_t mutex; + pthread_cond_t cond; + int cnt; + pthread_mutex_t cnt_mutex; +}; + +struct short_msg { + char buf[160]; + int len; +}; + +static struct print_state printstate; +static struct ftrace_state save_state; +volatile sig_atomic_t signal_flag; + +#define PROB_TABLE_MAX_SIZE (1000) + +int probabilities[PROB_TABLE_MAX_SIZE]; + +struct sleep_table { + int *table; + int size; + pthread_mutex_t mutex; +}; + +static struct sleep_table sleeptable; + +#define QUEUE_SIZE (10) + +struct queue { + struct entry entries[QUEUE_SIZE]; + int next_prod_idx; + int next_cons_idx; + pthread_mutex_t mutex; + pthread_cond_t cond; +}; + +#define MAX_THREADS (40) + +struct queue printqueue; 
+pthread_t printthread[MAX_THREADS]; +pthread_mutex_t print_mtx; +#define PRINT_BUFFER_SIZE (16 * 1024 * 1024) + +static void cleanup_exit(int status); +static int set_trace_opt(const char *opt, bool value); + +static __always_inline void *malloc_or_die(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) { + warn("malloc() failed"); + cleanup_exit(EXIT_FAILURE); + } + return ptr; +} + +static __always_inline void *malloc_or_die_nocleanup(size_t size) +{ + void *ptr = malloc(size); + + if (unlikely(ptr == NULL)) + err(0, "malloc() failed"); + return ptr; +} + +static __always_inline void write_or_die(int fd, const char *buf, size_t count) +{ + ssize_t r; + + do { + r = write(fd, buf, count); + if (unlikely(r < 0)) { + if (errno == EINTR) + continue; + warn("write() failed"); + cleanup_exit(EXIT_FAILURE); + } + count -= r; + buf += r; + } while (count > 0); +} + +static __always_inline void clock_gettime_or_die(clockid_t clk_id, + struct timespec *tp) +{ + int r = clock_gettime(clk_id, tp); + + if (unlikely(r != 0)) + err(EXIT_FAILURE, "clock_gettime() failed"); +} + +static __always_inline void sigemptyset_or_die(sigset_t *s) +{ + if (unlikely(sigemptyset(s) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaddset_or_die(sigset_t *s, int signum) +{ + if (unlikely(sigaddset(s, signum) != 0)) { + warn("sigemptyset() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static __always_inline void sigaction_or_die(int signum, + const struct sigaction *act, + struct sigaction *oldact) +{ + if (unlikely(sigaction(signum, act, oldact) != 0)) { + warn("sigaction() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void open_stdout(void) +{ + if (setvbuf(stdout, NULL, _IONBF, 0) != 0) + err(EXIT_FAILURE, "setvbuf() failed"); + fd_stdout = fileno(stdout); + if (fd_stdout < 0) + err(EXIT_FAILURE, "fileno() failed"); +} + +/* + * It's not worth it to call cleanup_exit() from mutex functions 
because + * cleanup_exit() uses mutexes. + */ +static __always_inline void mutex_lock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_lock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_lock() failed"); +} + + +static __always_inline void mutex_unlock(pthread_mutex_t *mtx) +{ + errno = pthread_mutex_unlock(mtx); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_mutex_unlock() failed"); +} + +static __always_inline void cond_signal(pthread_cond_t *cond) +{ + errno = pthread_cond_signal(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_signal() failed"); +} + +static __always_inline void cond_wait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex) +{ + errno = pthread_cond_wait(cond, mutex); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_wait() failed"); +} + +static __always_inline void cond_broadcast(pthread_cond_t *cond) +{ + errno = pthread_cond_broadcast(cond); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_cond_broadcast() failed"); +} + +static __always_inline void +mutex_init(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) +{ + errno = pthread_mutex_init(mutex, attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutex_init() failed"); +} + +static __always_inline void mutexattr_init(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_init(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_init() failed"); +} + +static __always_inline void mutexattr_destroy(pthread_mutexattr_t *attr) +{ + errno = pthread_mutexattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_destroy() failed"); +} + +static __always_inline void mutexattr_settype(pthread_mutexattr_t *attr, + int type) +{ + errno = pthread_mutexattr_settype(attr, type); + if (errno) + err(EXIT_FAILURE, "pthread_mutexattr_settype() failed"); +} + +static __always_inline void condattr_init(pthread_condattr_t *attr) +{ + errno = pthread_condattr_init(attr); + if (errno) + err(EXIT_FAILURE, 
"pthread_condattr_init() failed"); +} + +static __always_inline void condattr_destroy(pthread_condattr_t *attr) +{ + errno = pthread_condattr_destroy(attr); + if (errno) + err(EXIT_FAILURE, "pthread_condattr_destroy() failed"); +} + +static __always_inline void condattr_setclock(pthread_condattr_t *attr, + clockid_t clock_id) +{ + errno = pthread_condattr_setclock(attr, clock_id); + if (unlikely(errno)) + err(EXIT_FAILURE, "pthread_condattr_setclock() failed"); +} + +static __always_inline void cond_init(pthread_cond_t *cond, + const pthread_condattr_t *attr) +{ + errno = pthread_cond_init(cond, attr); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int +cond_timedwait(pthread_cond_t *restrict cond, + pthread_mutex_t *restrict mutex, + const struct timespec *restrict abstime) +{ + errno = pthread_cond_timedwait(cond, mutex, abstime); + if (errno && errno != ETIMEDOUT) + err(EXIT_FAILURE, "pthread_cond_timedwait() failed"); + return errno; +} + +static void init_printstate(void) +{ + pthread_condattr_t cattr; + + printstate.ticket_counter = 0; + printstate.ticket_completed = 0; + printstate.cnt = 0; + + mutex_init(&printstate.mutex, NULL); + + condattr_init(&cattr); + condattr_setclock(&cattr, CLOCK_MONOTONIC); + cond_init(&printstate.cond, &cattr); + condattr_destroy(&cattr); +} + +static void init_print_mtx(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&print_mtx, &mattr); + mutexattr_destroy(&mattr); + +} + +static void signal_blocking(int how) +{ + sigset_t s; + + sigemptyset_or_die(&s); + sigaddset_or_die(&s, SIGHUP); + sigaddset_or_die(&s, SIGTERM); + sigaddset_or_die(&s, SIGINT); + + errno = pthread_sigmask(how, &s, NULL); + if (unlikely(errno)) { + warn("pthread_sigmask() failed"); + cleanup_exit(EXIT_FAILURE); + } +} + +static void signal_handler(int num) +{ + signal_flag = num; +} + +static void setup_sig_handler(void) +{ + 
struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = signal_handler; + + sigaction_or_die(SIGHUP, &sa, NULL); + sigaction_or_die(SIGTERM, &sa, NULL); + sigaction_or_die(SIGINT, &sa, NULL); +} + +static void process_signal(int signal) +{ + char *name; + + name = strsignal(signal); + if (name == NULL) + printf("Received signal %d\n", signal); + else + printf("Received signal %d (%s)\n", signal, name); + cleanup_exit(EXIT_SUCCESS); +} + +static __always_inline void check_signals(void) +{ + int signal = signal_flag; + + if (unlikely(signal)) + process_signal(signal); +} + +static __always_inline void get_time_in_future(struct timespec *future, + long time_us) +{ + long nsec; + + clock_gettime_or_die(CLOCK_MONOTONIC, future); + future->tv_sec += time_us / USEC_PER_SEC; + nsec = future->tv_nsec + (time_us * NSEC_PER_USEC) % NSEC_PER_SEC; + if (nsec >= NSEC_PER_SEC) { + future->tv_nsec = nsec % NSEC_PER_SEC; + future->tv_sec += 1; + } +} + +static __always_inline bool time_has_passed(const struct timespec *time) +{ + struct timespec now; + + clock_gettime_or_die(CLOCK_MONOTONIC, &now); + if (now.tv_sec > time->tv_sec) + return true; + if (now.tv_sec < time->tv_sec) + return false; + return (now.tv_nsec >= time->tv_nsec); +} + +static bool mutex_trylock_limit(pthread_mutex_t *mutex, int time_ms) +{ + long time_us = time_ms * USEC_PER_MSEC; + struct timespec limit; + + get_time_in_future(&limit, time_us); + do { + errno = pthread_mutex_trylock(mutex); + if (errno && errno != EBUSY) + err(EXIT_FAILURE, "pthread_mutex_trylock() failed"); + } while (errno && !time_has_passed(&limit)); + return errno == 0; +} + +static void restore_trace_opts(const struct ftrace_state *state, + const bool *cur) +{ + int i; + int r; + + for (i = 0; i < OPTIDX_NR; i++) + if (state->opt_valid[i] && state->opt[i] != cur[i]) { + r = set_trace_opt(optstr[i], state->opt[i]); + if (r < 0) + warnx("Failed to restore the %s option to %s", + optstr[i], bool2str(state->opt[i])); + else if 
(verbose_ftrace()) + printf("Restored the %s option in %s to %s\n", + optstr[i], TR_OPTIONS, + bool2str(state->opt[i])); + } +} + +static char *read_file(const char *file, enum errhandling h) +{ + int psize; + char *r; + static const char *emsg = "Failed to read the %s file"; + + r = tracefs_instance_file_read(NULL, file, &psize); + if (!r) { + if (h) { + warn(emsg, file); + if (h == ERR_CLEANUP) + cleanup_exit(EXIT_FAILURE); + } else + errx(EXIT_FAILURE, emsg, file); + } + + if (r && r[psize - 1] == '\n') + r[psize - 1] = '\0'; + return r; +} + +static void restore_file(const char *file, char **saved, const char *cur) +{ + if (*saved && was_changed(*saved, cur)) { + if (tracefs_instance_file_write(NULL, file, *saved) < 0) + warnx("Failed to restore %s to %s!", file, *saved); + else if (verbose_ftrace()) + printf("Restored %s to %s\n", file, *saved); + free(*saved); + *saved = NULL; + } +} + +static void restore_ftrace(void) +{ + mutex_lock(&save_state.mutex); + + restore_file(TR_CURRENT, &save_state.tracer, current_tracer); + restore_file(TR_THRESH, &save_state.thresh, threshold); + restore_trace_opts(&save_state, use_options); + + mutex_unlock(&save_state.mutex); +} + +static void cleanup_exit(int status) +{ + char *maxlat; + + if (!setup_ftrace) + exit(status); + + /* + * We try the print_mtx for 1 sec in order to avoid garbled + * output if possible, but if it cannot be obtained we proceed anyway. + */ + mutex_trylock_limit(&print_mtx, TRY_PRINTMUTEX_MS); + + maxlat = read_file(TR_MAXLAT, ERR_WARN); + if (maxlat) { + printf("The maximum detected latency was: %sus\n", maxlat); + free(maxlat); + } + + restore_ftrace(); + /* + * We do not need to unlock the print_mtx here because we will exit at + * the end of this function. Unlocking print_mtx causes problems if a + * print thread happens to be waiting for the mutex because we have + * just changed the ftrace settings to the original and thus the + * print thread would output incorrect data from ftrace. 
+ */ + exit(status); +} + +static void init_save_state(void) +{ + pthread_mutexattr_t mattr; + + mutexattr_init(&mattr); + mutexattr_settype(&mattr, PTHREAD_MUTEX_RECURSIVE); + mutex_init(&save_state.mutex, &mattr); + mutexattr_destroy(&mattr); + + save_state.tracer = NULL; + save_state.thresh = NULL; + save_state.opt_valid[OPTIDX_FUNC_TR] = false; + save_state.opt_valid[OPTIDX_DISP_GR] = false; +} + +static int printstate_next_ticket(struct entry *req) +{ + int r; + + r = ++(printstate.ticket_counter); + req->ticket = r; + req->ticket_completed_ref = printstate.ticket_completed; + cond_broadcast(&printstate.cond); + return r; +} + +static __always_inline +void printstate_mark_req_completed(const struct entry *req) +{ + if (req->ticket > printstate.ticket_completed) + printstate.ticket_completed = req->ticket; +} + +static __always_inline +bool printstate_has_new_req_arrived(const struct entry *req) +{ + return (printstate.ticket_counter != req->ticket); +} + +static __always_inline int printstate_cnt_inc(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = ++printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_dec(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = --printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline int printstate_cnt_read(void) +{ + int value; + + mutex_lock(&printstate.cnt_mutex); + value = printstate.cnt; + mutex_unlock(&printstate.cnt_mutex); + return value; +} + +static __always_inline +bool prev_req_won_race(const struct entry *req) +{ + return (printstate.ticket_completed != req->ticket_completed_ref); +} + +static void sleeptable_resize(int size, bool printout, struct short_msg *msg) +{ + int bytes; + + if (printout) { + msg->len = 0; + if (unlikely(size > PROB_TABLE_MAX_SIZE)) + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Cannot increase probability table to %d (maximum size 
reached)\n", size); + else + bytes = snprintf(msg->buf, sizeof(msg->buf), +"Increasing probability table to %d\n", size); + if (bytes < 0) + warn("snprintf() failed"); + else + msg->len = bytes; + } + + if (unlikely(size < 0)) { + /* Should never happen */ + warnx("Bad program state at %s:%d", __FILE__, __LINE__); + cleanup_exit(EXIT_FAILURE); + return; + } + sleeptable.size = size; + sleeptable.table = &probabilities[PROB_TABLE_MAX_SIZE - size]; +} + +static void init_probabilities(void) +{ + int i; + int j = 1000; + + for (i = 0; i < PROB_TABLE_MAX_SIZE; i++) { + probabilities[i] = 1000 / j; + j--; + } + mutex_init(&sleeptable.mutex, NULL); +} + +static int table_get_probability(const struct entry *req, + struct short_msg *msg) +{ + int diff = req->ticket - req->ticket_completed_ref; + int rval = 0; + + msg->len = 0; + diff--; + /* Should never happen...*/ + if (unlikely(diff < 0)) { + warnx("Programmer assumption error at %s:%d\n", __FILE__, + __LINE__); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&sleeptable.mutex); + if (diff >= (sleeptable.size - 1)) { + rval = sleeptable.table[sleeptable.size - 1]; + sleeptable_resize(sleeptable.size + 1, verbose_sizechange(), + msg); + } else { + rval = sleeptable.table[diff]; + } + mutex_unlock(&sleeptable.mutex); + return rval; +} + +static void init_queue(struct queue *q) +{ + q->next_prod_idx = 0; + q->next_cons_idx = 0; + mutex_init(&q->mutex, NULL); + errno = pthread_cond_init(&q->cond, NULL); + if (errno) + err(EXIT_FAILURE, "pthread_cond_init() failed"); +} + +static __always_inline int queue_len(const struct queue *q) +{ + if (q->next_prod_idx >= q->next_cons_idx) + return q->next_prod_idx - q->next_cons_idx; + else + return QUEUE_SIZE - q->next_cons_idx + q->next_prod_idx; +} + +static __always_inline int queue_nr_free(const struct queue *q) +{ + int nr_free = QUEUE_SIZE - queue_len(q); + + /* + * If there is only one slot left we will anyway lie and claim that the + * queue is full because adding an element 
will make it appear empty + */ + if (nr_free == 1) + nr_free = 0; + return nr_free; +} + +static __always_inline void queue_idx_inc(int *idx) +{ + *idx = (*idx + 1) % QUEUE_SIZE; +} + +static __always_inline void queue_push_to_back(struct queue *q, + const struct entry *e) +{ + q->entries[q->next_prod_idx] = *e; + queue_idx_inc(&q->next_prod_idx); +} + +static __always_inline struct entry queue_pop_from_front(struct queue *q) +{ + struct entry e = q->entries[q->next_cons_idx]; + + queue_idx_inc(&q->next_cons_idx); + return e; +} + +static __always_inline void queue_cond_signal(struct queue *q) +{ + cond_signal(&q->cond); +} + +static __always_inline void queue_cond_wait(struct queue *q) +{ + cond_wait(&q->cond, &q->mutex); +} + +static __always_inline int queue_try_to_add_entry(struct queue *q, + const struct entry *e) +{ + int r = 0; + + mutex_lock(&q->mutex); + if (queue_nr_free(q) > 0) { + queue_push_to_back(q, e); + cond_signal(&q->cond); + } else + r = -1; + mutex_unlock(&q->mutex); + return r; +} + +static struct entry queue_wait_for_entry(struct queue *q) +{ + struct entry e; + + mutex_lock(&q->mutex); + while (true) { + if (queue_len(&printqueue) > 0) { + e = queue_pop_from_front(q); + break; + } + queue_cond_wait(q); + } + mutex_unlock(&q->mutex); + + return e; +} + +static const struct policy *policy_from_name(const char *name) +{ + const struct policy *p = &policies[0]; + + while (p->name != NULL) { + if (!strcmp(name, p->name)) + return p; + p++; + } + return NULL; +} + +static const char *policy_name(int policy) +{ + const struct policy *p = &policies[0]; + static const char *rval = "unknown"; + + while (p->name != NULL) { + if (p->policy == policy) + return p->name; + p++; + } + return rval; +} + +static bool is_relevant_tracer(const char *name) +{ + unsigned int i; + + for (i = 0; relevant_tracers[i]; i++) + if (!strcmp(name, relevant_tracers[i])) + return true; + return false; +} + +static bool random_makes_sense(const char *name) +{ + unsigned int 
i; + + for (i = 0; random_tracers[i]; i++) + if (!strcmp(name, random_tracers[i])) + return true; + return false; +} + +static void show_available(void) +{ + char **tracers; + int found = 0; + int i; + + tracers = tracefs_tracers(NULL); + for (i = 0; tracers && tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + found++; + } + + if (!tracers) { + warnx("%s", no_tracer_msg); + return; + } + + if (!found) { + warnx("%s", no_latency_tr_msg); + tracefs_list_free(tracers); + return; + } + + printf("The following latency tracers are available on your system:\n"); + for (i = 0; tracers[i]; i++) { + if (is_relevant_tracer(tracers[i])) + printf("%s\n", tracers[i]); + } + tracefs_list_free(tracers); +} + +static bool tracer_valid(const char *name, bool *notracer) +{ + char **tracers; + int i; + bool rval = false; + + *notracer = false; + tracers = tracefs_tracers(NULL); + if (!tracers) { + *notracer = true; + return false; + } + for (i = 0; tracers[i]; i++) + if (!strcmp(tracers[i], name)) { + rval = true; + break; + } + tracefs_list_free(tracers); + return rval; +} + +static const char *find_default_tracer(void) +{ + int i; + bool notracer; + bool valid; + + for (i = 0; relevant_tracers[i]; i++) { + valid = tracer_valid(relevant_tracers[i], ¬racer); + if (notracer) + errx(EXIT_FAILURE, "%s", no_tracer_msg); + if (valid) + return relevant_tracers[i]; + } + return NULL; +} + +static bool toss_coin(struct drand48_data *buffer, unsigned int prob) +{ + long r; + + if (unlikely(lrand48_r(buffer, &r))) { + warnx("lrand48_r() failed"); + cleanup_exit(EXIT_FAILURE); + } + r = r % 1000L; + if (r < prob) + return true; + else + return false; +} + + +static long go_to_sleep(const struct entry *req) +{ + struct timespec future; + long delay = sleep_time; + + get_time_in_future(&future, delay); + + mutex_lock(&printstate.mutex); + while (!printstate_has_new_req_arrived(req)) { + cond_timedwait(&printstate.cond, &printstate.mutex, &future); + if (time_has_passed(&future)) + break; 
+ } + + if (printstate_has_new_req_arrived(req)) + delay = -1; + mutex_unlock(&printstate.mutex); + + return delay; +} + + +static void set_priority(void) +{ + int r; + pid_t pid; + struct sched_param param; + + memset(¶m, 0, sizeof(param)); + param.sched_priority = sched_pri; + + pid = getpid(); + r = sched_setscheduler(pid, sched_policy, ¶m); + + if (r != 0) + err(EXIT_FAILURE, "sched_setscheduler() failed"); +} + +pid_t latency_collector_gettid(void) +{ + return (pid_t) syscall(__NR_gettid); +} + +static void print_priority(void) +{ + pid_t tid; + int policy; + int r; + struct sched_param param; + + tid = latency_collector_gettid(); + r = pthread_getschedparam(pthread_self(), &policy, ¶m); + if (r != 0) { + warn("pthread_getschedparam() failed"); + cleanup_exit(EXIT_FAILURE); + } + mutex_lock(&print_mtx); + printf("Thread %d runs with scheduling policy %s and priority %d\n", + tid, policy_name(policy), param.sched_priority); + mutex_unlock(&print_mtx); +} + +static __always_inline +void __print_skipmessage(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, bool excuse, + const char *str) +{ + ssize_t bytes = 0; + char *p = &buffer[0]; + long us, sec; + int r; + + sec = timestamp->tv_sec; + us = timestamp->tv_nsec / 1000; + + if (resize_msg != NULL && resize_msg->len > 0) { + strncpy(p, resize_msg->buf, resize_msg->len); + bytes += resize_msg->len; + p += resize_msg->len; + bufspace -= resize_msg->len; + } + + if (excuse) + r = snprintf(p, bufspace, +"%ld.%06ld Latency %d printout skipped due to %s\n", + sec, us, req->ticket, str); + else + r = snprintf(p, bufspace, "%ld.%06ld Latency %d detected\n", + sec, us, req->ticket); + + if (r < 0) + warn("snprintf() failed"); + else + bytes += r; + + /* These prints could happen concurrently */ + mutex_lock(&print_mtx); + write_or_die(fd_stdout, buffer, bytes); + mutex_unlock(&print_mtx); +} + +static void print_skipmessage(const struct 
short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, + bool excuse) +{ + __print_skipmessage(resize_msg, timestamp, buffer, bufspace, req, + excuse, "random delay"); +} + +static void print_lostmessage(const struct timespec *timestamp, char *buffer, + size_t bufspace, const struct entry *req, + const char *reason) +{ + __print_skipmessage(NULL, timestamp, buffer, bufspace, req, true, + reason); +} + +static void print_tracefile(const struct short_msg *resize_msg, + const struct timespec *timestamp, char *buffer, + size_t bufspace, long slept, + const struct entry *req) +{ + static const int reserve = 256; + char *p = &buffer[0]; + ssize_t bytes = 0; + ssize_t bytes_tot = 0; + long us, sec; + long slept_ms; + int trace_fd; + + /* Save some space for the final string and final null char */ + bufspace = bufspace - reserve - 1; + + if (resize_msg != NULL && resize_msg->len > 0) { + bytes = resize_msg->len; + strncpy(p, resize_msg->buf, bytes); + bytes_tot += bytes; + p += bytes; + bufspace -= bytes; + } + + trace_fd = open(debug_tracefile, O_RDONLY); + + if (trace_fd < 0) { + warn("open() failed on %s", debug_tracefile); + return; + } + + sec = timestamp->tv_sec; + us = timestamp->tv_nsec / 1000; + + if (slept != 0) { + slept_ms = slept / 1000; + bytes = snprintf(p, bufspace, +"%ld.%06ld Latency %d randomly sleep for %ld ms before print\n", + sec, us, req->ticket, slept_ms); + } else { + bytes = snprintf(p, bufspace, + "%ld.%06ld Latency %d immediate print\n", sec, + us, req->ticket); + } + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + + bytes = snprintf(p, bufspace, +">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> BEGIN <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n" + ); + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + + do { + bytes = read(trace_fd, p, bufspace); + if 
(bytes < 0) { + if (errno == EINTR) + continue; + warn("read() failed on %s", debug_tracefile); + if (unlikely(close(trace_fd) != 0)) + warn("close() failed on %s", debug_tracefile); + return; + } + if (bytes == 0) + break; + p += bytes; + bufspace -= bytes; + bytes_tot += bytes; + } while (true); + + if (unlikely(close(trace_fd) != 0)) + warn("close() failed on %s", debug_tracefile); + + printstate_cnt_dec(); + /* Add the reserve space back to the budget for the final string */ + bufspace += reserve; + + bytes = snprintf(p, bufspace, + ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> END <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n\n"); + + if (bytes < 0) { + warn("snprintf() failed"); + return; + } + + bytes_tot += bytes; + + /* These prints could happen concurrently */ + mutex_lock(&print_mtx); + write_or_die(fd_stdout, buffer, bytes_tot); + mutex_unlock(&print_mtx); +} + +static char *get_no_opt(const char *opt) +{ + char *no_opt; + int s; + + s = strlen(opt) + strlen(OPT_NO_PREFIX) + 1; + /* We may be called from cleanup_exit() via set_trace_opt() */ + no_opt = malloc_or_die_nocleanup(s); + strcpy(no_opt, OPT_NO_PREFIX); + strcat(no_opt, opt); + return no_opt; +} + +static char *find_next_optstr(const char *allopt, const char **next) +{ + const char *begin; + const char *end; + char *r; + int s = 0; + + if (allopt == NULL) + return NULL; + + for (begin = allopt; *begin != '\0'; begin++) { + if (isgraph(*begin)) + break; + } + + if (*begin == '\0') + return NULL; + + for (end = begin; *end != '\0' && isgraph(*end); end++) + s++; + + r = malloc_or_die_nocleanup(s + 1); + strncpy(r, begin, s); + r[s] = '\0'; + *next = begin + s; + return r; +} + +static bool get_trace_opt(const char *allopt, const char *opt, bool *found) +{ + *found = false; + char *no_opt; + char *str; + const char *next = allopt; + bool rval = false; + + no_opt = get_no_opt(opt); + + do { + str = find_next_optstr(next, &next); + if (str == NULL) + break; + if (!strcmp(str, opt)) { + *found = true; + rval = 
true; + free(str); + break; + } + if (!strcmp(str, no_opt)) { + *found = true; + rval = false; + free(str); + break; + } + free(str); + } while (true); + free(no_opt); + + return rval; +} + +static int set_trace_opt(const char *opt, bool value) +{ + char *str; + int r; + + if (value) + str = strdup(opt); + else + str = get_no_opt(opt); + + r = tracefs_instance_file_write(NULL, TR_OPTIONS, str); + free(str); + return r; +} + +void save_trace_opts(struct ftrace_state *state) +{ + char *allopt; + int psize; + int i; + + allopt = tracefs_instance_file_read(NULL, TR_OPTIONS, &psize); + if (!allopt) + errx(EXIT_FAILURE, "Failed to read the %s file\n", TR_OPTIONS); + + for (i = 0; i < OPTIDX_NR; i++) + state->opt[i] = get_trace_opt(allopt, optstr[i], + &state->opt_valid[i]); + + free(allopt); +} + +static void write_file(const char *file, const char *cur, const char *new, + enum errhandling h) +{ + int r; + static const char *emsg = "Failed to write to the %s file!"; + + /* Do nothing if we know that the current and new value are equal */ + if (cur && !needs_change(cur, new)) + return; + + r = tracefs_instance_file_write(NULL, file, new); + if (r < 0) { + if (h) { + warnx(emsg, file); + if (h == ERR_CLEANUP) + cleanup_exit(EXIT_FAILURE); + } else + errx(EXIT_FAILURE, emsg, file); + } + if (verbose_ftrace()) { + mutex_lock(&print_mtx); + printf("%s was set to %s\n", file, new); + mutex_unlock(&print_mtx); + } +} + +static void reset_max_latency(void) +{ + write_file(TR_MAXLAT, NULL, "0", ERR_CLEANUP); +} + +static void save_and_disable_tracer(void) +{ + char *orig_th; + char *tracer; + bool need_nop = false; + + mutex_lock(&save_state.mutex); + + save_trace_opts(&save_state); + tracer = read_file(TR_CURRENT, ERR_EXIT); + orig_th = read_file(TR_THRESH, ERR_EXIT); + + if (needs_change(tracer, NOP_TRACER)) { + mutex_lock(&print_mtx); + if (force_tracer) { + printf( + "The %s tracer is already in use but proceeding anyway!\n", + tracer); + } else { + printf( + "The %s tracer is 
already in use, cowardly bailing out!\n" + "This could indicate that another program or instance is tracing.\n" + "Use the -F [--force] option to disregard the current tracer.\n", tracer); + exit(0); + } + mutex_unlock(&print_mtx); + need_nop = true; + } + + save_state.tracer = tracer; + save_state.thresh = orig_th; + + if (need_nop) + write_file(TR_CURRENT, NULL, NOP_TRACER, ERR_EXIT); + + mutex_unlock(&save_state.mutex); +} + +void set_trace_opts(struct ftrace_state *state, bool *new) +{ + int i; + int r; + + /* + * We only set options if we earlier detected that the option exists in + * the trace_options file and that the wanted setting is different from + * the one we saw in save_and_disable_tracer() + */ + for (i = 0; i < OPTIDX_NR; i++) + if (state->opt_valid[i] && + state->opt[i] != new[i]) { + r = set_trace_opt(optstr[i], new[i]); + if (r < 0) { + warnx("Failed to set the %s option to %s", + optstr[i], bool2str(new[i])); + cleanup_exit(EXIT_FAILURE); + } + if (verbose_ftrace()) { + mutex_lock(&print_mtx); + printf("%s in %s was set to %s\n", optstr[i], + TR_OPTIONS, bool2str(new[i])); + mutex_unlock(&print_mtx); + } + } +} + +static void enable_tracer(void) +{ + mutex_lock(&save_state.mutex); + set_trace_opts(&save_state, use_options); + + write_file(TR_THRESH, save_state.thresh, threshold, ERR_CLEANUP); + write_file(TR_CURRENT, NOP_TRACER, current_tracer, ERR_CLEANUP); + + mutex_unlock(&save_state.mutex); +} + +static void tracing_loop(void) +{ + int ifd = inotify_init(); + int wd; + const ssize_t bufsize = sizeof(inotify_buffer); + const ssize_t istructsize = sizeof(struct inotify_event); + char *buf = &inotify_buffer[0]; + ssize_t nr_read; + char *p; + int modified; + struct inotify_event *event; + struct entry req; + char *buffer; + const size_t bufspace = PRINT_BUFFER_SIZE; + struct timespec timestamp; + + print_priority(); + + buffer = malloc_or_die(bufspace); + + if (ifd < 0) + err(EXIT_FAILURE, "inotify_init() failed!"); + + + if (setup_ftrace) { + 
/* + * We must disable the tracer before resetting the max_latency + */ + save_and_disable_tracer(); + /* + * We must reset the max_latency before the inotify_add_watch() + * call. + */ + reset_max_latency(); + } + + wd = inotify_add_watch(ifd, debug_maxlat, IN_MODIFY); + if (wd < 0) + err(EXIT_FAILURE, "inotify_add_watch() failed!"); + + if (setup_ftrace) + enable_tracer(); + + signal_blocking(SIG_UNBLOCK); + + while (true) { + modified = 0; + check_signals(); + nr_read = read(ifd, buf, bufsize); + check_signals(); + if (nr_read < 0) { + if (errno == EINTR) + continue; + warn("read() failed on inotify fd!"); + cleanup_exit(EXIT_FAILURE); + } + if (nr_read == bufsize) + warnx("inotify() buffer filled, skipping events"); + if (nr_read < istructsize) { + warnx("read() returned too few bytes on inotify fd"); + cleanup_exit(EXIT_FAILURE); + } + + for (p = buf; p < buf + nr_read;) { + event = (struct inotify_event *) p; + if ((event->mask & IN_MODIFY) != 0) + modified++; + p += istructsize + event->len; + } + while (modified > 0) { + check_signals(); + mutex_lock(&printstate.mutex); + check_signals(); + printstate_next_ticket(&req); + if (printstate_cnt_read() > 0) { + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + if (verbose_lostevent()) { + clock_gettime_or_die(CLOCK_MONOTONIC, + &timestamp); + print_lostmessage(&timestamp, buffer, + bufspace, &req, + "inotify loop"); + } + break; + } + mutex_unlock(&printstate.mutex); + if (queue_try_to_add_entry(&printqueue, &req) != 0) { + /* These prints could happen concurrently */ + check_signals(); + mutex_lock(&print_mtx); + check_signals(); + write_or_die(fd_stdout, queue_full_warning, + strlen(queue_full_warning)); + mutex_unlock(&print_mtx); + } + modified--; + } + } +} + +static void *do_printloop(void *arg) +{ + const size_t bufspace = PRINT_BUFFER_SIZE; + char *buffer; + long *rseed = (long *) arg; + struct drand48_data drandbuf; + long slept = 0; + struct entry req; + int prob = 0; + struct timespec 
timestamp; + struct short_msg resize_msg; + + print_priority(); + + if (srand48_r(*rseed, &drandbuf) != 0) { + warn("srand48_r() failed!\n"); + cleanup_exit(EXIT_FAILURE); + } + + buffer = malloc_or_die(bufspace); + + while (true) { + req = queue_wait_for_entry(&printqueue); + clock_gettime_or_die(CLOCK_MONOTONIC, &timestamp); + mutex_lock(&printstate.mutex); + if (prev_req_won_race(&req)) { + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + if (verbose_lostevent()) + print_lostmessage(&timestamp, buffer, bufspace, + &req, "print loop"); + continue; + } + mutex_unlock(&printstate.mutex); + + /* + * Toss a coin to decide if we want to sleep before printing + * out the backtrace. The reason for this is that opening + * /sys/kernel/tracing/trace will cause a blackout of + * hundreds of ms, where no latencies will be noted by the + * latency tracer. Thus by randomly sleeping we try to avoid + * missing traces systematically due to this. With this option + * we will sometimes get the first latency, some other times + * some of the later ones, in case of closely spaced traces. + */ + if (trace_enable && use_random_sleep) { + slept = 0; + prob = table_get_probability(&req, &resize_msg); + if (!toss_coin(&drandbuf, prob)) + slept = go_to_sleep(&req); + if (slept >= 0) { + /* A print is ongoing */ + printstate_cnt_inc(); + /* + * We will do the printout below so we have to + * mark it as completed while we still have the + * mutex. + */ + mutex_lock(&printstate.mutex); + printstate_mark_req_completed(&req); + mutex_unlock(&printstate.mutex); + } + } + if (trace_enable) { + /* + * slept < 0 means that we detected another + * notification in go_to_sleep() above + */ + if (slept >= 0) + /* + * N.B. 
printstate_cnt_dec(); will be called + * inside print_tracefile() + */ + print_tracefile(&resize_msg, &timestamp, buffer, + bufspace, slept, &req); + else + print_skipmessage(&resize_msg, &timestamp, + buffer, bufspace, &req, true); + } else { + print_skipmessage(&resize_msg, &timestamp, buffer, + bufspace, &req, false); + } + } + return NULL; +} + +static void start_printthread(void) +{ + unsigned int i; + long *seed; + int ufd; + + ufd = open(DEV_URANDOM, O_RDONLY); + if (nr_threads > MAX_THREADS) { + warnx( +"Number of requested print threads was %d, max number is %d\n", + nr_threads, MAX_THREADS); + nr_threads = MAX_THREADS; + } + for (i = 0; i < nr_threads; i++) { + seed = malloc_or_die(sizeof(*seed)); + if (ufd < 0 || + read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) { + printf( +"Warning! Using trivial random number seed, since %s not available\n", + DEV_URANDOM); + fflush(stdout); + *seed = i; + } + errno = pthread_create(&printthread[i], NULL, do_printloop, + seed); + if (errno) + err(EXIT_FAILURE, "pthread_create()"); + } + if (ufd > 0 && close(ufd) != 0) + warn("close() failed"); +} + +static void show_usage(void) +{ + printf( +"Usage: %s [OPTION]...\n\n" +"Collect closely occurring latencies from %s\n" +"with any of the following tracers: preemptirqsoff, preemptoff, irqsoff, " +"wakeup,\nwakeup_dl, or wakeup_rt.\n\n" + +"The occurrence of a latency is detected by monitoring the file\n" +"%s with inotify.\n\n" + +"The following options are supported:\n\n" + +"-l, --list\t\tList the latency tracers that are supported by the\n" +"\t\t\tcurrently running Linux kernel. If you don't see the\n" +"\t\t\ttracer that you want, you will probably need to\n" +"\t\t\tchange your kernel config and build a new kernel.\n\n" + +"-t, --tracer TR\t\tUse the tracer TR. 
The default is to use the first\n" +"\t\t\ttracer that is supported by the kernel in the following\n" +"\t\t\torder of precedence:\n\n" +"\t\t\tpreemptirqsoff\n" +"\t\t\tpreemptoff\n" +"\t\t\tirqsoff\n" +"\t\t\twakeup\n" +"\t\t\twakeup_rt\n" +"\t\t\twakeup_dl\n" +"\n" +"\t\t\tIf TR is not on the list above, then a warning will be\n" +"\t\t\tprinted.\n\n" + +"-F, --force\t\tProceed even if another ftrace tracer is active. Without\n" +"\t\t\tthis option, the program will refuse to start tracing if\n" +"\t\t\tany other tracer than the nop tracer is active.\n\n" + +"-s, --threshold TH\tConfigure ftrace to use a threshold of TH microseconds\n" +"\t\t\tfor the tracer. The default is 0, which means that\n" +"\t\t\ttracing_max_latency will be used. tracing_max_latency is\n" +"\t\t\tset to 0 when the program is started and contains the\n" +"\t\t\tmaximum of the latencies that have been encountered.\n\n" + +"-f, --function\t\tEnable the function-trace option in trace_options. With\n" +"\t\t\tthis option, ftrace will trace the functions that are\n" +"\t\t\texecuted during a latency, without it we only get the\n" +"\t\t\tbeginning, end, and backtrace.\n\n" + +"-g, --graph\t\tEnable the display-graph option in trace_option. This\n" +"\t\t\toption causes ftrace to show the graph of how functions\n" +"\t\t\tare calling other functions.\n\n" + +"-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n" +"\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n" +"\t\t\tusing rr or fifo, remember that these policies may cause\n" +"\t\t\tother tasks to experience latencies.\n\n" + +"-p, --priority PRI\tRun the program with priority PRI. The acceptable range\n" +"\t\t\tof PRI depends on the scheduling policy.\n\n" + +"-n, --notrace\t\tIf latency is detected, do not print out the content of\n" +"\t\t\tthe trace file to standard output\n\n" + +"-e, --threads NRTHR\tRun NRTHR threads for printing. 
Default is %d.\n\n" + +"-r, --random\t\tArbitrarily sleep a certain amount of time, default\n" +"\t\t\t%ld ms, before reading the trace file. The\n" +"\t\t\tprobabilities for sleep are chosen so that the\n" +"\t\t\tprobability of obtaining any of a cluster of closely\n" +"\t\t\toccurring latencies are equal, i.e. we will randomly\n" +"\t\t\tchoose which one we collect from the trace file.\n\n" +"\t\t\tThis option is probably only useful with the irqsoff,\n" +"\t\t\tpreemptoff, and preemptirqsoff tracers.\n\n" + +"-a, --nrlat NRLAT\tFor the purpose of arbitrary delay, assume that there\n" +"\t\t\tare no more than NRLAT clustered latencies. If NRLAT\n" +"\t\t\tlatencies are detected during a run, this value will\n" +"\t\t\tautomatically be increased to NRLAT + 1 and then to\n" +"\t\t\tNRLAT + 2 and so on. The default is %d. This option\n" +"\t\t\timplies -r. We need to know this number in order to\n" +"\t\t\tbe able to calculate the probabilities of sleeping.\n" +"\t\t\tSpecifically, the probabilities of not sleeping, i.e. to\n" +"\t\t\tdo an immediate printout will be:\n\n" +"\t\t\t1/NRLAT 1/(NRLAT - 1) ... 1/3 1/2 1\n\n" +"\t\t\tThe probability of sleeping will be:\n\n" +"\t\t\t1 - P, where P is from the series above\n\n" +"\t\t\tThis descending probability will cause us to choose\n" +"\t\t\tan occurrence at random. Observe that the final\n" +"\t\t\tprobability is 0, it is when we reach this probability\n" +"\t\t\tthat we increase NRLAT automatically. As an example,\n" +"\t\t\twith the default value of 2, the probabilities will be:\n\n" +"\t\t\t1/2 0\n\n" +"\t\t\tThis means, when a latency is detected we will sleep\n" +"\t\t\twith 50%% probability. If we ever detect another latency\n" +"\t\t\tduring the sleep period, then the probability of sleep\n" +"\t\t\twill be 0%% and the table will be expanded to:\n\n" +"\t\t\t1/3 1/2 0\n\n" + +"-v, --verbose\t\tIncrease the verbosity. 
If this option is given once,\n" +"\t\t\tthen print a message every time that the NRLAT value\n" +"\t\t\tis automatically increased. It also causes a message to\n" +"\t\t\tbe printed when the ftrace settings are changed. If this\n" +"\t\t\toption is given at least twice, then also print a\n" +"\t\t\twarning for lost events.\n\n" + +"-u, --time TIME\t\tArbitrarily sleep for a specified time TIME ms before\n" +"\t\t\tprinting out the trace from the trace file. The default\n" +"\t\t\tis %ld ms. This option implies -r.\n\n" + +"-x, --no-ftrace\t\tDo not configure ftrace. This assume that the user\n" +"\t\t\tconfigures the ftrace files in sysfs such as\n" +"\t\t\t/sys/kernel/tracing/current_tracer or equivalent.\n\n" + +"-i, --tracefile FILE\tUse FILE as trace file. The default is\n" +"\t\t\t%s.\n" +"\t\t\tThis options implies -x\n\n" + +"-m, --max-lat FILE\tUse FILE as tracing_max_latency file. The default is\n" +"\t\t\t%s.\n" +"\t\t\tThis options implies -x\n\n" +, +prg_name, debug_tracefile_dflt, debug_maxlat_dflt, DEFAULT_NR_PRINTER_THREADS, +SLEEP_TIME_MS_DEFAULT, DEFAULT_TABLE_SIZE, SLEEP_TIME_MS_DEFAULT, +debug_tracefile_dflt, debug_maxlat_dflt); +} + +static void find_tracefiles(void) +{ + debug_tracefile_dflt = tracefs_get_tracing_file("trace"); + if (debug_tracefile_dflt == NULL) { + /* This is needed in show_usage() */ + debug_tracefile_dflt = DEBUG_NOFILE; + } + + debug_maxlat_dflt = tracefs_get_tracing_file("tracing_max_latency"); + if (debug_maxlat_dflt == NULL) { + /* This is needed in show_usage() */ + debug_maxlat_dflt = DEBUG_NOFILE; + } + + debug_tracefile = debug_tracefile_dflt; + debug_maxlat = debug_maxlat_dflt; +} + +bool alldigits(const char *s) +{ + for (; *s != '\0'; s++) + if (!isdigit(*s)) + return false; + return true; +} + +void check_alldigits(const char *optarg, const char *argname) +{ + if (!alldigits(optarg)) + errx(EXIT_FAILURE, + "The %s parameter expects a decimal argument\n", argname); +} + +static void scan_arguments(int argc, char 
*argv[]) +{ + int c; + int i; + int option_idx = 0; + + static struct option long_options[] = { + { "list", no_argument, 0, 'l' }, + { "tracer", required_argument, 0, 't' }, + { "force", no_argument, 0, 'F' }, + { "threshold", required_argument, 0, 's' }, + { "function", no_argument, 0, 'f' }, + { "graph", no_argument, 0, 'g' }, + { "policy", required_argument, 0, 'c' }, + { "priority", required_argument, 0, 'p' }, + { "help", no_argument, 0, 'h' }, + { "notrace", no_argument, 0, 'n' }, + { "random", no_argument, 0, 'r' }, + { "nrlat", required_argument, 0, 'a' }, + { "threads", required_argument, 0, 'e' }, + { "time", required_argument, 0, 'u' }, + { "verbose", no_argument, 0, 'v' }, + { "no-ftrace", no_argument, 0, 'x' }, + { "tracefile", required_argument, 0, 'i' }, + { "max-lat", required_argument, 0, 'm' }, + { 0, 0, 0, 0 } + }; + const struct policy *p; + int max, min; + int value; + bool notracer, valid; + + /* + * We must do this before parsing the arguments because show_usage() + * needs to display these. 
+ */ + find_tracefiles(); + + while (true) { + c = getopt_long(argc, argv, "lt:Fs:fgc:p:hnra:e:u:vxi:m:", + long_options, &option_idx); + if (c == -1) + break; + + switch (c) { + case 'l': + show_available(); + exit(0); + break; + case 't': + current_tracer = strdup(optarg); + if (!is_relevant_tracer(current_tracer)) { + warnx("%s is not a known latency tracer!\n", + current_tracer); + } + valid = tracer_valid(current_tracer, &notracer); + if (notracer) + errx(EXIT_FAILURE, "%s", no_tracer_msg); + if (!valid) + errx(EXIT_FAILURE, +"The tracer %s is not supported by your kernel!\n", current_tracer); + break; + case 'F': + force_tracer = true; + break; + case 's': + check_alldigits(optarg, "-s [--threshold]"); + threshold = strdup(optarg); + break; + case 'f': + use_options[OPTIDX_FUNC_TR] = true; + break; + case 'g': + use_options[OPTIDX_DISP_GR] = true; + break; + case 'c': + p = policy_from_name(optarg); + if (p != NULL) { + sched_policy = p->policy; + sched_policy_set = true; + if (!sched_pri_set) { + sched_pri = p->default_pri; + sched_pri_set = true; + } + } else { + warnx("Unknown scheduling %s\n", optarg); + show_usage(); + exit(0); + } + break; + case 'p': + check_alldigits(optarg, "-p [--priority]"); + sched_pri = atoi(optarg); + sched_pri_set = true; + break; + case 'h': + show_usage(); + exit(0); + break; + case 'n': + trace_enable = false; + use_random_sleep = false; + break; + case 'e': + check_alldigits(optarg, "-e [--threads]"); + value = atoi(optarg); + if (value > 0) + nr_threads = value; + else { + warnx("NRTHR must be > 0\n"); + show_usage(); + exit(0); + } + break; + case 'u': + check_alldigits(optarg, "-u [--time]"); + value = atoi(optarg); + if (value < 0) { + warnx("TIME must be >= 0\n"); + show_usage(); + exit(0); + } + trace_enable = true; + use_random_sleep = true; + sleep_time = value * USEC_PER_MSEC; + break; + case 'v': + verbosity++; + break; + case 'r': + trace_enable = true; + use_random_sleep = true; + break; + case 'a': + 
check_alldigits(optarg, "-a [--nrlat]"); + value = atoi(optarg); + if (value <= 0) { + warnx("NRLAT must be > 0\n"); + show_usage(); + exit(0); + } + trace_enable = true; + use_random_sleep = true; + table_startsize = value; + break; + case 'x': + setup_ftrace = false; + break; + case 'i': + setup_ftrace = false; + debug_tracefile = strdup(optarg); + break; + case 'm': + setup_ftrace = false; + debug_maxlat = strdup(optarg); + break; + default: + show_usage(); + exit(0); + break; + } + } + + if (setup_ftrace) { + if (!current_tracer) { + current_tracer = find_default_tracer(); + if (!current_tracer) + errx(EXIT_FAILURE, +"No default tracer found and tracer not specified\n"); + } + + if (use_random_sleep && !random_makes_sense(current_tracer)) { + warnx("WARNING: The tracer is %s and random sleep has", + current_tracer); + fprintf(stderr, +"been enabled. Random sleep is intended for the following tracers:\n"); + for (i = 0; random_tracers[i]; i++) + fprintf(stderr, "%s\n", random_tracers[i]); + fprintf(stderr, "\n"); + } + } + + if (debug_tracefile == DEBUG_NOFILE || + debug_maxlat == DEBUG_NOFILE) + errx(EXIT_FAILURE, +"Could not find tracing directory e.g. /sys/kernel/tracing\n"); + + if (!sched_policy_set) { + sched_policy = SCHED_RR; + sched_policy_set = true; + if (!sched_pri_set) { + sched_pri = RT_DEFAULT_PRI; + sched_pri_set = true; + } + } + + max = sched_get_priority_max(sched_policy); + min = sched_get_priority_min(sched_policy); + + if (sched_pri < min) { + printf( +"ATTENTION: Increasing priority to minimum, which is %d\n", min); + sched_pri = min; + } + if (sched_pri > max) { + printf( +"ATTENTION: Reducing priority to maximum, which is %d\n", max); + sched_pri = max; + } +} + +static void show_params(void) +{ + printf( +"\n" +"Running with scheduling policy %s and priority %d. 
Using %d print threads.\n", + policy_name(sched_policy), sched_pri, nr_threads); + if (trace_enable) { + if (use_random_sleep) { + printf( +"%s will be printed with random delay\n" +"Start size of the probability table:\t\t\t%d\n" +"Print a message when the prob. table changes size:\t%s\n" +"Print a warning when an event has been lost:\t\t%s\n" +"Sleep time is:\t\t\t\t\t\t%ld ms\n", +debug_tracefile, +table_startsize, +bool2str(verbose_sizechange()), +bool2str(verbose_lostevent()), +sleep_time / USEC_PER_MSEC); + } else { + printf("%s will be printed immediately\n", + debug_tracefile); + } + } else { + printf("%s will not be printed\n", + debug_tracefile); + } + if (setup_ftrace) { + printf("Tracer:\t\t\t\t\t\t\t%s\n" + "%s option:\t\t\t\t\t%s\n" + "%s option:\t\t\t\t\t%s\n", + current_tracer, + optstr[OPTIDX_FUNC_TR], + bool2str(use_options[OPTIDX_FUNC_TR]), + optstr[OPTIDX_DISP_GR], + bool2str(use_options[OPTIDX_DISP_GR])); + if (!strcmp(threshold, "0")) + printf("Threshold:\t\t\t\t\t\ttracing_max_latency\n"); + else + printf("Threshold:\t\t\t\t\t\t%s\n", threshold); + } + printf("\n"); +} + +int main(int argc, char *argv[]) +{ + init_save_state(); + signal_blocking(SIG_BLOCK); + setup_sig_handler(); + open_stdout(); + + if (argc >= 1) + prg_name = argv[0]; + else + prg_name = prg_unknown; + + scan_arguments(argc, argv); + show_params(); + + init_printstate(); + init_print_mtx(); + if (use_random_sleep) { + init_probabilities(); + if (verbose_sizechange()) + printf("Initializing probability table to %d\n", + table_startsize); + sleeptable_resize(table_startsize, false, NULL); + } + set_priority(); + init_queue(&printqueue); + start_printthread(); + tracing_loop(); + return 0; +} diff --git a/tools/tracing/rtla/.gitignore b/tools/tracing/rtla/.gitignore new file mode 100644 index 000000000000..1a394ad26cc1 --- /dev/null +++ b/tools/tracing/rtla/.gitignore @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +rtla +rtla-static +fixdep +feature +FEATURE-DUMP 
+*.skel.h diff --git a/tools/tracing/rtla/Build b/tools/tracing/rtla/Build new file mode 100644 index 000000000000..6c9d5b36a315 --- /dev/null +++ b/tools/tracing/rtla/Build @@ -0,0 +1 @@ +rtla-y += src/ diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile new file mode 100644 index 000000000000..746ccf2f5808 --- /dev/null +++ b/tools/tracing/rtla/Makefile @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: GPL-2.0-only + +ifeq ($(srctree),) + srctree := $(patsubst %/,%,$(dir $(CURDIR))) + srctree := $(patsubst %/,%,$(dir $(srctree))) + srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +include $(srctree)/tools/scripts/Makefile.include + +# O is an alias for OUTPUT +OUTPUT := $(O) + +ifeq ($(OUTPUT),) + OUTPUT := $(CURDIR) +else + # subdir is used by the ../Makefile in $(call descend,) + ifneq ($(subdir),) + OUTPUT := $(OUTPUT)/$(subdir) + endif +endif + +ifneq ($(patsubst %/,,$(lastword $(OUTPUT))),) + OUTPUT := $(OUTPUT)/ +endif + +RTLA := $(OUTPUT)rtla +RTLA_IN := $(RTLA)-in.o + +VERSION := $(shell sh -c "make -sC ../../.. 
kernelversion | grep -v make") +DOCSRC := ../../../Documentation/tools/rtla/ + +FEATURE_TESTS := libtraceevent +FEATURE_TESTS += libtracefs +FEATURE_TESTS += libcpupower +FEATURE_TESTS += libbpf +FEATURE_TESTS += clang-bpf-co-re +FEATURE_TESTS += bpftool-skeletons +FEATURE_DISPLAY := libtraceevent +FEATURE_DISPLAY += libtracefs +FEATURE_DISPLAY += libcpupower +FEATURE_DISPLAY += libbpf +FEATURE_DISPLAY += clang-bpf-co-re +FEATURE_DISPLAY += bpftool-skeletons + +all: $(RTLA) + +include $(srctree)/tools/build/Makefile.include +include Makefile.rtla + +# check for dependencies only on required targets +NON_CONFIG_TARGETS := clean install tarball doc doc_clean doc_install + +config := 1 +ifdef MAKECMDGOALS +ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) + config := 0 +endif +endif + +ifeq ($(config),1) + include $(srctree)/tools/build/Makefile.feature + include Makefile.config +endif + +CFLAGS += $(INCLUDES) $(LIB_INCLUDES) + +export CFLAGS OUTPUT srctree + +ifeq ($(BUILD_BPF_SKEL),1) +src/timerlat.bpf.o: src/timerlat.bpf.c + $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -c $(filter %.c,$^) -o $@ + +src/timerlat.skel.h: src/timerlat.bpf.o + $(QUIET_GENSKEL)$(SYSTEM_BPFTOOL) gen skeleton $< > $@ +else +src/timerlat.skel.h: + $(Q)echo '/* BPF skeleton is disabled */' > src/timerlat.skel.h +endif + +$(RTLA): $(RTLA_IN) + $(QUIET_LINK)$(CC) $(LDFLAGS) -o $(RTLA) $(RTLA_IN) $(EXTLIBS) + +static: $(RTLA_IN) + $(eval LDFLAGS += -static) + $(QUIET_LINK)$(CC) -static $(LDFLAGS) -o $(RTLA)-static $(RTLA_IN) $(EXTLIBS) + +rtla.%: fixdep FORCE + make -f $(srctree)/tools/build/Makefile.build dir=. $@ + +$(RTLA_IN): fixdep FORCE src/timerlat.skel.h + make $(build)=rtla + +clean: doc_clean fixdep-clean + $(call QUIET_CLEAN, rtla) + $(Q)find . 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)rm -f rtla rtla-static fixdep FEATURE-DUMP rtla-* + $(Q)rm -rf feature + $(Q)rm -f src/timerlat.bpf.o src/timerlat.skel.h +check: $(RTLA) + RTLA=$(RTLA) prove -o -f tests/ +.PHONY: FORCE clean check diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config new file mode 100644 index 000000000000..07ff5e8f3006 --- /dev/null +++ b/tools/tracing/rtla/Makefile.config @@ -0,0 +1,108 @@ +# SPDX-License-Identifier: GPL-2.0-only + +include $(srctree)/tools/scripts/utilities.mak + +STOP_ERROR := + +LIBTRACEEVENT_MIN_VERSION = 1.5 +LIBTRACEFS_MIN_VERSION = 1.6 + +ifndef ($(NO_LIBTRACEEVENT),1) + ifeq ($(call get-executable,$(PKG_CONFIG)),) + $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it) + endif +endif + +define lib_setup + $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) + $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) + $(eval EXTLIBS += $(shell sh -c "$(PKG_CONFIG) --libs-only-l lib$(1)")) +endef + +$(call feature_check,libtraceevent) +ifeq ($(feature-libtraceevent), 1) + $(call detected,CONFIG_LIBTRACEEVENT) + + TEST = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEEVENT_MIN_VERSION) libtraceevent > /dev/null 2>&1 && echo y || echo n") + ifeq ($(TEST),n) + $(info libtraceevent version is too low, it must be at least $(LIBTRACEEVENT_MIN_VERSION)) + STOP_ERROR := 1 + endif + + $(call lib_setup,traceevent) +else + STOP_ERROR := 1 + $(info libtraceevent is missing. 
Please install libtraceevent-dev/libtraceevent-devel) +endif + +$(call feature_check,libtracefs) +ifeq ($(feature-libtracefs), 1) + $(call detected,CONFIG_LIBTRACEFS) + + TEST = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEFS_MIN_VERSION) libtracefs > /dev/null 2>&1 && echo y || echo n") + ifeq ($(TEST),n) + $(info libtracefs version is too low, it must be at least $(LIBTRACEFS_MIN_VERSION)) + STOP_ERROR := 1 + endif + + $(call lib_setup,tracefs) +else + STOP_ERROR := 1 + $(info libtracefs is missing. Please install libtracefs-dev/libtracefs-devel) +endif + +$(call feature_check,libcpupower) +ifeq ($(feature-libcpupower), 1) + $(call detected,CONFIG_LIBCPUPOWER) + CFLAGS += -DHAVE_LIBCPUPOWER_SUPPORT + EXTLIBS += -lcpupower +else + $(info libcpupower is missing, building without --deepest-idle-state support.) + $(info Please install libcpupower-dev/kernel-tools-libs-devel) +endif + +ifndef BUILD_BPF_SKEL + # BPF skeletons are used to implement improved sample collection, enable + # them by default. + BUILD_BPF_SKEL := 1 +endif + +ifeq ($(BUILD_BPF_SKEL),0) + $(info BPF skeleton support disabled, building without BPF skeleton support.) +endif + +$(call feature_check,libbpf) +ifeq ($(feature-libbpf), 1) + $(call detected,CONFIG_LIBBPF) +else + $(info libbpf is missing, building without BPF skeleton support.) + $(info Please install libbpf-dev/libbpf-devel) + BUILD_BPF_SKEL := 0 +endif + +$(call feature_check,clang-bpf-co-re) +ifeq ($(feature-clang-bpf-co-re), 1) + $(call detected,CONFIG_CLANG_BPF_CO_RE) +else + $(info clang is missing or does not support BPF CO-RE, building without BPF skeleton support.) + $(info Please install clang) + BUILD_BPF_SKEL := 0 +endif + +$(call feature_check,bpftool-skeletons) +ifeq ($(feature-bpftool-skeletons), 1) + $(call detected,CONFIG_BPFTOOL_SKELETONS) +else + $(info bpftool is missing or not supporting skeletons, building without BPF skeleton support.) 
+ $(info Please install bpftool) + BUILD_BPF_SKEL := 0 +endif + +ifeq ($(BUILD_BPF_SKEL),1) + CFLAGS += -DHAVE_BPF_SKEL + EXTLIBS += -lbpf +endif + +ifeq ($(STOP_ERROR),1) + $(error Please, check the errors above.) +endif diff --git a/tools/tracing/rtla/Makefile.rtla b/tools/tracing/rtla/Makefile.rtla new file mode 100644 index 000000000000..1743d91829d4 --- /dev/null +++ b/tools/tracing/rtla/Makefile.rtla @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: GPL-2.0-only + +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) +$(call allow-override,PKG_CONFIG,pkg-config) +$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/) +$(call allow-override,LDCONFIG,ldconfig) +export CC AR STRIP PKG_CONFIG LD_SO_CONF_PATH LDCONFIG + +FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ + -fasynchronous-unwind-tables -fstack-clash-protection +WOPTS := -O -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 \ + -Wp,-D_GLIBCXX_ASSERTIONS + +ifeq ($(CC),clang) + FOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(FOPTS)) + WOPTS := $(filter-out -Wno-maybe-uninitialized, $(WOPTS)) +endif + +CFLAGS := -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(WOPTS) $(CFLAGS) +LDFLAGS := -ggdb $(LDFLAGS) + +RM := rm -rf +LN := ln -s +INSTALL := install +MKDIR := mkdir +STRIP := strip +BINDIR := /usr/bin +CTAGS := ctags +ETAGS := ctags -e + +.PHONY: install +install: doc_install + @$(MKDIR) -p $(DESTDIR)$(BINDIR) + $(call QUIET_INSTALL,rtla)$(INSTALL) $(RTLA) -m 755 $(DESTDIR)$(BINDIR) + @$(STRIP) $(DESTDIR)$(BINDIR)/rtla + @test ! -f $(DESTDIR)$(BINDIR)/osnoise || $(RM) $(DESTDIR)$(BINDIR)/osnoise + @$(LN) rtla $(DESTDIR)$(BINDIR)/osnoise + @test ! 
-f $(DESTDIR)$(BINDIR)/hwnoise || $(RM) $(DESTDIR)$(BINDIR)/hwnoise + @$(LN) -s rtla $(DESTDIR)$(BINDIR)/hwnoise + @test ! -f $(DESTDIR)$(BINDIR)/timerlat || $(RM) $(DESTDIR)$(BINDIR)/timerlat + @$(LN) -s rtla $(DESTDIR)$(BINDIR)/timerlat + +.PHONY: tags +tags: + $(CTAGS) -R --extras=+f --c-kinds=+p src + +.PHONY: TAGS +TAGS: + $(ETAGS) -R --extras=+f --c-kinds=+p src + +.PHONY: tags_clean +tags_clean: + $(RM) tags TAGS + +.PHONY: doc doc_clean doc_install +doc: + $(MAKE) -C $(DOCSRC) + +doc_clean: + $(MAKE) -C $(DOCSRC) clean + +doc_install: + $(MAKE) -C $(DOCSRC) install + +# This section is necessary to make the rtla tarball +NAME := rtla +DIRS := src +FILES := Makefile README.txt +CEXT := bz2 +TARBALL := $(NAME)-$(VERSION).tar.$(CEXT) +TAROPTS := -cvjf $(TARBALL) +SRCTREE := $(or $(BUILD_SRC),$(CURDIR)) + +tarball: clean + $(RM) $(NAME)-$(VERSION) && $(MKDIR) $(NAME)-$(VERSION) + echo $(VERSION) > $(NAME)-$(VERSION)/VERSION + cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION) + $(MKDIR) $(NAME)-$(VERSION)/Documentation/ + cp -rp $(SRCTREE)/../../../Documentation/tools/$(NAME)/* $(NAME)-$(VERSION)/Documentation/ + cp Makefile.standalone $(NAME)-$(VERSION)/Makefile + cp Makefile.$(NAME) $(NAME)-$(VERSION)/ + tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION) + $(RM) $(NAME)-$(VERSION) +.PHONY: tarball diff --git a/tools/tracing/rtla/Makefile.standalone b/tools/tracing/rtla/Makefile.standalone new file mode 100644 index 000000000000..86d07cb52fa5 --- /dev/null +++ b/tools/tracing/rtla/Makefile.standalone @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0-only + +VERSION := $(shell cat VERSION) +CFLAGS += $$($(PKG_CONFIG) --cflags libtracefs) +EXTLIBS += $$($(PKG_CONFIG) --libs libtracefs) + +rtla: + +include Makefile.rtla + +SRC := $(wildcard src/*.c) +HDR := $(wildcard src/*.h) +OBJ := $(SRC:.c=.o) +DOCSRC := Documentation/ + +rtla: $(OBJ) + $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) $(EXTLIBS) + $(info This is a deprecated method to compile RTLA, please compile from Linux 
kernel source) + +.PHONY: clean tarball +clean: doc_clean + @test ! -f rtla || rm rtla + @test ! -f rtla-static || rm rtla-static + @test ! -f src/rtla.o || rm src/rtla.o + @test ! -f $(TARBALL) || rm -f $(TARBALL) + @rm -rf *~ $(OBJ) *.tar.$(CEXT) diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt new file mode 100644 index 000000000000..43e98311d10f --- /dev/null +++ b/tools/tracing/rtla/README.txt @@ -0,0 +1,44 @@ +RTLA: Real-Time Linux Analysis tools + +The rtla meta-tool includes a set of commands that aims to analyze +the real-time properties of Linux. Instead of testing Linux as a black box, +rtla leverages kernel tracing capabilities to provide precise information +about the properties and root causes of unexpected results. + +Installing RTLA + +RTLA depends on the following libraries and tools: + + - libtracefs + - libtraceevent + - libcpupower (optional, for --deepest-idle-state) + +For BPF sample collection support, the following extra dependencies are +required: + + - libbpf 1.0.0 or later + - bpftool with skeleton support + - clang with BPF CO-RE support + +It also depends on python3-docutils to compile man pages. + +For development, we suggest the following steps for compiling rtla: + + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git + $ cd libtraceevent/ + $ make + $ sudo make install + $ cd .. + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git + $ cd libtracefs/ + $ make + $ sudo make install + $ cd .. + $ cd $libcpupower_src + $ make + $ sudo make install + $ cd $rtla_src + $ make + $ sudo make install + +For further information, please refer to the rtla man page. 
diff --git a/tools/tracing/rtla/sample/timerlat_load.py b/tools/tracing/rtla/sample/timerlat_load.py new file mode 100644 index 000000000000..a819c3588073 --- /dev/null +++ b/tools/tracing/rtla/sample/timerlat_load.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2024 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# This is a sample code about how to use timerlat's timer by any workload +# so rtla can measure and provide auto-analysis for the overall latency (IOW +# the response time) for a task. +# +# Before running it, you need to dispatch timerlat with -U option in a terminal. +# Then # run this script pinned to a CPU on another terminal. For example: +# +# timerlat_load.py 1 -p 95 +# +# The "Timerlat IRQ" is the IRQ latency, The thread latency is the latency +# for the python process to get the CPU. The Ret from user Timer Latency is +# the overall latency. In other words, it is the response time for that +# activation. +# +# This is just an example, the load is reading 20MB of data from /dev/full +# It is in python because it is easy to read :-) + +import argparse +import sys +import os + +parser = argparse.ArgumentParser(description='user-space timerlat thread in Python') +parser.add_argument("cpu", type=int, help='CPU to run timerlat thread') +parser.add_argument("-p", "--prio", type=int, help='FIFO priority') +args = parser.parse_args() + +try: + affinity_mask = {args.cpu} + os.sched_setaffinity(0, affinity_mask) +except Exception as e: + print(f"Error setting affinity: {e}") + sys.exit(1) + +if args.prio: + try: + param = os.sched_param(args.prio) + os.sched_setscheduler(0, os.SCHED_FIFO, param) + except Exception as e: + print(f"Error setting priority: {e}") + sys.exit(1) + +try: + timerlat_path = f"/sys/kernel/tracing/osnoise/per_cpu/cpu{args.cpu}/timerlat_fd" + timerlat_fd = open(timerlat_path, 'r') +except PermissionError: + print("Permission denied. 
Please check your access rights.") + sys.exit(1) +except OSError: + print("Error opening timerlat fd, did you run timerlat -U?") + sys.exit(1) + +try: + data_fd = open("/dev/full", 'r') +except Exception as e: + print(f"Error opening data fd: {e}") + sys.exit(1) + +while True: + try: + timerlat_fd.read(1) + data_fd.read(20 * 1024 * 1024) + except KeyboardInterrupt: + print("Leaving") + break + except IOError as e: + print(f"I/O error occurred: {e}") + break + except Exception as e: + print(f"Unexpected error: {e}") + break + +timerlat_fd.close() +data_fd.close() diff --git a/tools/tracing/rtla/src/Build b/tools/tracing/rtla/src/Build new file mode 100644 index 000000000000..329e24a40cf7 --- /dev/null +++ b/tools/tracing/rtla/src/Build @@ -0,0 +1,14 @@ +rtla-y += trace.o +rtla-y += utils.o +rtla-y += actions.o +rtla-y += common.o +rtla-y += osnoise.o +rtla-y += osnoise_top.o +rtla-y += osnoise_hist.o +rtla-y += timerlat.o +rtla-y += timerlat_top.o +rtla-y += timerlat_hist.o +rtla-y += timerlat_u.o +rtla-y += timerlat_aa.o +rtla-y += timerlat_bpf.o +rtla-y += rtla.o diff --git a/tools/tracing/rtla/src/actions.c b/tools/tracing/rtla/src/actions.c new file mode 100644 index 000000000000..8945aee58d51 --- /dev/null +++ b/tools/tracing/rtla/src/actions.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> + +#include "actions.h" +#include "trace.h" +#include "utils.h" + +/* + * actions_init - initialize struct actions + */ +void +actions_init(struct actions *self) +{ + self->size = action_default_size; + self->list = calloc(self->size, sizeof(struct action)); + self->len = 0; + self->continue_flag = false; + + memset(&self->present, 0, sizeof(self->present)); + + /* This has to be set by the user */ + self->trace_output_inst = NULL; +} + +/* + * actions_destroy - destroy struct actions + */ +void +actions_destroy(struct actions *self) +{ + /* Free any action-specific data */ + for 
(struct action *action = self->list; action < self->list + self->len; action++) {
+		if (action->type == ACTION_SHELL)
+			free(action->command);
+		if (action->type == ACTION_TRACE_OUTPUT)
+			free(action->trace_output);
+	}
+
+	/* Free action list */
+	free(self->list);
+}
+
+/*
+ * actions_new - Get pointer to new action
+ */
+static struct action *
+actions_new(struct actions *self)
+{
+	if (self->len >= self->size) {
+		self->size *= 2;
+		/* NOTE(review): realloc result is unchecked; on failure the old list leaks and NULL is dereferenced */
+		self->list = realloc(self->list, self->size * sizeof(struct action));
+	}
+
+	return &self->list[self->len++];
+}
+
+/*
+ * actions_add_trace_output - add an action to output trace
+ */
+int
+actions_add_trace_output(struct actions *self, const char *trace_output)
+{
+	struct action *action = actions_new(self);
+
+	self->present[ACTION_TRACE_OUTPUT] = true;
+	action->type = ACTION_TRACE_OUTPUT;
+	action->trace_output = calloc(strlen(trace_output) + 1, sizeof(char));
+	if (!action->trace_output)
+		return -1;
+	strcpy(action->trace_output, trace_output);
+
+	return 0;
+}
+
+/*
+ * actions_add_signal - add an action to send signal to a process
+ */
+int
+actions_add_signal(struct actions *self, int signal, int pid)
+{
+	struct action *action = actions_new(self);
+
+	self->present[ACTION_SIGNAL] = true;
+	action->type = ACTION_SIGNAL;
+	action->signal = signal;
+	action->pid = pid;
+
+	return 0;
+}
+
+/*
+ * actions_add_shell - add an action to execute a shell command
+ */
+int
+actions_add_shell(struct actions *self, const char *command)
+{
+	struct action *action = actions_new(self);
+
+	self->present[ACTION_SHELL] = true;
+	action->type = ACTION_SHELL;
+	action->command = calloc(strlen(command) + 1, sizeof(char));
+	if (!action->command)
+		return -1;
+	strcpy(action->command, command);
+
+	return 0;
+}
+
+/*
+ * actions_add_continue - add an action to resume measurement
+ */
+int
+actions_add_continue(struct actions *self)
+{
+	struct action *action = actions_new(self);
+
+	self->present[ACTION_CONTINUE] = true;
+	action->type 
= ACTION_CONTINUE;
+
+	return 0;
+}
+
+/*
+ * actions_parse - add an action based on text specification
+ */
+int
+actions_parse(struct actions *self, const char *trigger, const char *tracefn)
+{
+	enum action_type type = ACTION_NONE;
+	const char *token;
+	char trigger_c[strlen(trigger) + 1];
+
+	/* For ACTION_SIGNAL */
+	int signal = 0, pid = 0;
+
+	/* For ACTION_TRACE_OUTPUT */
+	const char *trace_output;
+
+	strcpy(trigger_c, trigger);
+	token = strtok(trigger_c, ",");
+
+	if (strcmp(token, "trace") == 0)
+		type = ACTION_TRACE_OUTPUT;
+	else if (strcmp(token, "signal") == 0)
+		type = ACTION_SIGNAL;
+	else if (strcmp(token, "shell") == 0)
+		type = ACTION_SHELL;
+	else if (strcmp(token, "continue") == 0)
+		type = ACTION_CONTINUE;
+	else
+		/* Invalid trigger type */
+		return -1;
+
+	token = strtok(NULL, ",");
+
+	switch (type) {
+	case ACTION_TRACE_OUTPUT:
+		/* Takes an optional file=<path> argument; defaults to tracefn */
+		if (token == NULL)
+			trace_output = tracefn;
+		else {
+			if (strlen(token) > 5 && strncmp(token, "file=", 5) == 0) {
+				trace_output = token + 5;
+			} else {
+				/* Invalid argument */
+				return -1;
+			}
+
+			token = strtok(NULL, ",");
+			if (token != NULL)
+				/* Only one argument allowed */
+				return -1;
+		}
+		return actions_add_trace_output(self, trace_output);
+	case ACTION_SIGNAL:
+		/* Takes two arguments, num (signal) and pid */
+		while (token != NULL) {
+			if (strlen(token) > 4 && strncmp(token, "num=", 4) == 0) {
+				signal = atoi(token + 4);
+			} else if (strlen(token) > 4 && strncmp(token, "pid=", 4) == 0) {
+				if (strncmp(token + 4, "parent", 7) == 0)
+					pid = -1;
+				else
+					pid = atoi(token + 4);
+			} else {
+				/* Invalid argument */
+				return -1;
+			}
+
+			token = strtok(NULL, ",");
+		}
+
+		if (!signal || !pid)
+			/* Missing argument (atoi() also yields 0 for non-numeric input, rejected here) */
+			return -1;
+
+		return actions_add_signal(self, signal, pid);
+	case ACTION_SHELL:
+		if (token == NULL)
+			return -1;
+		if (strlen(token) > 8 && strncmp(token, "command=", 8) == 0)
+			return actions_add_shell(self, token + 8);
+		return -1;
+	case ACTION_CONTINUE:
+		
/* Takes no argument */ + if (token != NULL) + return -1; + return actions_add_continue(self); + default: + return -1; + } +} + +/* + * actions_perform - perform all actions + */ +int +actions_perform(struct actions *self) +{ + int pid, retval; + const struct action *action; + + for (action = self->list; action < self->list + self->len; action++) { + switch (action->type) { + case ACTION_TRACE_OUTPUT: + retval = save_trace_to_file(self->trace_output_inst, action->trace_output); + if (retval) { + err_msg("Error saving trace\n"); + return retval; + } + break; + case ACTION_SIGNAL: + if (action->pid == -1) + pid = getppid(); + else + pid = action->pid; + retval = kill(pid, action->signal); + if (retval) { + err_msg("Error sending signal\n"); + return retval; + } + break; + case ACTION_SHELL: + retval = system(action->command); + if (retval) + return retval; + break; + case ACTION_CONTINUE: + self->continue_flag = true; + return 0; + default: + break; + } + } + + return 0; +} diff --git a/tools/tracing/rtla/src/actions.h b/tools/tracing/rtla/src/actions.h new file mode 100644 index 000000000000..a4f9b570775b --- /dev/null +++ b/tools/tracing/rtla/src/actions.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <tracefs.h> +#include <stdbool.h> + +enum action_type { + ACTION_NONE = 0, + ACTION_TRACE_OUTPUT, + ACTION_SIGNAL, + ACTION_SHELL, + ACTION_CONTINUE, + ACTION_FIELD_N +}; + +struct action { + enum action_type type; + union { + struct { + /* For ACTION_TRACE_OUTPUT */ + char *trace_output; + }; + struct { + /* For ACTION_SIGNAL */ + int signal; + int pid; + }; + struct { + /* For ACTION_SHELL */ + char *command; + }; + }; +}; + +static const int action_default_size = 8; + +struct actions { + struct action *list; + int len, size; + bool present[ACTION_FIELD_N]; + bool continue_flag; + + /* External dependencies */ + struct tracefs_instance *trace_output_inst; +}; + +void actions_init(struct actions *self); +void actions_destroy(struct actions 
*self); +int actions_add_trace_output(struct actions *self, const char *trace_output); +int actions_add_signal(struct actions *self, int signal, int pid); +int actions_add_shell(struct actions *self, const char *command); +int actions_add_continue(struct actions *self); +int actions_parse(struct actions *self, const char *trigger, const char *tracefn); +int actions_perform(struct actions *self); diff --git a/tools/tracing/rtla/src/common.c b/tools/tracing/rtla/src/common.c new file mode 100644 index 000000000000..b197037fc58b --- /dev/null +++ b/tools/tracing/rtla/src/common.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <pthread.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include "common.h" + +struct trace_instance *trace_inst; +int stop_tracing; + +static void stop_trace(int sig) +{ + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(trace_inst->inst); + return; + } + stop_tracing = 1; + if (trace_inst) + trace_instance_stop(trace_inst); +} + +/* + * set_signals - handles the signal to stop the tool + */ +static void set_signals(struct common_params *params) +{ + signal(SIGINT, stop_trace); + if (params->duration) { + signal(SIGALRM, stop_trace); + alarm(params->duration); + } +} + +/* + * common_apply_config - apply common configs to the initialized tool + */ +int +common_apply_config(struct osnoise_tool *tool, struct common_params *params) +{ + int retval, i; + + if (!params->sleep_time) + params->sleep_time = 1; + + retval = osnoise_set_cpus(tool->context, params->cpus ? 
params->cpus : "all"); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + + if (!params->cpus) { + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) + CPU_SET(i, ¶ms->monitored_cpus); + } + + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + + /* + * Set workload according to type of thread if the kernel supports it. + * On kernels without support, user threads will have already failed + * on missing fd, and kernel threads do not need it. + */ + retval = osnoise_set_workload(tool->context, params->kernel_workload); + if (retval < -1) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; + } + + return 0; + +out_err: + return -1; +} + + +int run_tool(struct tool_ops *ops, int argc, char *argv[]) +{ + struct common_params *params; + enum result return_value = ERROR; + struct osnoise_tool *tool; + bool stopped; + int retval; + + params = ops->parse_args(argc, argv); + if (!params) + exit(1); + + tool = ops->init_tool(params); + if (!tool) { + err_msg("Could not init osnoise tool\n"); + goto out_exit; + } + tool->ops = ops; + tool->params = params; + + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. 
+ */ + trace_inst = &tool->trace; + + retval = ops->apply_config(tool); + if (retval) { + err_msg("Could not apply config\n"); + goto out_free; + } + + retval = enable_tracer_by_name(trace_inst->inst, ops->tracer); + if (retval) { + err_msg("Failed to enable %s tracer\n", ops->tracer); + goto out_free; + } + + if (params->set_sched) { + retval = set_comm_sched_attr(ops->comm_prefix, ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_free; + } + } + + if (params->cgroup && !params->user_data) { + retval = set_comm_cgroup(ops->comm_prefix, params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } + + + if (params->threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->end_actions.present[ACTION_TRACE_OUTPUT]) { + tool->record = osnoise_init_trace_tool(ops->tracer); + if (!tool->record) { + err_msg("Failed to enable the trace instance\n"); + goto out_free; + } + params->threshold_actions.trace_output_inst = tool->record->trace.inst; + params->end_actions.trace_output_inst = tool->record->trace.inst; + + if (params->events) { + retval = trace_events_enable(&tool->record->trace, params->events); + if (retval) + goto out_trace; + } + + if (params->buffer_size > 0) { + retval = trace_set_buffer_size(&tool->record->trace, params->buffer_size); + if (retval) + goto out_trace; + } + } + + if (params->user_workload) { + pthread_t user_thread; + + /* rtla asked to stop */ + params->user.should_run = 1; + /* all threads left */ + params->user.stopped_running = 0; + + params->user.set = ¶ms->monitored_cpus; + if (params->set_sched) + params->user.sched_param = ¶ms->sched_param; + else + params->user.sched_param = NULL; + + params->user.cgroup_name = params->cgroup_name; + + retval = pthread_create(&user_thread, NULL, timerlat_u_dispatcher, ¶ms->user); + if (retval) + err_msg("Error creating timerlat user-space threads\n"); + } + + retval = ops->enable(tool); + if (retval) + goto 
out_trace; + + tool->start_time = time(NULL); + set_signals(params); + + retval = ops->main(tool); + if (retval) + goto out_trace; + + if (params->user_workload && !params->user.stopped_running) { + params->user.should_run = 0; + sleep(1); + } + + ops->print_stats(tool); + + actions_perform(¶ms->end_actions); + + return_value = PASSED; + + stopped = osnoise_trace_is_off(tool, tool->record) && !stop_tracing; + if (stopped) { + printf("%s hit stop tracing\n", ops->tracer); + return_value = FAILED; + } + + if (ops->analyze) + ops->analyze(tool, stopped); + +out_trace: + trace_events_destroy(&tool->record->trace, params->events); + params->events = NULL; +out_free: + ops->free(tool); + osnoise_destroy_tool(tool->record); + osnoise_destroy_tool(tool); + actions_destroy(¶ms->threshold_actions); + actions_destroy(¶ms->end_actions); + free(params); +out_exit: + exit(return_value); +} + +int top_main_loop(struct osnoise_tool *tool) +{ + struct common_params *params = tool->params; + struct trace_instance *trace = &tool->trace; + struct osnoise_tool *record = tool->record; + int retval; + + while (!stop_tracing) { + sleep(params->sleep_time); + + if (params->aa_only && !osnoise_trace_is_off(tool, record)) + continue; + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + return retval; + } + + if (!params->quiet) + tool->ops->print_stats(tool); + + if (osnoise_trace_is_off(tool, record)) { + if (stop_tracing) + /* stop tracing requested, do not perform actions */ + return 0; + + actions_perform(¶ms->threshold_actions); + + if (!params->threshold_actions.continue_flag) + /* continue flag not set, break */ + return 0; + + /* continue action reached, re-enable tracing */ + if (record) + trace_instance_start(&record->trace); + if (tool->aa) + trace_instance_start(&tool->aa->trace); + trace_instance_start(trace); + } + + /* is there still any 
user-threads ? */ + if (params->user_workload) { + if (params->user.stopped_running) { + debug_msg("timerlat user space threads stopped!\n"); + break; + } + } + } + + return 0; +} + +int hist_main_loop(struct osnoise_tool *tool) +{ + struct common_params *params = tool->params; + struct trace_instance *trace = &tool->trace; + int retval = 0; + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + break; + } + + if (osnoise_trace_is_off(tool, tool->record)) { + if (stop_tracing) + /* stop tracing requested, do not perform actions */ + break; + + actions_perform(¶ms->threshold_actions); + + if (!params->threshold_actions.continue_flag) + /* continue flag not set, break */ + break; + + /* continue action reached, re-enable tracing */ + if (tool->record) + trace_instance_start(&tool->record->trace); + if (tool->aa) + trace_instance_start(&tool->aa->trace); + trace_instance_start(&tool->trace); + } + + /* is there still any user-threads ? */ + if (params->user_workload) { + if (params->user.stopped_running) { + debug_msg("user-space threads stopped!\n"); + break; + } + } + } + + return retval; +} diff --git a/tools/tracing/rtla/src/common.h b/tools/tracing/rtla/src/common.h new file mode 100644 index 000000000000..9ec2b7632c37 --- /dev/null +++ b/tools/tracing/rtla/src/common.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#pragma once + +#include "actions.h" +#include "timerlat_u.h" +#include "trace.h" +#include "utils.h" + +/* + * osnoise_context - read, store, write, restore osnoise configs. 
+ */ +struct osnoise_context { + int flags; + int ref; + + char *curr_cpus; + char *orig_cpus; + + /* 0 as init value */ + unsigned long long orig_runtime_us; + unsigned long long runtime_us; + + /* 0 as init value */ + unsigned long long orig_period_us; + unsigned long long period_us; + + /* 0 as init value */ + long long orig_timerlat_period_us; + long long timerlat_period_us; + + /* 0 as init value */ + long long orig_tracing_thresh; + long long tracing_thresh; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_us; + long long stop_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_total_us; + long long stop_total_us; + + /* -1 as init value because 0 is disabled */ + long long orig_print_stack; + long long print_stack; + + /* -1 as init value because 0 is off */ + int orig_opt_irq_disable; + int opt_irq_disable; + + /* -1 as init value because 0 is off */ + int orig_opt_workload; + int opt_workload; +}; + +extern struct trace_instance *trace_inst; +extern int stop_tracing; + +struct hist_params { + char no_irq; + char no_thread; + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +/* + * common_params - Parameters shared between timerlat_params and osnoise_params + */ +struct common_params { + /* trace configuration */ + char *cpus; + cpu_set_t monitored_cpus; + struct trace_events *events; + int buffer_size; + + /* Timing parameters */ + int warmup; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + + /* Scheduling parameters */ + int set_sched; + struct sched_attr sched_param; + int cgroup; + char *cgroup_name; + int hk_cpus; + cpu_set_t hk_cpu_set; + + /* Other parameters */ + struct hist_params hist; + int output_divisor; + int pretty_output; + int quiet; + int user_workload; + int kernel_workload; + int user_data; + int aa_only; + + struct actions threshold_actions; + struct actions end_actions; + struct 
timerlat_u_params user; +}; + +#define for_each_monitored_cpu(cpu, nr_cpus, common) \ + for (cpu = 0; cpu < nr_cpus; cpu++) \ + if (!(common)->cpus || CPU_ISSET(cpu, &(common)->monitored_cpus)) + +struct tool_ops; + +/* + * osnoise_tool - osnoise based tool definition. + * + * Only the "trace" and "context" fields are used for + * the additional trace instances (record and aa). + */ +struct osnoise_tool { + struct tool_ops *ops; + struct trace_instance trace; + struct osnoise_context *context; + void *data; + struct common_params *params; + time_t start_time; + struct osnoise_tool *record; + struct osnoise_tool *aa; +}; + +struct tool_ops { + const char *tracer; + const char *comm_prefix; + struct common_params *(*parse_args)(int argc, char *argv[]); + struct osnoise_tool *(*init_tool)(struct common_params *params); + int (*apply_config)(struct osnoise_tool *tool); + int (*enable)(struct osnoise_tool *tool); + int (*main)(struct osnoise_tool *tool); + void (*print_stats)(struct osnoise_tool *tool); + void (*analyze)(struct osnoise_tool *tool, bool stopped); + void (*free)(struct osnoise_tool *tool); +}; + +int osnoise_set_cpus(struct osnoise_context *context, char *cpus); +void osnoise_restore_cpus(struct osnoise_context *context); + +int osnoise_set_workload(struct osnoise_context *context, bool onoff); + +void osnoise_destroy_tool(struct osnoise_tool *top); +struct osnoise_tool *osnoise_init_tool(char *tool_name); +struct osnoise_tool *osnoise_init_trace_tool(const char *tracer); +bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record); + +int common_apply_config(struct osnoise_tool *tool, struct common_params *params); +int top_main_loop(struct osnoise_tool *tool); +int hist_main_loop(struct osnoise_tool *tool); diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c new file mode 100644 index 000000000000..312c511fa004 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.c @@ -0,0 +1,1253 @@ +// 
SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sched.h>
+
+#include "osnoise.h"
+
+#define DEFAULT_SAMPLE_PERIOD	1000000			/* 1s */
+#define DEFAULT_SAMPLE_RUNTIME	1000000			/* 1s */
+
+/*
+ * osnoise_get_cpus - return the original "osnoise/cpus" content
+ *
+ * It also saves the value to be restored.
+ */
+char *osnoise_get_cpus(struct osnoise_context *context)
+{
+	if (context->curr_cpus)
+		return context->curr_cpus;
+
+	if (context->orig_cpus)
+		return context->orig_cpus;
+
+	context->orig_cpus = tracefs_instance_file_read(NULL, "osnoise/cpus", NULL);
+
+	/*
+	 * The error value (NULL) is the same for tracefs_instance_file_read()
+	 * and this function, so:
+	 */
+	return context->orig_cpus;
+}
+
+/*
+ * osnoise_set_cpus - configure osnoise to run on *cpus
+ *
+ * "osnoise/cpus" file is used to set the cpus in which osnoise/timerlat
+ * will run. This function opens this file, saves the current value,
+ * and sets the cpus passed as argument.
+ */
+int osnoise_set_cpus(struct osnoise_context *context, char *cpus)
+{
+	char *orig_cpus = osnoise_get_cpus(context);
+	char buffer[1024];
+	int retval;
+
+	if (!orig_cpus)
+		return -1;
+
+	context->curr_cpus = strdup(cpus);
+	if (!context->curr_cpus)
+		return -1;
+
+	snprintf(buffer, 1024, "%s\n", cpus);
+
+	debug_msg("setting cpus to %s from %s", cpus, context->orig_cpus);
+
+	retval = tracefs_instance_file_write(NULL, "osnoise/cpus", buffer);
+	if (retval < 0) {
+		free(context->curr_cpus);
+		context->curr_cpus = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * osnoise_restore_cpus - restore the original "osnoise/cpus"
+ *
+ * osnoise_set_cpus() saves the original data for the "osnoise/cpus"
+ * file. 
This function restore the original config it was previously + * modified. + */ +void osnoise_restore_cpus(struct osnoise_context *context) +{ + int retval; + + if (!context->orig_cpus) + return; + + if (!context->curr_cpus) + return; + + /* nothing to do? */ + if (!strcmp(context->orig_cpus, context->curr_cpus)) + goto out_done; + + debug_msg("restoring cpus to %s", context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", context->orig_cpus); + if (retval < 0) + err_msg("could not restore original osnoise cpus\n"); + +out_done: + free(context->curr_cpus); + context->curr_cpus = NULL; +} + +/* + * osnoise_put_cpus - restore cpus config and cleanup data + */ +void osnoise_put_cpus(struct osnoise_context *context) +{ + osnoise_restore_cpus(context); + + if (!context->orig_cpus) + return; + + free(context->orig_cpus); + context->orig_cpus = NULL; +} + +/* + * osnoise_read_ll_config - read a long long value from a config + * + * returns -1 on error. + */ +static long long osnoise_read_ll_config(char *rel_path) +{ + long long retval; + char *buffer; + + buffer = tracefs_instance_file_read(NULL, rel_path, NULL); + if (!buffer) + return -1; + + /* get_llong_from_str returns -1 on error */ + retval = get_llong_from_str(buffer); + + debug_msg("reading %s returned %lld\n", rel_path, retval); + + free(buffer); + + return retval; +} + +/* + * osnoise_write_ll_config - write a long long value to a config in rel_path + * + * returns -1 on error. + */ +static long long osnoise_write_ll_config(char *rel_path, long long value) +{ + char buffer[BUFF_U64_STR_SIZE]; + long long retval; + + snprintf(buffer, sizeof(buffer), "%lld\n", value); + + debug_msg("setting %s to %lld\n", rel_path, value); + + retval = tracefs_instance_file_write(NULL, rel_path, buffer); + return retval; +} + +/* + * osnoise_get_runtime - return the original "osnoise/runtime_us" value + * + * It also saves the value to be restored. 
+ */ +unsigned long long osnoise_get_runtime(struct osnoise_context *context) +{ + long long runtime_us; + + if (context->runtime_us != OSNOISE_TIME_INIT_VAL) + return context->runtime_us; + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + return context->orig_runtime_us; + + runtime_us = osnoise_read_ll_config("osnoise/runtime_us"); + if (runtime_us < 0) + goto out_err; + + context->orig_runtime_us = runtime_us; + return runtime_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_period - return the original "osnoise/period_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_period(struct osnoise_context *context) +{ + long long period_us; + + if (context->period_us != OSNOISE_TIME_INIT_VAL) + return context->period_us; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_period_us; + + period_us = osnoise_read_ll_config("osnoise/period_us"); + if (period_us < 0) + goto out_err; + + context->orig_period_us = period_us; + return period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +static int __osnoise_write_runtime(struct osnoise_context *context, + unsigned long long runtime) +{ + int retval; + + if (context->orig_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/runtime_us", runtime); + if (retval < 0) + return -1; + + context->runtime_us = runtime; + return 0; +} + +static int __osnoise_write_period(struct osnoise_context *context, + unsigned long long period) +{ + int retval; + + if (context->orig_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/period_us", period); + if (retval < 0) + return -1; + + context->period_us = period; + return 0; +} + +/* + * osnoise_set_runtime_period - set osnoise runtime and period + * + * Osnoise's runtime and period are related as runtime <= period. 
+ * Thus, this function saves the original values, and then tries + * to set the runtime and period if they are != 0. + */ +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period) +{ + unsigned long long curr_runtime_us; + unsigned long long curr_period_us; + int retval; + + if (!period && !runtime) + return 0; + + curr_runtime_us = osnoise_get_runtime(context); + curr_period_us = osnoise_get_period(context); + + /* error getting any value? */ + if (curr_period_us == OSNOISE_TIME_INIT_VAL || curr_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + if (!period) { + if (runtime > curr_period_us) + return -1; + return __osnoise_write_runtime(context, runtime); + } else if (!runtime) { + if (period < curr_runtime_us) + return -1; + return __osnoise_write_period(context, period); + } + + if (runtime > curr_period_us) { + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + } else { + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + } + + return 0; +} + +/* + * osnoise_restore_runtime_period - restore the original runtime and period + */ +void osnoise_restore_runtime_period(struct osnoise_context *context) +{ + unsigned long long orig_runtime = context->orig_runtime_us; + unsigned long long orig_period = context->orig_period_us; + unsigned long long curr_runtime = context->runtime_us; + unsigned long long curr_period = context->period_us; + int retval; + + if ((orig_runtime == OSNOISE_TIME_INIT_VAL) && (orig_period == OSNOISE_TIME_INIT_VAL)) + return; + + if ((orig_period == curr_period) && (orig_runtime == curr_runtime)) + goto out_done; + + retval = osnoise_set_runtime_period(context, orig_runtime, orig_period); + if (retval) + err_msg("Could not restore original osnoise 
runtime/period\n"); + +out_done: + context->runtime_us = OSNOISE_TIME_INIT_VAL; + context->period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_runtime_period - restore original values and cleanup data + */ +void osnoise_put_runtime_period(struct osnoise_context *context) +{ + osnoise_restore_runtime_period(context); + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + context->orig_runtime_us = OSNOISE_TIME_INIT_VAL; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + context->orig_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_timerlat_period_us - read and save the original "timerlat_period_us" + */ +static long long +osnoise_get_timerlat_period_us(struct osnoise_context *context) +{ + long long timerlat_period_us; + + if (context->timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->timerlat_period_us; + + if (context->orig_timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_timerlat_period_us; + + timerlat_period_us = osnoise_read_ll_config("osnoise/timerlat_period_us"); + if (timerlat_period_us < 0) + goto out_err; + + context->orig_timerlat_period_us = timerlat_period_us; + return timerlat_period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_set_timerlat_period_us - set "timerlat_period_us" + */ +int osnoise_set_timerlat_period_us(struct osnoise_context *context, long long timerlat_period_us) +{ + long long curr_timerlat_period_us = osnoise_get_timerlat_period_us(context); + int retval; + + if (curr_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", timerlat_period_us); + if (retval < 0) + return -1; + + context->timerlat_period_us = timerlat_period_us; + + return 0; +} + +/* + * osnoise_restore_timerlat_period_us - restore "timerlat_period_us" + */ +void osnoise_restore_timerlat_period_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_timerlat_period_us == 
OSNOISE_TIME_INIT_VAL) + return; + + if (context->orig_timerlat_period_us == context->timerlat_period_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", context->orig_timerlat_period_us); + if (retval < 0) + err_msg("Could not restore original osnoise timerlat_period_us\n"); + +out_done: + context->timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_timerlat_period_us - restore original values and cleanup data + */ +void osnoise_put_timerlat_period_us(struct osnoise_context *context) +{ + osnoise_restore_timerlat_period_us(context); + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + context->orig_timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_stop_us - read and save the original "stop_tracing_us" + */ +static long long +osnoise_get_stop_us(struct osnoise_context *context) +{ + long long stop_us; + + if (context->stop_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_us; + + if (context->orig_stop_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_us; + + stop_us = osnoise_read_ll_config("osnoise/stop_tracing_us"); + if (stop_us < 0) + goto out_err; + + context->orig_stop_us = stop_us; + return stop_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_us - set "stop_tracing_us" + */ +int osnoise_set_stop_us(struct osnoise_context *context, long long stop_us) +{ + long long curr_stop_us = osnoise_get_stop_us(context); + int retval; + + if (curr_stop_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", stop_us); + if (retval < 0) + return -1; + + context->stop_us = stop_us; + + return 0; +} + +/* + * osnoise_restore_stop_us - restore the original "stop_tracing_us" + */ +void osnoise_restore_stop_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_us == context->stop_us) + goto 
out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", context->orig_stop_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_us\n"); + +out_done: + context->stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_us - restore original values and cleanup data + */ +void osnoise_put_stop_us(struct osnoise_context *context) +{ + osnoise_restore_stop_us(context); + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_stop_total_us - read and save the original "stop_tracing_total_us" + */ +static long long +osnoise_get_stop_total_us(struct osnoise_context *context) +{ + long long stop_total_us; + + if (context->stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_total_us; + + if (context->orig_stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_total_us; + + stop_total_us = osnoise_read_ll_config("osnoise/stop_tracing_total_us"); + if (stop_total_us < 0) + goto out_err; + + context->orig_stop_total_us = stop_total_us; + return stop_total_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_total_us - set "stop_tracing_total_us" + */ +int osnoise_set_stop_total_us(struct osnoise_context *context, long long stop_total_us) +{ + long long curr_stop_total_us = osnoise_get_stop_total_us(context); + int retval; + + if (curr_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", stop_total_us); + if (retval < 0) + return -1; + + context->stop_total_us = stop_total_us; + + return 0; +} + +/* + * osnoise_restore_stop_total_us - restore the original "stop_tracing_total_us" + */ +void osnoise_restore_stop_total_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_total_us == context->stop_total_us) + goto out_done; + 
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", + context->orig_stop_total_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_total_us\n"); + +out_done: + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_total_us - restore original values and cleanup data + */ +void osnoise_put_stop_total_us(struct osnoise_context *context) +{ + osnoise_restore_stop_total_us(context); + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_print_stack - read and save the original "print_stack" + */ +static long long +osnoise_get_print_stack(struct osnoise_context *context) +{ + long long print_stack; + + if (context->print_stack != OSNOISE_OPTION_INIT_VAL) + return context->print_stack; + + if (context->orig_print_stack != OSNOISE_OPTION_INIT_VAL) + return context->orig_print_stack; + + print_stack = osnoise_read_ll_config("osnoise/print_stack"); + if (print_stack < 0) + goto out_err; + + context->orig_print_stack = print_stack; + return print_stack; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_print_stack - set "print_stack" + */ +int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack) +{ + long long curr_print_stack = osnoise_get_print_stack(context); + int retval; + + if (curr_print_stack == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/print_stack", print_stack); + if (retval < 0) + return -1; + + context->print_stack = print_stack; + + return 0; +} + +/* + * osnoise_restore_print_stack - restore the original "print_stack" + */ +void osnoise_restore_print_stack(struct osnoise_context *context) +{ + int retval; + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_print_stack == context->print_stack) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/print_stack", 
context->orig_print_stack); + if (retval < 0) + err_msg("Could not restore original osnoise print_stack\n"); + +out_done: + context->print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_print_stack - restore original values and cleanup data + */ +void osnoise_put_print_stack(struct osnoise_context *context) +{ + osnoise_restore_print_stack(context); + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_tracing_thresh - read and save the original "tracing_thresh" + */ +static long long +osnoise_get_tracing_thresh(struct osnoise_context *context) +{ + long long tracing_thresh; + + if (context->tracing_thresh != OSNOISE_OPTION_INIT_VAL) + return context->tracing_thresh; + + if (context->orig_tracing_thresh != OSNOISE_OPTION_INIT_VAL) + return context->orig_tracing_thresh; + + tracing_thresh = osnoise_read_ll_config("tracing_thresh"); + if (tracing_thresh < 0) + goto out_err; + + context->orig_tracing_thresh = tracing_thresh; + return tracing_thresh; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_tracing_thresh - set "tracing_thresh" + */ +int osnoise_set_tracing_thresh(struct osnoise_context *context, long long tracing_thresh) +{ + long long curr_tracing_thresh = osnoise_get_tracing_thresh(context); + int retval; + + if (curr_tracing_thresh == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("tracing_thresh", tracing_thresh); + if (retval < 0) + return -1; + + context->tracing_thresh = tracing_thresh; + + return 0; +} + +/* + * osnoise_restore_tracing_thresh - restore the original "tracing_thresh" + */ +void osnoise_restore_tracing_thresh(struct osnoise_context *context) +{ + int retval; + + if (context->orig_tracing_thresh == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_tracing_thresh == context->tracing_thresh) + goto out_done; + + retval = osnoise_write_ll_config("tracing_thresh", 
context->orig_tracing_thresh); + if (retval < 0) + err_msg("Could not restore original tracing_thresh\n"); + +out_done: + context->tracing_thresh = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_tracing_thresh - restore original values and cleanup data + */ +void osnoise_put_tracing_thresh(struct osnoise_context *context) +{ + osnoise_restore_tracing_thresh(context); + + if (context->orig_tracing_thresh == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL; +} + +static int osnoise_options_get_option(char *option) +{ + char *options = tracefs_instance_file_read(NULL, "osnoise/options", NULL); + char no_option[128]; + int retval = 0; + char *opt; + + if (!options) + return OSNOISE_OPTION_INIT_VAL; + + /* + * Check first if the option is disabled. + */ + snprintf(no_option, sizeof(no_option), "NO_%s", option); + + opt = strstr(options, no_option); + if (opt) + goto out_free; + + /* + * Now that it is not disabled, if the string is there, it is + * enabled. If the string is not there, the option does not exist. 
+ */ + opt = strstr(options, option); + if (opt) + retval = 1; + else + retval = OSNOISE_OPTION_INIT_VAL; + +out_free: + free(options); + return retval; +} + +static int osnoise_options_set_option(char *option, bool onoff) +{ + char no_option[128]; + + if (onoff) + return tracefs_instance_file_write(NULL, "osnoise/options", option); + + snprintf(no_option, sizeof(no_option), "NO_%s", option); + + return tracefs_instance_file_write(NULL, "osnoise/options", no_option); +} + +static int osnoise_get_irq_disable(struct osnoise_context *context) +{ + if (context->opt_irq_disable != OSNOISE_OPTION_INIT_VAL) + return context->opt_irq_disable; + + if (context->orig_opt_irq_disable != OSNOISE_OPTION_INIT_VAL) + return context->orig_opt_irq_disable; + + context->orig_opt_irq_disable = osnoise_options_get_option("OSNOISE_IRQ_DISABLE"); + + return context->orig_opt_irq_disable; +} + +int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff) +{ + int opt_irq_disable = osnoise_get_irq_disable(context); + int retval; + + if (opt_irq_disable == OSNOISE_OPTION_INIT_VAL) + return -1; + + if (opt_irq_disable == onoff) + return 0; + + retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", onoff); + if (retval < 0) + return -1; + + context->opt_irq_disable = onoff; + + return 0; +} + +static void osnoise_restore_irq_disable(struct osnoise_context *context) +{ + int retval; + + if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_opt_irq_disable == context->opt_irq_disable) + goto out_done; + + retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", context->orig_opt_irq_disable); + if (retval < 0) + err_msg("Could not restore original OSNOISE_IRQ_DISABLE option\n"); + +out_done: + context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; +} + +static void osnoise_put_irq_disable(struct osnoise_context *context) +{ + osnoise_restore_irq_disable(context); + + if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL) + 
return; + + context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; +} + +static int osnoise_get_workload(struct osnoise_context *context) +{ + if (context->opt_workload != OSNOISE_OPTION_INIT_VAL) + return context->opt_workload; + + if (context->orig_opt_workload != OSNOISE_OPTION_INIT_VAL) + return context->orig_opt_workload; + + context->orig_opt_workload = osnoise_options_get_option("OSNOISE_WORKLOAD"); + + return context->orig_opt_workload; +} + +int osnoise_set_workload(struct osnoise_context *context, bool onoff) +{ + int opt_workload = osnoise_get_workload(context); + int retval; + + if (opt_workload == OSNOISE_OPTION_INIT_VAL) + return -1; + + if (opt_workload == onoff) + return 0; + + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff); + if (retval < 0) + return -2; + + context->opt_workload = onoff; + + return 0; +} + +static void osnoise_restore_workload(struct osnoise_context *context) +{ + int retval; + + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_opt_workload == context->opt_workload) + goto out_done; + + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", context->orig_opt_workload); + if (retval < 0) + err_msg("Could not restore original OSNOISE_WORKLOAD option\n"); + +out_done: + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; +} + +static void osnoise_put_workload(struct osnoise_context *context) +{ + osnoise_restore_workload(context); + + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; +} + +enum { + FLAG_CONTEXT_NEWLY_CREATED = (1 << 0), + FLAG_CONTEXT_DELETED = (1 << 1), +}; + +/* + * osnoise_get_context - increase the usage of a context and return it + */ +int osnoise_get_context(struct osnoise_context *context) +{ + int ret; + + if (context->flags & FLAG_CONTEXT_DELETED) { + ret = -1; + } else { + context->ref++; + ret = 0; + } + + return ret; +} + +/* + * osnoise_context_alloc - alloc an 
osnoise_context + * + * The osnoise context contains the information of the "osnoise/" configs. + * It is used to set and restore the config. + */ +struct osnoise_context *osnoise_context_alloc(void) +{ + struct osnoise_context *context; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; + context->stop_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; + context->print_stack = OSNOISE_OPTION_INIT_VAL; + + context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL; + context->tracing_thresh = OSNOISE_OPTION_INIT_VAL; + + context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; + context->opt_irq_disable = OSNOISE_OPTION_INIT_VAL; + + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; + context->opt_workload = OSNOISE_OPTION_INIT_VAL; + + osnoise_get_context(context); + + return context; +} + +/* + * osnoise_put_context - put the osnoise_put_context + * + * If there is no other user for the context, the original data + * is restored. 
+ */ +void osnoise_put_context(struct osnoise_context *context) +{ + if (--context->ref < 1) + context->flags |= FLAG_CONTEXT_DELETED; + + if (!(context->flags & FLAG_CONTEXT_DELETED)) + return; + + osnoise_put_cpus(context); + osnoise_put_runtime_period(context); + osnoise_put_stop_us(context); + osnoise_put_stop_total_us(context); + osnoise_put_timerlat_period_us(context); + osnoise_put_print_stack(context); + osnoise_put_tracing_thresh(context); + osnoise_put_irq_disable(context); + osnoise_put_workload(context); + + free(context); +} + +/* + * osnoise_destroy_tool - disable trace, restore configs and free data + */ +void osnoise_destroy_tool(struct osnoise_tool *top) +{ + if (!top) + return; + + trace_instance_destroy(&top->trace); + + if (top->context) + osnoise_put_context(top->context); + + free(top); +} + +/* + * osnoise_init_tool - init an osnoise tool + * + * It allocs data, create a context to store data and + * creates a new trace instance for the tool. + */ +struct osnoise_tool *osnoise_init_tool(char *tool_name) +{ + struct osnoise_tool *top; + int retval; + + top = calloc(1, sizeof(*top)); + if (!top) + return NULL; + + top->context = osnoise_context_alloc(); + if (!top->context) + goto out_err; + + retval = trace_instance_init(&top->trace, tool_name); + if (retval) + goto out_err; + + return top; +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +/* + * osnoise_init_trace_tool - init a tracer instance to trace osnoise events + */ +struct osnoise_tool *osnoise_init_trace_tool(const char *tracer) +{ + struct osnoise_tool *trace; + int retval; + + trace = osnoise_init_tool("osnoise_trace"); + if (!trace) + return NULL; + + retval = tracefs_event_enable(trace->trace.inst, "osnoise", NULL); + if (retval < 0 && !errno) { + err_msg("Could not find osnoise events\n"); + goto out_err; + } + + retval = enable_tracer_by_name(trace->trace.inst, tracer); + if (retval) { + err_msg("Could not enable %s tracer for tracing\n", tracer); + goto out_err; + } + 
+ return trace; +out_err: + osnoise_destroy_tool(trace); + return NULL; +} + +bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record) +{ + /* + * The tool instance is always present, it is the one used to collect + * data. + */ + if (!tracefs_trace_is_on(tool->trace.inst)) + return true; + + /* + * The trace record instance is only enabled when -t is set. IOW, when the system + * is tracing. + */ + return record && !tracefs_trace_is_on(record->trace.inst); +} + +/* + * osnoise_report_missed_events - report number of events dropped by trace + * buffer + */ +void +osnoise_report_missed_events(struct osnoise_tool *tool) +{ + unsigned long long total_events; + + if (tool->trace.missed_events == UINT64_MAX) + printf("unknown number of events missed, results might not be accurate\n"); + else if (tool->trace.missed_events > 0) { + total_events = tool->trace.processed_events + tool->trace.missed_events; + + printf("%lld (%.2f%%) events missed, results might not be accurate\n", + tool->trace.missed_events, + (double) tool->trace.missed_events / total_events * 100.0); + } +} + +/* + * osnoise_apply_config - apply osnoise configs to the initialized tool + */ +int +osnoise_apply_config(struct osnoise_tool *tool, struct osnoise_params *params) +{ + int retval; + + params->common.kernel_workload = true; + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + } else { + retval = osnoise_set_runtime_period(tool->context, + DEFAULT_SAMPLE_PERIOD, + DEFAULT_SAMPLE_RUNTIME); + } + + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + + retval = osnoise_set_stop_us(tool->context, params->common.stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + + retval = osnoise_set_stop_total_us(tool->context, params->common.stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + + 
retval = osnoise_set_tracing_thresh(tool->context, params->threshold); + if (retval) { + err_msg("Failed to set tracing_thresh\n"); + goto out_err; + } + + return common_apply_config(tool, ¶ms->common); + +out_err: + return -1; +} + +int osnoise_enable(struct osnoise_tool *tool) +{ + struct osnoise_params *params = to_osnoise_params(tool->params); + int retval; + + /* + * Start the tracer here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (tool->record) + trace_instance_start(&tool->record->trace); + trace_instance_start(&tool->trace); + + if (params->common.warmup > 0) { + debug_msg("Warming up for %d seconds\n", params->common.warmup); + sleep(params->common.warmup); + if (stop_tracing) + return -1; + + /* + * Clean up the buffer. The osnoise workload do not run + * with tracing off to avoid creating a performance penalty + * when not needed. + */ + retval = tracefs_instance_file_write(tool->trace.inst, "trace", ""); + if (retval < 0) { + debug_msg("Error cleaning up the buffer"); + return retval; + } + + } + + return 0; +} + +static void osnoise_usage(int err) +{ + int i; + + static const char *msg[] = { + "", + "osnoise version " VERSION, + "", + " usage: [rtla] osnoise [MODE] ...", + "", + " modes:", + " top - prints the summary from osnoise tracer", + " hist - prints a histogram of osnoise samples", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(err); +} + +int osnoise_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if osnoise was called without any argument, run the + * default cmdline. 
+ */ + if (argc == 1) { + run_tool(&osnoise_top_ops, argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + osnoise_usage(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + run_tool(&osnoise_top_ops, argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + run_tool(&osnoise_top_ops, argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + run_tool(&osnoise_hist_ops, argc-1, &argv[1]); + exit(0); + } + +usage: + osnoise_usage(1); + exit(1); +} + +int hwnoise_main(int argc, char *argv[]) +{ + run_tool(&osnoise_top_ops, argc, argv); + exit(0); +} diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h new file mode 100644 index 000000000000..895687030c0b --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.h @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#pragma once + +#include "common.h" + +enum osnoise_mode { + MODE_OSNOISE = 0, + MODE_HWNOISE +}; + +struct osnoise_params { + struct common_params common; + unsigned long long runtime; + unsigned long long period; + long long threshold; + enum osnoise_mode mode; +}; + +#define to_osnoise_params(ptr) container_of(ptr, struct osnoise_params, common) + +/* + * *_INIT_VALs are also invalid values, they are used to + * communicate errors. 
+ */ +#define OSNOISE_OPTION_INIT_VAL (-1) +#define OSNOISE_TIME_INIT_VAL (0) + +struct osnoise_context *osnoise_context_alloc(void); +int osnoise_get_context(struct osnoise_context *context); +void osnoise_put_context(struct osnoise_context *context); + +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period); +void osnoise_restore_runtime_period(struct osnoise_context *context); + +int osnoise_set_stop_us(struct osnoise_context *context, + long long stop_us); +void osnoise_restore_stop_us(struct osnoise_context *context); + +int osnoise_set_stop_total_us(struct osnoise_context *context, + long long stop_total_us); +void osnoise_restore_stop_total_us(struct osnoise_context *context); + +int osnoise_set_timerlat_period_us(struct osnoise_context *context, + long long timerlat_period_us); +void osnoise_restore_timerlat_period_us(struct osnoise_context *context); + +int osnoise_set_tracing_thresh(struct osnoise_context *context, + long long tracing_thresh); +void osnoise_restore_tracing_thresh(struct osnoise_context *context); + +void osnoise_restore_print_stack(struct osnoise_context *context); +int osnoise_set_print_stack(struct osnoise_context *context, + long long print_stack); + +int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff); +void osnoise_report_missed_events(struct osnoise_tool *tool); +int osnoise_apply_config(struct osnoise_tool *tool, struct osnoise_params *params); + +int osnoise_hist_main(int argc, char *argv[]); +int osnoise_top_main(int argc, char **argv); +int osnoise_enable(struct osnoise_tool *tool); +int osnoise_main(int argc, char **argv); +int hwnoise_main(int argc, char **argv); + +extern struct tool_ops timerlat_top_ops, timerlat_hist_ops; +extern struct tool_ops osnoise_top_ops, osnoise_hist_ops; + +int run_tool(struct tool_ops *ops, int argc, char *argv[]); +int hist_main_loop(struct osnoise_tool *tool); diff --git 
a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c new file mode 100644 index 000000000000..ff8c231e47c4 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -0,0 +1,753 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <time.h> + +#include "osnoise.h" + +struct osnoise_hist_cpu { + int *samples; + int count; + + unsigned long long min_sample; + unsigned long long sum_sample; + unsigned long long max_sample; + +}; + +struct osnoise_hist_data { + struct tracefs_hist *trace_hist; + struct osnoise_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * osnoise_free_histogram - free runtime data + */ +static void +osnoise_free_histogram(struct osnoise_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].samples) + free(data->hist[cpu].samples); + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +static void osnoise_free_hist_tool(struct osnoise_tool *tool) +{ + osnoise_free_histogram(tool->data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_hist_data +*osnoise_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct osnoise_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].samples = calloc(1, sizeof(*data->hist->samples) * (entries + 1)); + if 
(!data->hist[cpu].samples) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) + data->hist[cpu].min_sample = ~0; + + return data; + +cleanup: + osnoise_free_histogram(data); + return NULL; +} + +static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, + unsigned long long duration, int count) +{ + struct osnoise_params *params = to_osnoise_params(tool->params); + struct osnoise_hist_data *data = tool->data; + unsigned long long total_duration; + int entries = data->entries; + int bucket; + int *hist; + + if (params->common.output_divisor) + duration = duration / params->common.output_divisor; + + bucket = duration / data->bucket_size; + + total_duration = duration * count; + + hist = data->hist[cpu].samples; + data->hist[cpu].count += count; + update_min(&data->hist[cpu].min_sample, &duration); + update_sum(&data->hist[cpu].sum_sample, &total_duration); + update_max(&data->hist[cpu].max_sample, &duration); + + if (bucket < entries) + hist[bucket] += count; + else + hist[entries] += count; +} + +/* + * osnoise_destroy_trace_hist - disable events used to collect histogram + */ +static void osnoise_destroy_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + tracefs_hist_destroy(tool->trace.inst, data->trace_hist); +} + +/* + * osnoise_init_trace_hist - enable events used to collect histogram + */ +static int osnoise_init_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_params *params = to_osnoise_params(tool->params); + struct osnoise_hist_data *data = tool->data; + int bucket_size; + char buff[128]; + int retval = 0; + + /* + * Set the size of the bucket. 
+ */ + bucket_size = params->common.output_divisor * params->common.hist.bucket_size; + snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size); + + data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold", + buff, TRACEFS_HIST_KEY_NORMAL); + if (!data->trace_hist) + return 1; + + retval = tracefs_hist_add_key(data->trace_hist, "cpu", 0); + if (retval) + goto out_err; + + retval = tracefs_hist_start(tool->trace.inst, data->trace_hist); + if (retval) + goto out_err; + + return 0; + +out_err: + osnoise_destroy_trace_hist(tool); + return 1; +} + +/* + * osnoise_read_trace_hist - parse histogram file and file osnoise histogram + */ +static void osnoise_read_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + long long cpu, counter, duration; + char *content, *position; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + + content = tracefs_event_file_read(tool->trace.inst, "osnoise", + "sample_threshold", + "hist", NULL); + if (!content) + return; + + position = content; + while (true) { + position = strstr(position, "duration: ~"); + if (!position) + break; + position += strlen("duration: ~"); + duration = get_llong_from_str(position); + if (duration == -1) + err_msg("error reading duration from histogram\n"); + + position = strstr(position, "cpu:"); + if (!position) + break; + position += strlen("cpu: "); + cpu = get_llong_from_str(position); + if (cpu == -1) + err_msg("error reading cpu from histogram\n"); + + position = strstr(position, "hitcount:"); + if (!position) + break; + position += strlen("hitcount: "); + counter = get_llong_from_str(position); + if (counter == -1) + err_msg("error reading counter from histogram\n"); + + osnoise_hist_update_multiple(tool, cpu, duration, counter); + } + free(content); +} + +/* + * osnoise_hist_header - print the header of the tracer to the output + */ +static void osnoise_hist_header(struct osnoise_tool *tool) +{ + struct osnoise_params 
*params = to_osnoise_params(tool->params); + struct osnoise_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->common.hist.no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA osnoise histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->common.output_divisor == 1 ? "nanoseconds" : "microseconds", + params->common.output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->common.hist.no_index) + trace_seq_printf(s, "Index"); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(s, " CPU-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * osnoise_print_summary - print the summary of the hist data to the output + */ +static void +osnoise_print_summary(struct osnoise_params *params, + struct trace_instance *trace, + struct osnoise_hist_data *data) +{ + int cpu; + + if (params->common.hist.no_summary) + return; + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "count:"); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "min: "); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_sample); + + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "avg: "); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].count) + continue; + + if (data->hist[cpu].count) + trace_seq_printf(trace->seq, "%9.2f 
", + ((double) data->hist[cpu].sum_sample) / data->hist[cpu].count); + else + trace_seq_printf(trace->seq, " - "); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "max: "); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_sample); + + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_print_stats - print data for all CPUs + */ +static void +osnoise_print_stats(struct osnoise_tool *tool) +{ + struct osnoise_params *params = to_osnoise_params(tool->params); + struct osnoise_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int has_samples = 0; + int bucket, cpu; + int total; + + osnoise_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].count) + continue; + + total += data->hist[cpu].samples[bucket]; + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]); + } + + if (total == 0 && !params->common.hist.with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + /* There are samples above the threshold */ + has_samples = 1; + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + /* + * If no samples were recorded, skip calculations, print zeroed statistics + * and return. 
+ */ + if (!has_samples) { + trace_seq_reset(trace->seq); + trace_seq_printf(trace->seq, "over: 0\ncount: 0\nmin: 0\navg: 0\nmax: 0\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + return; + } + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "over: "); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].samples[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + osnoise_print_summary(params, trace, data); + osnoise_report_missed_events(tool); +} + +/* + * osnoise_hist_usage - prints osnoise hist usage message + */ +static void osnoise_hist_usage(void) +{ + int i; + + static const char * const msg[] = { + "", + " usage: rtla osnoise hist [-h] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", + " [-T us] [-t [file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", + " [-c cpu-list] [-H cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros] [-C [cgroup_name]] [--warm-up]", + "", + " -h/--help: print this menu", + " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -T/--threshold us: the minimum delta to be considered a noise", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup [cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", 
+ " -t/--trace [file]: save the stopped trace to [file|osnoise_trace.txt]", + " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", + " --filter <filter>: enable a trace event filter to the previous -e event", + " --trigger <trigger>: enable a trace event trigger to the previous -e event", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -E/--entries N: set the number of entries of the histogram (default 256)", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period: set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + " --warm-up: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --on-threshold <action>: define action to be executed at stop-total threshold, multiple are allowed", + " --on-end <action>: define action to be executed at measurement end, multiple are allowed", + NULL, + }; + + fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + + exit(EXIT_SUCCESS); +} + +/* + * osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct common_params +*osnoise_hist_parse_args(int argc, char *argv[]) +{ + struct osnoise_params *params; + struct trace_events *tevent; + int retval; + int c; + char *trace_output = NULL; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + actions_init(¶ms->common.threshold_actions); + actions_init(¶ms->common.end_actions); + + /* display data in microseconds */ + 
params->common.output_divisor = 1000; + params->common.hist.bucket_size = 1; + params->common.hist.entries = 256; + + while (1) { + static struct option long_options[] = { + {"auto", required_argument, 0, 'a'}, + {"bucket-size", required_argument, 0, 'b'}, + {"entries", required_argument, 0, 'E'}, + {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"house-keeping", required_argument, 0, 'H'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {"event", required_argument, 0, 'e'}, + {"threshold", required_argument, 0, 'T'}, + {"no-header", no_argument, 0, '0'}, + {"no-summary", no_argument, 0, '1'}, + {"no-index", no_argument, 0, '2'}, + {"with-zeros", no_argument, 0, '3'}, + {"trigger", required_argument, 0, '4'}, + {"filter", required_argument, 0, '5'}, + {"warm-up", required_argument, 0, '6'}, + {"trace-buffer-size", required_argument, 0, '7'}, + {"on-threshold", required_argument, 0, '8'}, + {"on-end", required_argument, 0, '9'}, + {0, 0, 0, 0} + }; + + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:6:7:", + long_options, NULL); + + /* detect the end of the options. 
*/ + if (c == -1) + break; + + switch (c) { + case 'a': + /* set sample stop to auto_thresh */ + params->common.stop_us = get_llong_from_str(optarg); + + /* set sample threshold to 1 */ + params->threshold = 1; + + /* set trace */ + if (!trace_output) + trace_output = "osnoise_trace.txt"; + + break; + case 'b': + params->common.hist.bucket_size = get_llong_from_str(optarg); + if (params->common.hist.bucket_size == 0 || + params->common.hist.bucket_size >= 1000000) + fatal("Bucket size needs to be > 0 and <= 1000000"); + break; + case 'c': + retval = parse_cpu_set(optarg, ¶ms->common.monitored_cpus); + if (retval) + fatal("Invalid -c cpu list"); + params->common.cpus = optarg; + break; + case 'C': + params->common.cgroup = 1; + params->common.cgroup_name = parse_optional_arg(argc, argv); + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->common.duration = parse_seconds_duration(optarg); + if (!params->common.duration) + fatal("Invalid -D duration"); + break; + case 'e': + tevent = trace_event_alloc(optarg); + if (!tevent) + fatal("Error alloc trace event"); + + if (params->common.events) + tevent->next = params->common.events; + + params->common.events = tevent; + break; + case 'E': + params->common.hist.entries = get_llong_from_str(optarg); + if (params->common.hist.entries < 10 || + params->common.hist.entries > 9999999) + fatal("Entries must be > 10 and < 9999999"); + break; + case 'h': + case '?': + osnoise_hist_usage(); + break; + case 'H': + params->common.hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->common.hk_cpu_set); + if (retval) + fatal("Error parsing house keeping CPUs"); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + fatal("Period longer than 10 s"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->common.sched_param); + if (retval == -1) + fatal("Invalid -P priority"); + params->common.set_sched = 1; + break; + case 'r': + params->runtime = 
get_llong_from_str(optarg); + if (params->runtime < 100) + fatal("Runtime shorter than 100 us"); + break; + case 's': + params->common.stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->common.stop_total_us = get_llong_from_str(optarg); + break; + case 'T': + params->threshold = get_llong_from_str(optarg); + break; + case 't': + trace_output = parse_optional_arg(argc, argv); + if (!trace_output) + trace_output = "osnoise_trace.txt"; + break; + case '0': /* no header */ + params->common.hist.no_header = 1; + break; + case '1': /* no summary */ + params->common.hist.no_summary = 1; + break; + case '2': /* no index */ + params->common.hist.no_index = 1; + break; + case '3': /* with zeros */ + params->common.hist.with_zeros = 1; + break; + case '4': /* trigger */ + if (params->common.events) { + retval = trace_event_add_trigger(params->common.events, optarg); + if (retval) + fatal("Error adding trigger %s", optarg); + } else { + fatal("--trigger requires a previous -e"); + } + break; + case '5': /* filter */ + if (params->common.events) { + retval = trace_event_add_filter(params->common.events, optarg); + if (retval) + fatal("Error adding filter %s", optarg); + } else { + fatal("--filter requires a previous -e"); + } + break; + case '6': + params->common.warmup = get_llong_from_str(optarg); + break; + case '7': + params->common.buffer_size = get_llong_from_str(optarg); + break; + case '8': + retval = actions_parse(¶ms->common.threshold_actions, optarg, + "osnoise_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + case '9': + retval = actions_parse(¶ms->common.end_actions, optarg, + "osnoise_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + default: + fatal("Invalid option"); + } + } + + if (trace_output) + actions_add_trace_output(¶ms->common.threshold_actions, trace_output); + + if (geteuid()) + fatal("rtla needs root permission"); + + if (params->common.hist.no_index && 
!params->common.hist.with_zeros) + fatal("no-index set and with-zeros not set - it does not make sense"); + + return ¶ms->common; +} + +/* + * osnoise_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +osnoise_hist_apply_config(struct osnoise_tool *tool) +{ + return osnoise_apply_config(tool, to_osnoise_params(tool->params)); +} + +/* + * osnoise_init_hist - initialize a osnoise hist tool with parameters + */ +static struct osnoise_tool +*osnoise_init_hist(struct common_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_hist"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_histogram(nr_cpus, params->hist.entries, + params->hist.bucket_size); + if (!tool->data) + goto out_err; + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int osnoise_hist_enable(struct osnoise_tool *tool) +{ + int retval; + + retval = osnoise_init_trace_hist(tool); + if (retval) + return retval; + + return osnoise_enable(tool); +} + +static int osnoise_hist_main_loop(struct osnoise_tool *tool) +{ + int retval; + + retval = hist_main_loop(tool); + osnoise_read_trace_hist(tool); + + return retval; +} + +struct tool_ops osnoise_hist_ops = { + .tracer = "osnoise", + .comm_prefix = "osnoise/", + .parse_args = osnoise_hist_parse_args, + .init_tool = osnoise_init_hist, + .apply_config = osnoise_hist_apply_config, + .enable = osnoise_hist_enable, + .main = osnoise_hist_main_loop, + .print_stats = osnoise_print_stats, + .free = osnoise_free_hist_tool, +}; diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c new file mode 100644 index 000000000000..04c699bdd736 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include 
<getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "osnoise.h" + +struct osnoise_top_cpu { + unsigned long long sum_runtime; + unsigned long long sum_noise; + unsigned long long max_noise; + unsigned long long max_sample; + + unsigned long long hw_count; + unsigned long long nmi_count; + unsigned long long irq_count; + unsigned long long softirq_count; + unsigned long long thread_count; + + int sum_cycles; +}; + +struct osnoise_top_data { + struct osnoise_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * osnoise_free_top - free runtime data + */ +static void osnoise_free_top(struct osnoise_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +static void osnoise_free_top_tool(struct osnoise_tool *tool) +{ + osnoise_free_top(tool->data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_top_data *osnoise_alloc_top(int nr_cpus) +{ + struct osnoise_top_data *data; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + return data; + +cleanup: + osnoise_free_top(data); + return NULL; +} + +/* + * osnoise_top_handler - this is the handler for osnoise tracer events + */ +static int +osnoise_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + struct osnoise_tool *tool; + unsigned long long val; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + cpu_data->sum_cycles++; + + tep_get_field_val(s, event, "runtime", record, &val, 1); + update_sum(&cpu_data->sum_runtime, &val); + + tep_get_field_val(s, 
event, "noise", record, &val, 1); + update_max(&cpu_data->max_noise, &val); + update_sum(&cpu_data->sum_noise, &val); + + tep_get_field_val(s, event, "max_sample", record, &val, 1); + update_max(&cpu_data->max_sample, &val); + + tep_get_field_val(s, event, "hw_count", record, &val, 1); + update_sum(&cpu_data->hw_count, &val); + + tep_get_field_val(s, event, "nmi_count", record, &val, 1); + update_sum(&cpu_data->nmi_count, &val); + + tep_get_field_val(s, event, "irq_count", record, &val, 1); + update_sum(&cpu_data->irq_count, &val); + + tep_get_field_val(s, event, "softirq_count", record, &val, 1); + update_sum(&cpu_data->softirq_count, &val); + + tep_get_field_val(s, event, "thread_count", record, &val, 1); + update_sum(&cpu_data->thread_count, &val); + + return 0; +} + +/* + * osnoise_top_header - print the header of the tool output + */ +static void osnoise_top_header(struct osnoise_tool *top) +{ + struct osnoise_params *params = to_osnoise_params(top->params); + struct trace_seq *s = top->trace.seq; + bool pretty = params->common.pretty_output; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + if (pretty) + trace_seq_printf(s, "\033[2;37;40m"); + + trace_seq_printf(s, " "); + + if (params->mode == MODE_OSNOISE) { + trace_seq_printf(s, "Operating System Noise"); + trace_seq_printf(s, " "); + } else if (params->mode == MODE_HWNOISE) { + trace_seq_printf(s, "Hardware-related Noise"); + } + + trace_seq_printf(s, " "); + + if (pretty) + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "duration: %9s | time is in us\n", duration); + + if (pretty) + trace_seq_printf(s, "\033[2;30;47m"); + + trace_seq_printf(s, "CPU Period Runtime "); + trace_seq_printf(s, " Noise "); + trace_seq_printf(s, " %% CPU Aval "); + trace_seq_printf(s, " Max Noise Max Single "); + trace_seq_printf(s, " HW NMI"); + + if (params->mode == MODE_HWNOISE) + goto eol; + + trace_seq_printf(s, " IRQ Softirq Thread"); + +eol: 
+ if (pretty) + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * osnoise_top_print - prints the output of a given CPU + */ +static void osnoise_top_print(struct osnoise_tool *tool, int cpu) +{ + struct osnoise_params *params = to_osnoise_params(tool->params); + struct trace_seq *s = tool->trace.seq; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int percentage; + int decimal; + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + if (!cpu_data->sum_runtime) + return; + + percentage = ((cpu_data->sum_runtime - cpu_data->sum_noise) * 10000000) + / cpu_data->sum_runtime; + decimal = percentage % 100000; + percentage = percentage / 100000; + + trace_seq_printf(s, "%3d #%-6d %12llu ", cpu, cpu_data->sum_cycles, cpu_data->sum_runtime); + trace_seq_printf(s, "%12llu ", cpu_data->sum_noise); + trace_seq_printf(s, " %3d.%05d", percentage, decimal); + trace_seq_printf(s, "%12llu %12llu", cpu_data->max_noise, cpu_data->max_sample); + + trace_seq_printf(s, "%12llu ", cpu_data->hw_count); + trace_seq_printf(s, "%12llu ", cpu_data->nmi_count); + + if (params->mode == MODE_HWNOISE) { + trace_seq_printf(s, "\n"); + return; + } + + trace_seq_printf(s, "%12llu ", cpu_data->irq_count); + trace_seq_printf(s, "%12llu ", cpu_data->softirq_count); + trace_seq_printf(s, "%12llu\n", cpu_data->thread_count); +} + +/* + * osnoise_print_stats - print data for all cpus + */ +static void +osnoise_print_stats(struct osnoise_tool *top) +{ + struct osnoise_params *params = to_osnoise_params(top->params); + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->common.quiet) + clear_terminal(trace->seq); + + osnoise_top_header(top); + + for_each_monitored_cpu(i, 
nr_cpus, ¶ms->common) { + osnoise_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + osnoise_report_missed_events(top); +} + +/* + * osnoise_top_usage - prints osnoise top usage message + */ +static void osnoise_top_usage(struct osnoise_params *params) +{ + int i; + + static const char * const msg[] = { + " [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", + " [-T us] [-t [file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", + " [-c cpu-list] [-H cpu-list] [-P priority] [-C [cgroup_name]] [--warm-up s]", + "", + " -h/--help: print this menu", + " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -T/--threshold us: the minimum delta to be considered a noise", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup [cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace [file]: save the stopped trace to [file|osnoise_trace.txt]", + " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", + " --filter <filter>: enable a trace event filter to the previous -e event", + " --trigger <trigger>: enable a trace event trigger to the previous -e event", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " 
d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + " --warm-up s: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --on-threshold <action>: define action to be executed at stop-total threshold, multiple are allowed", + " --on-end: define action to be executed at measurement end, multiple are allowed", + NULL, + }; + + if (params->mode == MODE_OSNOISE) { + fprintf(stderr, + "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n", + VERSION); + + fprintf(stderr, " usage: rtla osnoise [top]"); + } + + if (params->mode == MODE_HWNOISE) { + fprintf(stderr, + "rtla hwnoise: a summary of hardware-related noise (version %s)\n", + VERSION); + + fprintf(stderr, " usage: rtla hwnoise"); + } + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + + exit(EXIT_SUCCESS); +} + +/* + * osnoise_top_parse_args - allocs, parse and fill the cmd line parameters + */ +struct common_params *osnoise_top_parse_args(int argc, char **argv) +{ + struct osnoise_params *params; + struct trace_events *tevent; + int retval; + int c; + char *trace_output = NULL; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + actions_init(¶ms->common.threshold_actions); + actions_init(¶ms->common.end_actions); + + if (strcmp(argv[0], "hwnoise") == 0) { + params->mode = MODE_HWNOISE; + /* + * Reduce CPU usage for 75% to avoid killing the system. 
+ */ + params->runtime = 750000; + params->period = 1000000; + } + + while (1) { + static struct option long_options[] = { + {"auto", required_argument, 0, 'a'}, + {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"event", required_argument, 0, 'e'}, + {"house-keeping", required_argument, 0, 'H'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"threshold", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"trigger", required_argument, 0, '0'}, + {"filter", required_argument, 0, '1'}, + {"warm-up", required_argument, 0, '2'}, + {"trace-buffer-size", required_argument, 0, '3'}, + {"on-threshold", required_argument, 0, '4'}, + {"on-end", required_argument, 0, '5'}, + {0, 0, 0, 0} + }; + + c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:2:3:", + long_options, NULL); + + /* Detect the end of the options. 
*/ + if (c == -1) + break; + + switch (c) { + case 'a': + /* set sample stop to auto_thresh */ + params->common.stop_us = get_llong_from_str(optarg); + + /* set sample threshold to 1 */ + params->threshold = 1; + + /* set trace */ + if (!trace_output) + trace_output = "osnoise_trace.txt"; + + break; + case 'c': + retval = parse_cpu_set(optarg, ¶ms->common.monitored_cpus); + if (retval) + fatal("Invalid -c cpu list"); + params->common.cpus = optarg; + break; + case 'C': + params->common.cgroup = 1; + params->common.cgroup_name = parse_optional_arg(argc, argv); + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->common.duration = parse_seconds_duration(optarg); + if (!params->common.duration) + fatal("Invalid -d duration"); + break; + case 'e': + tevent = trace_event_alloc(optarg); + if (!tevent) + fatal("Error alloc trace event"); + + if (params->common.events) + tevent->next = params->common.events; + params->common.events = tevent; + + break; + case 'h': + case '?': + osnoise_top_usage(params); + break; + case 'H': + params->common.hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->common.hk_cpu_set); + if (retval) + fatal("Error parsing house keeping CPUs"); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + fatal("Period longer than 10 s"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->common.sched_param); + if (retval == -1) + fatal("Invalid -P priority"); + params->common.set_sched = 1; + break; + case 'q': + params->common.quiet = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + fatal("Runtime shorter than 100 us"); + break; + case 's': + params->common.stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->common.stop_total_us = get_llong_from_str(optarg); + break; + case 't': + trace_output = parse_optional_arg(argc, argv); + if (!trace_output) + trace_output = "osnoise_trace.txt"; + break; + case 'T': + 
params->threshold = get_llong_from_str(optarg); + break; + case '0': /* trigger */ + if (params->common.events) { + retval = trace_event_add_trigger(params->common.events, optarg); + if (retval) + fatal("Error adding trigger %s", optarg); + } else { + fatal("--trigger requires a previous -e"); + } + break; + case '1': /* filter */ + if (params->common.events) { + retval = trace_event_add_filter(params->common.events, optarg); + if (retval) + fatal("Error adding filter %s", optarg); + } else { + fatal("--filter requires a previous -e"); + } + break; + case '2': + params->common.warmup = get_llong_from_str(optarg); + break; + case '3': + params->common.buffer_size = get_llong_from_str(optarg); + break; + case '4': + retval = actions_parse(¶ms->common.threshold_actions, optarg, + "osnoise_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + case '5': + retval = actions_parse(¶ms->common.end_actions, optarg, + "osnoise_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + default: + fatal("Invalid option"); + } + } + + if (trace_output) + actions_add_trace_output(¶ms->common.threshold_actions, trace_output); + + if (geteuid()) + fatal("osnoise needs root permission"); + + return ¶ms->common; +} + +/* + * osnoise_top_apply_config - apply the top configs to the initialized tool + */ +static int +osnoise_top_apply_config(struct osnoise_tool *tool) +{ + struct osnoise_params *params = to_osnoise_params(tool->params); + int retval; + + retval = osnoise_apply_config(tool, params); + if (retval) + goto out_err; + + if (params->mode == MODE_HWNOISE) { + retval = osnoise_set_irq_disable(tool->context, 1); + if (retval) { + err_msg("Failed to set OSNOISE_IRQ_DISABLE option\n"); + goto out_err; + } + } + + if (isatty(STDOUT_FILENO) && !params->common.quiet) + params->common.pretty_output = 1; + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_top - initialize a osnoise top tool with parameters + */ +struct osnoise_tool 
*osnoise_init_top(struct common_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_top"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_top(nr_cpus); + if (!tool->data) { + osnoise_destroy_tool(tool); + return NULL; + } + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise", + osnoise_top_handler, NULL); + + return tool; +} + +struct tool_ops osnoise_top_ops = { + .tracer = "osnoise", + .comm_prefix = "osnoise/", + .parse_args = osnoise_top_parse_args, + .init_tool = osnoise_init_top, + .apply_config = osnoise_top_apply_config, + .enable = osnoise_enable, + .main = top_main_loop, + .print_stats = osnoise_print_stats, + .free = osnoise_free_top_tool, +}; diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c new file mode 100644 index 000000000000..7635c70123ab --- /dev/null +++ b/tools/tracing/rtla/src/rtla.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "osnoise.h" +#include "timerlat.h" + +/* + * rtla_usage - print rtla usage + */ +static void rtla_usage(int err) +{ + int i; + + static const char *msg[] = { + "", + "rtla version " VERSION, + "", + " usage: rtla COMMAND ...", + "", + " commands:", + " osnoise - gives information about the operating system noise (osnoise)", + " hwnoise - gives information about hardware-related noise", + " timerlat - measures the timer irq and thread latency", + "", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(err); +} + +/* + * run_command - try to run a rtla tool command + * + * It returns 0 if it fails. The tool's main will generally not + * return as they should call exit(). 
+ */ +int run_command(int argc, char **argv, int start_position) +{ + if (strcmp(argv[start_position], "osnoise") == 0) { + osnoise_main(argc-start_position, &argv[start_position]); + goto ran; + } else if (strcmp(argv[start_position], "hwnoise") == 0) { + hwnoise_main(argc-start_position, &argv[start_position]); + goto ran; + } else if (strcmp(argv[start_position], "timerlat") == 0) { + timerlat_main(argc-start_position, &argv[start_position]); + goto ran; + } + + return 0; +ran: + return 1; +} + +int main(int argc, char *argv[]) +{ + int retval; + + /* is it an alias? */ + retval = run_command(argc, argv, 0); + if (retval) + exit(0); + + if (argc < 2) + goto usage; + + if (strcmp(argv[1], "-h") == 0) { + rtla_usage(0); + } else if (strcmp(argv[1], "--help") == 0) { + rtla_usage(0); + } + + retval = run_command(argc, argv, 1); + if (retval) + exit(0); + +usage: + rtla_usage(1); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.bpf.c b/tools/tracing/rtla/src/timerlat.bpf.c new file mode 100644 index 000000000000..e2265b5d6491 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.bpf.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "timerlat_bpf.h" + +#define nosubprog __always_inline +#define MAX_ENTRIES_DEFAULT 4096 + +char LICENSE[] SEC("license") = "GPL"; + +struct trace_event_raw_timerlat_sample { + unsigned long long timer_latency; + int context; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_ENTRIES_DEFAULT); + __type(key, unsigned int); + __type(value, unsigned long long); +} hist_irq SEC(".maps"), hist_thread SEC(".maps"), hist_user SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, SUMMARY_FIELD_N); + __type(key, unsigned int); + __type(value, unsigned long long); +} summary_irq SEC(".maps"), summary_thread SEC(".maps"), summary_user 
SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, unsigned int); + __type(value, unsigned long long); +} stop_tracing SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1); +} signal_stop_tracing SEC(".maps"); + +/* Params to be set by rtla */ +const volatile int bucket_size = 1; +const volatile int output_divisor = 1000; +const volatile int entries = 256; +const volatile int irq_threshold; +const volatile int thread_threshold; +const volatile bool aa_only; + +nosubprog unsigned long long map_get(void *map, + unsigned int key) +{ + unsigned long long *value_ptr; + + value_ptr = bpf_map_lookup_elem(map, &key); + + return !value_ptr ? 0 : *value_ptr; +} + +nosubprog void map_set(void *map, + unsigned int key, + unsigned long long value) +{ + bpf_map_update_elem(map, &key, &value, BPF_ANY); +} + +nosubprog void map_increment(void *map, + unsigned int key) +{ + map_set(map, key, map_get(map, key) + 1); +} + +nosubprog void update_main_hist(void *map, + int bucket) +{ + if (entries == 0) + /* No histogram */ + return; + + if (bucket >= entries) + /* Overflow */ + return; + + map_increment(map, bucket); +} + +nosubprog void update_summary(void *map, + unsigned long long latency, + int bucket) +{ + if (aa_only) + /* Auto-analysis only, nothing to be done here */ + return; + + map_set(map, SUMMARY_CURRENT, latency); + + if (bucket >= entries) + /* Overflow */ + map_increment(map, SUMMARY_OVERFLOW); + + if (latency > map_get(map, SUMMARY_MAX)) + map_set(map, SUMMARY_MAX, latency); + + if (latency < map_get(map, SUMMARY_MIN) || map_get(map, SUMMARY_COUNT) == 0) + map_set(map, SUMMARY_MIN, latency); + + map_increment(map, SUMMARY_COUNT); + map_set(map, SUMMARY_SUM, map_get(map, SUMMARY_SUM) + latency); +} + +nosubprog void set_stop_tracing(void) +{ + int value = 0; + + /* Suppress further sample processing */ + map_set(&stop_tracing, 0, 1); + + /* Signal to userspace */ + 
bpf_ringbuf_output(&signal_stop_tracing, &value, sizeof(value), 0); +} + +SEC("tp/osnoise/timerlat_sample") +int handle_timerlat_sample(struct trace_event_raw_timerlat_sample *tp_args) +{ + unsigned long long latency, latency_us; + int bucket; + + if (map_get(&stop_tracing, 0)) + return 0; + + latency = tp_args->timer_latency / output_divisor; + latency_us = tp_args->timer_latency / 1000; + bucket = latency / bucket_size; + + if (tp_args->context == 0) { + update_main_hist(&hist_irq, bucket); + update_summary(&summary_irq, latency, bucket); + + if (irq_threshold != 0 && latency_us >= irq_threshold) + set_stop_tracing(); + } else if (tp_args->context == 1) { + update_main_hist(&hist_thread, bucket); + update_summary(&summary_thread, latency, bucket); + + if (thread_threshold != 0 && latency_us >= thread_threshold) + set_stop_tracing(); + } else { + update_main_hist(&hist_user, bucket); + update_summary(&summary_user, latency, bucket); + + if (thread_threshold != 0 && latency_us >= thread_threshold) + set_stop_tracing(); + } + + return 0; +} diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c new file mode 100644 index 000000000000..df4f9bfe3433 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ +#define _GNU_SOURCE +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <sched.h> + +#include "timerlat.h" +#include "timerlat_aa.h" +#include "timerlat_bpf.h" + +#define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ + +static int dma_latency_fd = -1; + +/* + * timerlat_apply_config - apply common configs to the initialized tool + */ +int +timerlat_apply_config(struct osnoise_tool *tool, struct timerlat_params *params) +{ + int retval; + + /* + * Try to 
enable BPF, unless disabled explicitly. + * If BPF enablement fails, fall back to tracefs mode. + */ + if (getenv("RTLA_NO_BPF") && strncmp(getenv("RTLA_NO_BPF"), "1", 2) == 0) { + debug_msg("RTLA_NO_BPF set, disabling BPF\n"); + params->mode = TRACING_MODE_TRACEFS; + } else if (!tep_find_event_by_name(tool->trace.tep, "osnoise", "timerlat_sample")) { + debug_msg("osnoise:timerlat_sample missing, disabling BPF\n"); + params->mode = TRACING_MODE_TRACEFS; + } else { + retval = timerlat_bpf_init(params); + if (retval) { + debug_msg("Could not enable BPF\n"); + params->mode = TRACING_MODE_TRACEFS; + } + } + + if (params->mode != TRACING_MODE_BPF) { + /* + * In tracefs and mixed mode, timerlat tracer handles stopping + * on threshold + */ + retval = osnoise_set_stop_us(tool->context, params->common.stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + + retval = osnoise_set_stop_total_us(tool->context, params->common.stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + + retval = osnoise_set_timerlat_period_us(tool->context, + params->timerlat_period_us ? + params->timerlat_period_us : + DEFAULT_TIMERLAT_PERIOD); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + + + retval = osnoise_set_print_stack(tool->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + + /* + * If the user did not specify a type of thread, try user-threads first. + * Fall back to kernel threads otherwise. 
+ */ + if (!params->common.kernel_workload && !params->common.user_data) { + retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); + if (retval) { + debug_msg("User-space interface detected, setting user-threads\n"); + params->common.user_workload = 1; + params->common.user_data = 1; + } else { + debug_msg("User-space interface not detected, setting kernel-threads\n"); + params->common.kernel_workload = 1; + } + } + + return common_apply_config(tool, ¶ms->common); + +out_err: + return -1; +} + +int timerlat_enable(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval, nr_cpus, i; + + if (params->dma_latency >= 0) { + dma_latency_fd = set_cpu_dma_latency(params->dma_latency); + if (dma_latency_fd < 0) { + err_msg("Could not set /dev/cpu_dma_latency.\n"); + return -1; + } + } + + if (params->deepest_idle_state >= -1) { + if (!have_libcpupower_support()) { + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); + return -1; + } + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for_each_monitored_cpu(i, nr_cpus, ¶ms->common) { + if (save_cpu_idle_disable_state(i) < 0) { + err_msg("Could not save cpu idle state.\n"); + return -1; + } + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { + err_msg("Could not set deepest cpu idle state.\n"); + return -1; + } + } + } + + if (!params->no_aa) { + tool->aa = osnoise_init_tool("timerlat_aa"); + if (!tool->aa) + return -1; + + retval = timerlat_aa_init(tool->aa, params->dump_tasks); + if (retval) { + err_msg("Failed to enable the auto analysis instance\n"); + return retval; + } + + retval = enable_tracer_by_name(tool->aa->trace.inst, "timerlat"); + if (retval) { + err_msg("Failed to enable aa tracer\n"); + return retval; + } + } + + if (params->common.warmup > 0) { + debug_msg("Warming up for %d seconds\n", params->common.warmup); + sleep(params->common.warmup); + if (stop_tracing) + return -1; + } + + /* + 
* Start the tracers here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (tool->record) + trace_instance_start(&tool->record->trace); + if (!params->no_aa) + trace_instance_start(&tool->aa->trace); + if (params->mode == TRACING_MODE_TRACEFS) { + trace_instance_start(&tool->trace); + } else { + retval = timerlat_bpf_attach(); + if (retval) { + err_msg("Error attaching BPF program\n"); + return retval; + } + } + + return 0; +} + +void timerlat_analyze(struct osnoise_tool *tool, bool stopped) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + + if (stopped) { + if (!params->no_aa) + timerlat_auto_analysis(params->common.stop_us, + params->common.stop_total_us); + } else if (params->common.aa_only) { + char *max_lat; + + /* + * If the trace did not stop with --aa-only, at least print + * the max known latency. 
+ */ + max_lat = tracefs_instance_file_read(trace_inst->inst, "tracing_max_latency", NULL); + if (max_lat) { + printf(" Max latency was %s\n", max_lat); + free(max_lat); + } + } +} + +void timerlat_free(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + int i; + + timerlat_aa_destroy(); + if (dma_latency_fd >= 0) + close(dma_latency_fd); + if (params->deepest_idle_state >= -1) { + for_each_monitored_cpu(i, nr_cpus, ¶ms->common) { + restore_cpu_idle_disable_state(i); + } + } + + osnoise_destroy_tool(tool->aa); + + if (params->mode != TRACING_MODE_TRACEFS) + timerlat_bpf_destroy(); + free_cpu_idle_disable_states(); +} + +static void timerlat_usage(int err) +{ + int i; + + static const char * const msg[] = { + "", + "timerlat version " VERSION, + "", + " usage: [rtla] timerlat [MODE] ...", + "", + " modes:", + " top - prints the summary from timerlat tracer", + " hist - prints a histogram of timer latencies", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(err); +} + +int timerlat_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if timerlat was called without any argument, run the + * default cmdline. 
+ */ + if (argc == 1) { + run_tool(&timerlat_top_ops, argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + timerlat_usage(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + run_tool(&timerlat_top_ops, argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + run_tool(&timerlat_top_ops, argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + run_tool(&timerlat_hist_ops, argc-1, &argv[1]); + exit(0); + } + +usage: + timerlat_usage(1); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h new file mode 100644 index 000000000000..fd6065f48bb7 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "osnoise.h" + +/* + * Define timerlat tracing mode. + * + * There are three tracing modes: + * - tracefs-only, used when BPF is unavailable. + * - BPF-only, used when BPF is available and neither trace saving nor + * auto-analysis are enabled. + * - mixed mode, used when BPF is available and either trace saving or + * auto-analysis is enabled (which rely on sample collection through + * tracefs). 
+ */ +enum timerlat_tracing_mode { + TRACING_MODE_BPF, + TRACING_MODE_TRACEFS, + TRACING_MODE_MIXED, +}; + +struct timerlat_params { + struct common_params common; + long long timerlat_period_us; + long long print_stack; + int dma_latency; + int no_aa; + int dump_tasks; + int deepest_idle_state; + enum timerlat_tracing_mode mode; +}; + +#define to_timerlat_params(ptr) container_of(ptr, struct timerlat_params, common) + +int timerlat_apply_config(struct osnoise_tool *tool, struct timerlat_params *params); +int timerlat_main(int argc, char *argv[]); +int timerlat_enable(struct osnoise_tool *tool); +void timerlat_analyze(struct osnoise_tool *tool, bool stopped); +void timerlat_free(struct osnoise_tool *tool); + diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c new file mode 100644 index 000000000000..31e66ea2b144 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -0,0 +1,1056 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <stdlib.h> +#include <errno.h> +#include "timerlat.h" +#include <unistd.h> + +enum timelat_state { + TIMERLAT_INIT = 0, + TIMERLAT_WAITING_IRQ, + TIMERLAT_WAITING_THREAD, +}; + +/* Used to fill spaces in the output */ +static const char *spaces = " "; + +#define MAX_COMM 24 + +/* + * Per-cpu data statistics and data. + */ +struct timerlat_aa_data { + /* Current CPU state */ + int curr_state; + + /* timerlat IRQ latency */ + unsigned long long tlat_irq_seqnum; + unsigned long long tlat_irq_latency; + unsigned long long tlat_irq_timstamp; + + /* timerlat Thread latency */ + unsigned long long tlat_thread_seqnum; + unsigned long long tlat_thread_latency; + unsigned long long tlat_thread_timstamp; + + /* + * Information about the thread running when the IRQ + * arrived. + * + * This can be blocking or interference, depending on the + * priority of the thread. 
Assuming timerlat is the highest + * prio, it is blocking. If timerlat has a lower prio, it is + * interference. + * note: "unsigned long long" because they are fetch using tep_get_field_val(); + */ + unsigned long long run_thread_pid; + char run_thread_comm[MAX_COMM]; + unsigned long long thread_blocking_duration; + unsigned long long max_exit_idle_latency; + + /* Information about the timerlat timer irq */ + unsigned long long timer_irq_start_time; + unsigned long long timer_irq_start_delay; + unsigned long long timer_irq_duration; + unsigned long long timer_exit_from_idle; + + /* + * Information about the last IRQ before the timerlat irq + * arrived. + * + * If now - timestamp is <= latency, it might have influenced + * in the timerlat irq latency. Otherwise, ignore it. + */ + unsigned long long prev_irq_duration; + unsigned long long prev_irq_timstamp; + + /* + * Interference sum. + */ + unsigned long long thread_nmi_sum; + unsigned long long thread_irq_sum; + unsigned long long thread_softirq_sum; + unsigned long long thread_thread_sum; + + /* + * Interference task information. + */ + struct trace_seq *prev_irqs_seq; + struct trace_seq *nmi_seq; + struct trace_seq *irqs_seq; + struct trace_seq *softirqs_seq; + struct trace_seq *threads_seq; + struct trace_seq *stack_seq; + + /* + * Current thread. + */ + char current_comm[MAX_COMM]; + unsigned long long current_pid; + + /* + * Is the system running a kworker? + */ + unsigned long long kworker; + unsigned long long kworker_func; +}; + +/* + * The analysis context and system wide view + */ +struct timerlat_aa_context { + int nr_cpus; + int dump_tasks; + + /* per CPU data */ + struct timerlat_aa_data *taa_data; + + /* + * required to translate function names and register + * events. + */ + struct osnoise_tool *tool; +}; + +/* + * The data is stored as a local variable, but accessed via a helper function. + * + * It could be stored inside the trace context. 
But every access would + * require container_of() + a series of pointers. Do we need it? Not sure. + * + * For now keep it simple. If needed, store it in the tool, add the *context + * as a parameter in timerlat_aa_get_ctx() and do the magic there. + */ +static struct timerlat_aa_context *__timerlat_aa_ctx; + +static struct timerlat_aa_context *timerlat_aa_get_ctx(void) +{ + return __timerlat_aa_ctx; +} + +/* + * timerlat_aa_get_data - Get the per-cpu data from the timerlat context + */ +static struct timerlat_aa_data +*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu) +{ + return &taa_ctx->taa_data[cpu]; +} + +/* + * timerlat_aa_irq_latency - Handles timerlat IRQ event + */ +static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data, + struct trace_seq *s, struct tep_record *record, + struct tep_event *event) +{ + /* + * For interference, we start now looking for things that can delay + * the thread. + */ + taa_data->curr_state = TIMERLAT_WAITING_THREAD; + taa_data->tlat_irq_timstamp = record->ts; + + /* + * Zero values. + */ + taa_data->thread_nmi_sum = 0; + taa_data->thread_irq_sum = 0; + taa_data->thread_softirq_sum = 0; + taa_data->thread_thread_sum = 0; + taa_data->thread_blocking_duration = 0; + taa_data->timer_irq_start_time = 0; + taa_data->timer_irq_duration = 0; + taa_data->timer_exit_from_idle = 0; + + /* + * Zero interference tasks. + */ + trace_seq_reset(taa_data->nmi_seq); + trace_seq_reset(taa_data->irqs_seq); + trace_seq_reset(taa_data->softirqs_seq); + trace_seq_reset(taa_data->threads_seq); + + /* IRQ latency values */ + tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1); + tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1); + + /* The thread that can cause blocking */ + tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1); + + /* + * Get exit from idle case. 
+ * + * If it is not idle thread: + */ + if (taa_data->run_thread_pid) + return 0; + + /* + * if the latency is shorter than the known exit from idle: + */ + if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency) + return 0; + + /* + * To be safe, ignore the cases in which an IRQ/NMI could have + * interfered with the timerlat IRQ. + */ + if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency + < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) + return 0; + + taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency; + + return 0; +} + +/* + * timerlat_aa_thread_latency - Handles timerlat thread event + */ +static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data, + struct trace_seq *s, struct tep_record *record, + struct tep_event *event) +{ + /* + * For interference, we start now looking for things that can delay + * the IRQ of the next cycle. + */ + taa_data->curr_state = TIMERLAT_WAITING_IRQ; + taa_data->tlat_thread_timstamp = record->ts; + + /* Thread latency values */ + tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1); + tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1); + + return 0; +} + +/* + * timerlat_aa_handler - Handle timerlat events + * + * This function is called to handle timerlat events recording statistics. + * + * Returns 0 on success, -1 otherwise. 
+ */ +static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long thread; + + if (!taa_data) + return -1; + + tep_get_field_val(s, event, "context", record, &thread, 1); + if (!thread) + return timerlat_aa_irq_latency(taa_data, s, record, event); + else + return timerlat_aa_thread_latency(taa_data, s, record, event); +} + +/* + * timerlat_aa_nmi_handler - Handles NMI noise + * + * It is used to collect information about interferences from NMI. It is + * hooked to the osnoise:nmi_noise event. + */ +static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long duration; + unsigned long long start; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) { + taa_data->prev_irq_duration = duration; + taa_data->prev_irq_timstamp = start; + + trace_seq_reset(taa_data->prev_irqs_seq); + trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n", + "nmi", + 24, spaces, + ns_to_usf(duration)); + return 0; + } + + taa_data->thread_nmi_sum += duration; + trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n", + "nmi", + 24, spaces, ns_to_usf(duration)); + + return 0; +} + +/* + * timerlat_aa_irq_handler - Handles IRQ noise + * + * It is used to collect information about interferences from IRQ. It is + * hooked to the osnoise:irq_noise event. + * + * It is a little bit more complex than the other because it measures: + * - The IRQs that can delay the timer IRQ before it happened. 
+ * - The Timerlat IRQ handler + * - The IRQs that happened between the timerlat IRQ and the timerlat thread + * (IRQ interference). + */ +static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long expected_start; + unsigned long long duration; + unsigned long long vector; + unsigned long long start; + char *desc; + int val; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + tep_get_field_val(s, event, "vector", record, &vector, 1); + desc = tep_get_field_raw(s, event, "desc", record, &val, 1); + + /* + * Before the timerlat IRQ. + */ + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) { + taa_data->prev_irq_duration = duration; + taa_data->prev_irq_timstamp = start; + + trace_seq_reset(taa_data->prev_irqs_seq); + trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + desc, vector, + 15, spaces, + ns_to_usf(duration)); + return 0; + } + + /* + * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at + * the timerlat irq handler. + */ + if (!taa_data->timer_irq_start_time) { + expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency; + + taa_data->timer_irq_start_time = start; + taa_data->timer_irq_duration = duration; + + /* + * We are dealing with two different clock sources: the + * external clock source that timerlat uses as a reference + * and the clock used by the tracer. There are also two + * moments: the time reading the clock and the timer in + * which the event is placed in the buffer (the trace + * event timestamp). 
If the processor is slow or there + * is some hardware noise, the difference between the + * timestamp and the external clock read can be longer + * than the IRQ handler delay, resulting in a negative + * time. If so, set IRQ start delay as 0. In the end, + * it is less relevant than the noise. + */ + if (expected_start < taa_data->timer_irq_start_time) + taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start; + else + taa_data->timer_irq_start_delay = 0; + + /* + * not exit from idle. + */ + if (taa_data->run_thread_pid) + return 0; + + if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) + taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay; + + return 0; + } + + /* + * IRQ interference. + */ + taa_data->thread_irq_sum += duration; + trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + desc, vector, + 24, spaces, + ns_to_usf(duration)); + + return 0; +} + +static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", + "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" }; + + +/* + * timerlat_aa_softirq_handler - Handles Softirq noise + * + * It is used to collect information about interferences from Softirq. It is + * hooked to the osnoise:softirq_noise event. + * + * It is only printed in the non-rt kernel, as softirqs become thread on RT. 
+ */ +static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long duration; + unsigned long long vector; + unsigned long long start; + + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) + return 0; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + tep_get_field_val(s, event, "vector", record, &vector, 1); + + taa_data->thread_softirq_sum += duration; + + trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + softirq_name[vector], vector, + 24, spaces, + ns_to_usf(duration)); + return 0; +} + +/* + * timerlat_aa_softirq_handler - Handles thread noise + * + * It is used to collect information about interferences from threads. It is + * hooked to the osnoise:thread_noise event. + * + * Note: if you see thread noise, your timerlat thread was not the highest prio one. 
+ */ +static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long duration; + unsigned long long start; + unsigned long long pid; + const char *comm; + int val; + + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) + return 0; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + + tep_get_common_field_val(s, event, "common_pid", record, &pid, 1); + comm = tep_get_field_raw(s, event, "comm", record, &val, 1); + + if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) { + taa_data->thread_blocking_duration = duration; + + if (comm) + strncpy(taa_data->run_thread_comm, comm, MAX_COMM); + else + sprintf(taa_data->run_thread_comm, "<...>"); + + } else { + taa_data->thread_thread_sum += duration; + + trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n", + comm, pid, + 15, spaces, + ns_to_usf(duration)); + } + + return 0; +} + +/* + * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace + * + * Saves and parse the stack trace generated by the timerlat IRQ. 
+ */ +static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long *caller; + const char *function; + int val, i; + + trace_seq_reset(taa_data->stack_seq); + + trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n"); + caller = tep_get_field_raw(s, event, "caller", record, &val, 1); + if (caller) { + for (i = 0; ; i++) { + function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]); + if (!function) + break; + trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n", + 14, spaces, function); + } + } + return 0; +} + +/* + * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU + * + * Handles the sched:sched_switch event to trace the current thread running on the + * CPU. It is used to display the threads running on the other CPUs when the trace + * stops. + */ +static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + const char *comm; + int val; + + tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1); + comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1); + + strncpy(taa_data->current_comm, comm, MAX_COMM); + + /* + * If this was a kworker, clean the last kworkers that ran. + */ + taa_data->kworker = 0; + taa_data->kworker_func = 0; + + return 0; +} + +/* + * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU + * + * Handles workqueue:workqueue_execute_start event, keeping track of + * the job that a kworker could be doing in the CPU. 
+ * + * We already catch problems of hardware related latencies caused by work queues + * running driver code that causes hardware stall. For example, with DRM drivers. + */ +static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + + tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1); + tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1); + return 0; +} + +/* + * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing + * + * This is the core of the analysis. + */ +static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + int irq_thresh, int thread_thresh) +{ + long long exp_irq_ts; + int total; + int irq; + + /* + * IRQ latency or Thread latency? + */ + if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) { + irq = 1; + total = taa_data->tlat_irq_latency; + } else { + irq = 0; + total = taa_data->tlat_thread_latency; + } + + /* + * Expected IRQ arrival time using the trace clock as the base. + * + * TODO: Add a list of previous IRQ, and then run the list backwards. + */ + exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; + if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { + if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) + printf(" Previous IRQ interference: %.*s up to %9.2f us\n", + 16, spaces, + ns_to_usf(taa_data->prev_irq_duration)); + } + + /* + * The delay that the IRQ suffered before starting. + */ + printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces, + (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? 
"(exit from idle)" : "", + ns_to_usf(taa_data->timer_irq_start_delay), + ns_to_per(total, taa_data->timer_irq_start_delay)); + + /* + * Timerlat IRQ. + */ + printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces, + ns_to_usf(taa_data->tlat_irq_latency)); + + if (irq) { + /* + * If the trace stopped due to IRQ, the other events will not happen + * because... the trace stopped :-). + * + * That is all folks, the stack trace was printed before the stop, + * so it will be displayed, it is the key. + */ + printf(" Blocking thread:\n"); + printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm, + taa_data->run_thread_pid); + } else { + /* + * The duration of the IRQ handler that handled the timerlat IRQ. + */ + printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n", + 30, spaces, + ns_to_usf(taa_data->timer_irq_duration), + ns_to_per(total, taa_data->timer_irq_duration)); + + /* + * The amount of time that the current thread postponed the scheduler. + * + * Recalling that it is net from NMI/IRQ/Softirq interference, so there + * is no need to compute values here. + */ + printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_blocking_duration), + ns_to_per(total, taa_data->thread_blocking_duration)); + + printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces, + taa_data->run_thread_comm, taa_data->run_thread_pid, + 12, spaces, ns_to_usf(taa_data->thread_blocking_duration)); + } + + /* + * Print the stack trace! + */ + trace_seq_do_printf(taa_data->stack_seq); + + /* + * NMIs can happen during the IRQ, so they are always possible. + */ + if (taa_data->thread_nmi_sum) + printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_nmi_sum), + ns_to_per(total, taa_data->thread_nmi_sum)); + + /* + * If it is an IRQ latency, the other factors can be skipped. + */ + if (irq) + goto print_total; + + /* + * Prints the interference caused by IRQs to the thread latency. 
+ */ + if (taa_data->thread_irq_sum) { + printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_irq_sum), + ns_to_per(total, taa_data->thread_irq_sum)); + + trace_seq_do_printf(taa_data->irqs_seq); + } + + /* + * Prints the interference caused by Softirqs to the thread latency. + */ + if (taa_data->thread_softirq_sum) { + printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces, + ns_to_usf(taa_data->thread_softirq_sum), + ns_to_per(total, taa_data->thread_softirq_sum)); + + trace_seq_do_printf(taa_data->softirqs_seq); + } + + /* + * Prints the interference caused by other threads to the thread latency. + * + * If this happens, your timerlat is not the highest prio. OK, migration + * thread can happen. But otherwise, you are not measuring the "scheduling + * latency" only, and here is the difference from scheduling latency and + * timer handling latency. + */ + if (taa_data->thread_thread_sum) { + printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces, + ns_to_usf(taa_data->thread_thread_sum), + ns_to_per(total, taa_data->thread_thread_sum)); + + trace_seq_do_printf(taa_data->threads_seq); + } + + /* + * Done. + */ +print_total: + printf("------------------------------------------------------------------------\n"); + printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? 
" IRQ" : "Thread", + 37, spaces, ns_to_usf(total)); +} + +static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) +{ + struct trace_instance *trace = &taa_ctx->tool->trace; + int retval; + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + return 0; + } + + return 1; +} + +/** + * timerlat_auto_analysis - Analyze the collected data + */ +void timerlat_auto_analysis(int irq_thresh, int thread_thresh) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + unsigned long long max_exit_from_idle = 0; + struct timerlat_aa_data *taa_data; + int max_exit_from_idle_cpu; + struct tep_handle *tep; + int cpu; + + timerlat_auto_analysis_collect_trace(taa_ctx); + + /* bring stop tracing to the ns scale */ + irq_thresh = irq_thresh * 1000; + thread_thresh = thread_thresh * 1000; + + for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { + taa_data = timerlat_aa_get_data(taa_ctx, cpu); + + if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) { + printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); + timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); + } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) { + printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); + timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); + } + + if (taa_data->max_exit_idle_latency > max_exit_from_idle) { + max_exit_from_idle = taa_data->max_exit_idle_latency; + max_exit_from_idle_cpu = cpu; + } + + } + + if (max_exit_from_idle) { + printf("\n"); + printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n", + ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu); + } + if (!taa_ctx->dump_tasks) + return; + + printf("\n"); + printf("Printing CPU tasks:\n"); + for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { + taa_data = timerlat_aa_get_data(taa_ctx, cpu); + tep 
= taa_ctx->tool->trace.tep; + + printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid); + + if (taa_data->kworker_func) + printf(" kworker:%s:%s", + tep_find_function(tep, taa_data->kworker) ? : "<...>", + tep_find_function(tep, taa_data->kworker_func)); + printf("\n"); + } + +} + +/* + * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data + */ +static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx) +{ + struct timerlat_aa_data *taa_data; + int i; + + if (!taa_ctx->taa_data) + return; + + for (i = 0; i < taa_ctx->nr_cpus; i++) { + taa_data = timerlat_aa_get_data(taa_ctx, i); + + if (taa_data->prev_irqs_seq) { + trace_seq_destroy(taa_data->prev_irqs_seq); + free(taa_data->prev_irqs_seq); + } + + if (taa_data->nmi_seq) { + trace_seq_destroy(taa_data->nmi_seq); + free(taa_data->nmi_seq); + } + + if (taa_data->irqs_seq) { + trace_seq_destroy(taa_data->irqs_seq); + free(taa_data->irqs_seq); + } + + if (taa_data->softirqs_seq) { + trace_seq_destroy(taa_data->softirqs_seq); + free(taa_data->softirqs_seq); + } + + if (taa_data->threads_seq) { + trace_seq_destroy(taa_data->threads_seq); + free(taa_data->threads_seq); + } + + if (taa_data->stack_seq) { + trace_seq_destroy(taa_data->stack_seq); + free(taa_data->stack_seq); + } + } +} + +/* + * timerlat_aa_init_seqs - Init seq files used to store parsed information + * + * Instead of keeping data structures to store raw data, use seq files to + * store parsed data. + * + * Allocates and initialize seq files. + * + * Returns 0 on success, -1 otherwise. 
+ */ +static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx) +{ + struct timerlat_aa_data *taa_data; + int i; + + for (i = 0; i < taa_ctx->nr_cpus; i++) { + + taa_data = timerlat_aa_get_data(taa_ctx, i); + + taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq)); + if (!taa_data->prev_irqs_seq) + goto out_err; + + trace_seq_init(taa_data->prev_irqs_seq); + + taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq)); + if (!taa_data->nmi_seq) + goto out_err; + + trace_seq_init(taa_data->nmi_seq); + + taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq)); + if (!taa_data->irqs_seq) + goto out_err; + + trace_seq_init(taa_data->irqs_seq); + + taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq)); + if (!taa_data->softirqs_seq) + goto out_err; + + trace_seq_init(taa_data->softirqs_seq); + + taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq)); + if (!taa_data->threads_seq) + goto out_err; + + trace_seq_init(taa_data->threads_seq); + + taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq)); + if (!taa_data->stack_seq) + goto out_err; + + trace_seq_init(taa_data->stack_seq); + } + + return 0; + +out_err: + timerlat_aa_destroy_seqs(taa_ctx); + return -1; +} + +/* + * timerlat_aa_unregister_events - Unregister events used in the auto-analysis + */ +static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks) +{ + + tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_aa_handler, tool); + + tracefs_event_disable(tool->trace.inst, "osnoise", NULL); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", + timerlat_aa_nmi_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", + timerlat_aa_irq_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", + timerlat_aa_softirq_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", 
"thread_noise", + timerlat_aa_thread_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", + timerlat_aa_stack_handler, tool); + if (!dump_tasks) + return; + + tracefs_event_disable(tool->trace.inst, "sched", "sched_switch"); + tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch", + timerlat_aa_sched_switch_handler, tool); + + tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start"); + tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", + timerlat_aa_kworker_start_handler, tool); +} + +/* + * timerlat_aa_register_events - Register events used in the auto-analysis + * + * Returns 0 on success, -1 otherwise. + */ +static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks) +{ + int retval; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_aa_handler, tool); + + + /* + * register auto-analysis handlers. + */ + retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL); + if (retval < 0 && !errno) { + err_msg("Could not find osnoise events\n"); + goto out_err; + } + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", + timerlat_aa_nmi_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", + timerlat_aa_irq_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", + timerlat_aa_softirq_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise", + timerlat_aa_thread_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", + timerlat_aa_stack_handler, tool); + + if (!dump_tasks) + return 0; + + /* + * Dump task events. 
+ */ + retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch"); + if (retval < 0 && !errno) { + err_msg("Could not find sched_switch\n"); + goto out_err; + } + + tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch", + timerlat_aa_sched_switch_handler, tool); + + retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start"); + if (retval < 0 && !errno) { + err_msg("Could not find workqueue_execute_start\n"); + goto out_err; + } + + tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", + timerlat_aa_kworker_start_handler, tool); + + return 0; + +out_err: + timerlat_aa_unregister_events(tool, dump_tasks); + return -1; +} + +/** + * timerlat_aa_destroy - Destroy timerlat auto-analysis + */ +void timerlat_aa_destroy(void) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + + if (!taa_ctx) + return; + + if (!taa_ctx->taa_data) + goto out_ctx; + + timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks); + timerlat_aa_destroy_seqs(taa_ctx); + free(taa_ctx->taa_data); +out_ctx: + free(taa_ctx); +} + +/** + * timerlat_aa_init - Initialize timerlat auto-analysis + * + * Returns 0 on success, -1 otherwise. 
+ */ +int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + struct timerlat_aa_context *taa_ctx; + int retval; + + taa_ctx = calloc(1, sizeof(*taa_ctx)); + if (!taa_ctx) + return -1; + + __timerlat_aa_ctx = taa_ctx; + + taa_ctx->nr_cpus = nr_cpus; + taa_ctx->tool = tool; + taa_ctx->dump_tasks = dump_tasks; + + taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data)); + if (!taa_ctx->taa_data) + goto out_err; + + retval = timerlat_aa_init_seqs(taa_ctx); + if (retval) + goto out_err; + + retval = timerlat_aa_register_events(tool, dump_tasks); + if (retval) + goto out_err; + + return 0; + +out_err: + timerlat_aa_destroy(); + return -1; +} diff --git a/tools/tracing/rtla/src/timerlat_aa.h b/tools/tracing/rtla/src/timerlat_aa.h new file mode 100644 index 000000000000..cea4bb1531a8 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_aa.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +int timerlat_aa_init(struct osnoise_tool *tool, int dump_task); +void timerlat_aa_destroy(void); + +void timerlat_auto_analysis(int irq_thresh, int thread_thresh); diff --git a/tools/tracing/rtla/src/timerlat_bpf.c b/tools/tracing/rtla/src/timerlat_bpf.c new file mode 100644 index 000000000000..e97d16646bcd --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_bpf.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifdef HAVE_BPF_SKEL +#define _GNU_SOURCE +#include "timerlat.h" +#include "timerlat_bpf.h" +#include "timerlat.skel.h" + +static struct timerlat_bpf *bpf; + +/* + * timerlat_bpf_init - load and initialize BPF program to collect timerlat data + */ +int timerlat_bpf_init(struct timerlat_params *params) +{ + int err; + + debug_msg("Loading BPF program\n"); + + bpf = timerlat_bpf__open(); + if (!bpf) + return 1; + + /* Pass common options */ + bpf->rodata->output_divisor = params->common.output_divisor; + 
bpf->rodata->entries = params->common.hist.entries; + bpf->rodata->irq_threshold = params->common.stop_us; + bpf->rodata->thread_threshold = params->common.stop_total_us; + bpf->rodata->aa_only = params->common.aa_only; + + if (params->common.hist.entries != 0) { + /* Pass histogram options */ + bpf->rodata->bucket_size = params->common.hist.bucket_size; + + /* Set histogram array sizes */ + bpf_map__set_max_entries(bpf->maps.hist_irq, params->common.hist.entries); + bpf_map__set_max_entries(bpf->maps.hist_thread, params->common.hist.entries); + bpf_map__set_max_entries(bpf->maps.hist_user, params->common.hist.entries); + } else { + /* No entries, disable histogram */ + bpf_map__set_autocreate(bpf->maps.hist_irq, false); + bpf_map__set_autocreate(bpf->maps.hist_thread, false); + bpf_map__set_autocreate(bpf->maps.hist_user, false); + } + + if (params->common.aa_only) { + /* Auto-analysis only, disable summary */ + bpf_map__set_autocreate(bpf->maps.summary_irq, false); + bpf_map__set_autocreate(bpf->maps.summary_thread, false); + bpf_map__set_autocreate(bpf->maps.summary_user, false); + } + + /* Load and verify BPF program */ + err = timerlat_bpf__load(bpf); + if (err) { + timerlat_bpf__destroy(bpf); + return err; + } + + return 0; +} + +/* + * timerlat_bpf_attach - attach BPF program to collect timerlat data + */ +int timerlat_bpf_attach(void) +{ + debug_msg("Attaching BPF program\n"); + + return timerlat_bpf__attach(bpf); +} + +/* + * timerlat_bpf_detach - detach BPF program to collect timerlat data + */ +void timerlat_bpf_detach(void) +{ + timerlat_bpf__detach(bpf); +} + +/* + * timerlat_bpf_detach - destroy BPF program to collect timerlat data + */ +void timerlat_bpf_destroy(void) +{ + timerlat_bpf__destroy(bpf); +} + +static int handle_rb_event(void *ctx, void *data, size_t data_sz) +{ + return 0; +} + +/* + * timerlat_bpf_wait - wait until tracing is stopped or signal + */ +int timerlat_bpf_wait(int timeout) +{ + struct ring_buffer *rb; + int retval; + + rb = 
ring_buffer__new(bpf_map__fd(bpf->maps.signal_stop_tracing), + handle_rb_event, NULL, NULL); + retval = ring_buffer__poll(rb, timeout * 1000); + ring_buffer__free(rb); + + return retval; +} + +/* + * timerlat_bpf_restart_tracing - restart stopped tracing + */ +int timerlat_bpf_restart_tracing(void) +{ + unsigned int key = 0; + unsigned long long value = 0; + + return bpf_map__update_elem(bpf->maps.stop_tracing, + &key, sizeof(key), + &value, sizeof(value), BPF_ANY); +} + +static int get_value(struct bpf_map *map_irq, + struct bpf_map *map_thread, + struct bpf_map *map_user, + int key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + int err; + + err = bpf_map__lookup_elem(map_irq, &key, + sizeof(unsigned int), value_irq, + sizeof(long long) * cpus, 0); + if (err) + return err; + err = bpf_map__lookup_elem(map_thread, &key, + sizeof(unsigned int), value_thread, + sizeof(long long) * cpus, 0); + if (err) + return err; + err = bpf_map__lookup_elem(map_user, &key, + sizeof(unsigned int), value_user, + sizeof(long long) * cpus, 0); + if (err) + return err; + return 0; +} + +/* + * timerlat_bpf_get_hist_value - get value from BPF hist map + */ +int timerlat_bpf_get_hist_value(int key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + return get_value(bpf->maps.hist_irq, + bpf->maps.hist_thread, + bpf->maps.hist_user, + key, value_irq, value_thread, value_user, cpus); +} + +/* + * timerlat_bpf_get_summary_value - get value from BPF summary map + */ +int timerlat_bpf_get_summary_value(enum summary_field key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + return get_value(bpf->maps.summary_irq, + bpf->maps.summary_thread, + bpf->maps.summary_user, + key, value_irq, value_thread, value_user, cpus); +} +#endif /* HAVE_BPF_SKEL */ diff --git a/tools/tracing/rtla/src/timerlat_bpf.h b/tools/tracing/rtla/src/timerlat_bpf.h new file mode 100644 index 
000000000000..118487436d30 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_bpf.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#pragma once + +enum summary_field { + SUMMARY_CURRENT, + SUMMARY_MIN, + SUMMARY_MAX, + SUMMARY_COUNT, + SUMMARY_SUM, + SUMMARY_OVERFLOW, + SUMMARY_FIELD_N +}; + +#ifndef __bpf__ +#ifdef HAVE_BPF_SKEL +int timerlat_bpf_init(struct timerlat_params *params); +int timerlat_bpf_attach(void); +void timerlat_bpf_detach(void); +void timerlat_bpf_destroy(void); +int timerlat_bpf_wait(int timeout); +int timerlat_bpf_restart_tracing(void); +int timerlat_bpf_get_hist_value(int key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus); +int timerlat_bpf_get_summary_value(enum summary_field key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus); + +static inline int have_libbpf_support(void) { return 1; } +#else +static inline int timerlat_bpf_init(struct timerlat_params *params) +{ + return -1; +} +static inline int timerlat_bpf_attach(void) { return -1; } +static inline void timerlat_bpf_detach(void) { }; +static inline void timerlat_bpf_destroy(void) { }; +static inline int timerlat_bpf_wait(int timeout) { return -1; } +static inline int timerlat_bpf_restart_tracing(void) { return -1; }; +static inline int timerlat_bpf_get_hist_value(int key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + return -1; +} +static inline int timerlat_bpf_get_summary_value(enum summary_field key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + return -1; +} +static inline int have_libbpf_support(void) { return 0; } +#endif /* HAVE_BPF_SKEL */ +#endif /* __bpf__ */ diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c new file mode 100644 index 000000000000..1fb471a787b7 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -0,0 +1,1160 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> +#include <sched.h> +#include <pthread.h> + +#include "timerlat.h" +#include "timerlat_aa.h" +#include "timerlat_bpf.h" + +struct timerlat_hist_cpu { + int *irq; + int *thread; + int *user; + + unsigned long long irq_count; + unsigned long long thread_count; + unsigned long long user_count; + + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; + + unsigned long long min_user; + unsigned long long sum_user; + unsigned long long max_user; +}; + +struct timerlat_hist_data { + struct timerlat_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * timerlat_free_histogram - free runtime data + */ +static void +timerlat_free_histogram(struct timerlat_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].irq) + free(data->hist[cpu].irq); + + if (data->hist[cpu].thread) + free(data->hist[cpu].thread); + + if (data->hist[cpu].user) + free(data->hist[cpu].user); + + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); +} + +static void timerlat_free_histogram_tool(struct osnoise_tool *tool) +{ + timerlat_free_histogram(tool->data); + timerlat_free(tool); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_hist_data +*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct timerlat_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + 
data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + /* one histogram for IRQ and one for thread, per cpu */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1)); + if (!data->hist[cpu].irq) + goto cleanup; + + data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1)); + if (!data->hist[cpu].thread) + goto cleanup; + + data->hist[cpu].user = calloc(1, sizeof(*data->hist->user) * (entries + 1)); + if (!data->hist[cpu].user) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].min_irq = ~0; + data->hist[cpu].min_thread = ~0; + data->hist[cpu].min_user = ~0; + } + + return data; + +cleanup: + timerlat_free_histogram(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_hist_update(struct osnoise_tool *tool, int cpu, + unsigned long long context, + unsigned long long latency) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + struct timerlat_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->common.output_divisor) + latency = latency / params->common.output_divisor; + + bucket = latency / data->bucket_size; + + if (!context) { + hist = data->hist[cpu].irq; + data->hist[cpu].irq_count++; + update_min(&data->hist[cpu].min_irq, &latency); + update_sum(&data->hist[cpu].sum_irq, &latency); + update_max(&data->hist[cpu].max_irq, &latency); + } else if (context == 1) { + hist = data->hist[cpu].thread; + data->hist[cpu].thread_count++; + update_min(&data->hist[cpu].min_thread, &latency); + update_sum(&data->hist[cpu].sum_thread, &latency); + update_max(&data->hist[cpu].max_thread, &latency); + } else { /* user */ + hist = data->hist[cpu].user; + data->hist[cpu].user_count++; + 
update_min(&data->hist[cpu].min_user, &latency); + update_sum(&data->hist[cpu].sum_user, &latency); + update_max(&data->hist[cpu].max_user, &latency); + } + + if (bucket < entries) + hist[bucket]++; + else + hist[entries]++; +} + +/* + * timerlat_hist_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *data) +{ + struct trace_instance *trace = data; + unsigned long long context, latency; + struct osnoise_tool *tool; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &context, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_hist_update(tool, cpu, context, latency); + + return 0; +} + +/* + * timerlat_hist_bpf_pull_data - copy data from BPF maps into userspace + */ +static int timerlat_hist_bpf_pull_data(struct osnoise_tool *tool) +{ + struct timerlat_hist_data *data = tool->data; + int i, j, err; + long long value_irq[data->nr_cpus], + value_thread[data->nr_cpus], + value_user[data->nr_cpus]; + + /* Pull histogram */ + for (i = 0; i < data->entries; i++) { + err = timerlat_bpf_get_hist_value(i, value_irq, value_thread, + value_user, data->nr_cpus); + if (err) + return err; + for (j = 0; j < data->nr_cpus; j++) { + data->hist[j].irq[i] = value_irq[j]; + data->hist[j].thread[i] = value_thread[j]; + data->hist[j].user[i] = value_user[j]; + } + } + + /* Pull summary */ + err = timerlat_bpf_get_summary_value(SUMMARY_COUNT, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->hist[i].irq_count = value_irq[i]; + data->hist[i].thread_count = value_thread[i]; + data->hist[i].user_count = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_MIN, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i 
< data->nr_cpus; i++) { + data->hist[i].min_irq = value_irq[i]; + data->hist[i].min_thread = value_thread[i]; + data->hist[i].min_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_MAX, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->hist[i].max_irq = value_irq[i]; + data->hist[i].max_thread = value_thread[i]; + data->hist[i].max_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_SUM, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->hist[i].sum_irq = value_irq[i]; + data->hist[i].sum_thread = value_thread[i]; + data->hist[i].sum_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_OVERFLOW, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->hist[i].irq[data->entries] = value_irq[i]; + data->hist[i].thread[data->entries] = value_thread[i]; + data->hist[i].user[data->entries] = value_user[i]; + } + + return 0; +} + +/* + * timerlat_hist_header - print the header of the tracer to the output + */ +static void timerlat_hist_header(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + struct timerlat_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->common.hist.no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA timerlat histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->common.output_divisor == 1 ? "nanoseconds" : "microseconds", + params->common.output_divisor == 1 ? 
"ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->common.hist.no_index) + trace_seq_printf(s, "Index"); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->common.hist.no_irq) + trace_seq_printf(s, " IRQ-%03d", cpu); + + if (!params->common.hist.no_thread) + trace_seq_printf(s, " Thr-%03d", cpu); + + if (params->common.user_data) + trace_seq_printf(s, " Usr-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * format_summary_value - format a line of summary value (min, max or avg) + * of hist data + */ +static void format_summary_value(struct trace_seq *seq, + int count, + unsigned long long val, + bool avg) +{ + if (count) + trace_seq_printf(seq, "%9llu ", avg ? val / count : val); + else + trace_seq_printf(seq, "%9c ", '-'); +} + +/* + * timerlat_print_summary - print the summary of the hist data to the output + */ +static void +timerlat_print_summary(struct timerlat_params *params, + struct trace_instance *trace, + struct timerlat_hist_data *data) +{ + int cpu; + + if (params->common.hist.no_summary) + return; + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "count:"); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->common.hist.no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].irq_count); + + if (!params->common.hist.no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].thread_count); + + if (params->common.user_data) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].user_count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "min: "); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if 
(!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].min_irq, + false); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].min_thread, + false); + + if (params->common.user_data) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].min_user, + false); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "avg: "); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].sum_irq, + true); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].sum_thread, + true); + + if (params->common.user_data) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].sum_user, + true); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "max: "); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].max_irq, + false); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].max_thread, + false); + + if (params->common.user_data) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].max_user, + false); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +static void 
+timerlat_print_stats_all(struct timerlat_params *params, + struct trace_instance *trace, + struct timerlat_hist_data *data) +{ + struct timerlat_hist_cpu *cpu_data; + struct timerlat_hist_cpu sum; + int cpu; + + if (params->common.hist.no_summary) + return; + + memset(&sum, 0, sizeof(sum)); + sum.min_irq = ~0; + sum.min_thread = ~0; + sum.min_user = ~0; + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + cpu_data = &data->hist[cpu]; + + sum.irq_count += cpu_data->irq_count; + update_min(&sum.min_irq, &cpu_data->min_irq); + update_sum(&sum.sum_irq, &cpu_data->sum_irq); + update_max(&sum.max_irq, &cpu_data->max_irq); + + sum.thread_count += cpu_data->thread_count; + update_min(&sum.min_thread, &cpu_data->min_thread); + update_sum(&sum.sum_thread, &cpu_data->sum_thread); + update_max(&sum.max_thread, &cpu_data->max_thread); + + sum.user_count += cpu_data->user_count; + update_min(&sum.min_user, &cpu_data->min_user); + update_sum(&sum.sum_user, &cpu_data->sum_user); + update_max(&sum.max_user, &cpu_data->max_user); + } + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "ALL: "); + + if (!params->common.hist.no_irq) + trace_seq_printf(trace->seq, " IRQ"); + + if (!params->common.hist.no_thread) + trace_seq_printf(trace->seq, " Thr"); + + if (params->common.user_data) + trace_seq_printf(trace->seq, " Usr"); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "count:"); + + if (!params->common.hist.no_irq) + trace_seq_printf(trace->seq, "%9llu ", + sum.irq_count); + + if (!params->common.hist.no_thread) + trace_seq_printf(trace->seq, "%9llu ", + sum.thread_count); + + if (params->common.user_data) + trace_seq_printf(trace->seq, "%9llu ", + sum.user_count); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "min: "); + + if 
(!params->common.hist.no_irq) + format_summary_value(trace->seq, + sum.irq_count, + sum.min_irq, + false); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + sum.thread_count, + sum.min_thread, + false); + + if (params->common.user_data) + format_summary_value(trace->seq, + sum.user_count, + sum.min_user, + false); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "avg: "); + + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + sum.irq_count, + sum.sum_irq, + true); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + sum.thread_count, + sum.sum_thread, + true); + + if (params->common.user_data) + format_summary_value(trace->seq, + sum.user_count, + sum.sum_user, + true); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "max: "); + + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + sum.irq_count, + sum.max_irq, + false); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + sum.thread_count, + sum.max_thread, + false); + + if (params->common.user_data) + format_summary_value(trace->seq, + sum.user_count, + sum.max_user, + false); + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_print_stats - print data for each CPUs + */ +static void +timerlat_print_stats(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + struct timerlat_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + timerlat_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if 
(!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+				continue;
+
+			if (!params->common.hist.no_irq) {
+				total += data->hist[cpu].irq[bucket];
+				trace_seq_printf(trace->seq, "%9d ",
+						data->hist[cpu].irq[bucket]);
+			}
+
+			if (!params->common.hist.no_thread) {
+				total += data->hist[cpu].thread[bucket];
+				trace_seq_printf(trace->seq, "%9d ",
+						data->hist[cpu].thread[bucket]);
+			}
+
+			if (params->common.user_data) {
+				total += data->hist[cpu].user[bucket];
+				trace_seq_printf(trace->seq, "%9d ",
+						data->hist[cpu].user[bucket]);
+			}
+
+		}
+
+		if (total == 0 && !params->common.hist.with_zeros) {
+			trace_seq_reset(trace->seq);
+			continue;
+		}
+
+		trace_seq_printf(trace->seq, "\n");
+		trace_seq_do_printf(trace->seq);
+		trace_seq_reset(trace->seq);
+	}
+
+	/* the final "over:" row counts samples beyond the last bucket */
+	if (!params->common.hist.no_index)
+		trace_seq_printf(trace->seq, "over: ");
+
+	for_each_monitored_cpu(cpu, data->nr_cpus, &params->common) {
+
+		if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+			continue;
+
+		if (!params->common.hist.no_irq)
+			trace_seq_printf(trace->seq, "%9d ",
+					data->hist[cpu].irq[data->entries]);
+
+		if (!params->common.hist.no_thread)
+			trace_seq_printf(trace->seq, "%9d ",
+					data->hist[cpu].thread[data->entries]);
+
+		if (params->common.user_data)
+			trace_seq_printf(trace->seq, "%9d ",
+					data->hist[cpu].user[data->entries]);
+	}
+	trace_seq_printf(trace->seq, "\n");
+	trace_seq_do_printf(trace->seq);
+	trace_seq_reset(trace->seq);
+
+	timerlat_print_summary(params, trace, data);
+	timerlat_print_stats_all(params, trace, data);
+	osnoise_report_missed_events(tool);
+}
+
+/*
+ * timerlat_hist_usage - prints timerlat hist usage message
+ */
+static void timerlat_hist_usage(void)
+{
+	int i;
+
+	char *msg[] = {
+		"",
+		" usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\",
+		" [-t [file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\",
+		" [-P priority] [-E N] [-b N] [--no-irq] [--no-thread]
[--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros] [--dma-latency us] [-C [cgroup_name]] [--no-aa] [--dump-task] [-u|-k]", + " [--warm-up s] [--deepest-idle-state n]", + "", + " -h/--help: print this menu", + " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup [cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", + " -D/--debug: print debug info", + " -t/--trace [file]: save the stopped trace to [file|timerlat_trace.txt]", + " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", + " --filter <filter>: enable a trace event filter to the previous -e event", + " --trigger <trigger>: enable a trace event trigger to the previous -e event", + " -n/--nano: display data in nanoseconds", + " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -E/--entries N: set the number of entries of the histogram (default 256)", + " --no-irq: ignore IRQ latencies", + " --no-thread: ignore thread latencies", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " --dma-latency us: set /dev/cpu_dma_latency latency <us> 
to reduce exit from idle latency", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads", + " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads", + " -U/--user-load: enable timerlat for user-defined user-space workload", + " --warm-up s: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", + " --on-threshold <action>: define action to be executed at latency threshold, multiple are allowed", + " --on-end <action>: define action to be executed at measurement end, multiple are allowed", + NULL, + }; + + fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + + exit(EXIT_SUCCESS); +} + +/* + * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct common_params +*timerlat_hist_parse_args(int argc, char *argv[]) +{ + struct timerlat_params *params; + struct trace_events *tevent; + int auto_thresh; + int retval; + int c; + char *trace_output = NULL; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + actions_init(¶ms->common.threshold_actions); + actions_init(¶ms->common.end_actions); + + /* disabled by default */ + params->dma_latency = -1; + + /* disabled by default */ + params->deepest_idle_state = -2; + + /* display data in microseconds */ + params->common.output_divisor = 1000; + params->common.hist.bucket_size = 1; + 
params->common.hist.entries = 256; + + /* default to BPF mode */ + params->mode = TRACING_MODE_BPF; + + while (1) { + static struct option long_options[] = { + {"auto", required_argument, 0, 'a'}, + {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, + {"bucket-size", required_argument, 0, 'b'}, + {"debug", no_argument, 0, 'D'}, + {"entries", required_argument, 0, 'E'}, + {"duration", required_argument, 0, 'd'}, + {"house-keeping", required_argument, 0, 'H'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"user-threads", no_argument, 0, 'u'}, + {"kernel-threads", no_argument, 0, 'k'}, + {"user-load", no_argument, 0, 'U'}, + {"event", required_argument, 0, 'e'}, + {"no-irq", no_argument, 0, '0'}, + {"no-thread", no_argument, 0, '1'}, + {"no-header", no_argument, 0, '2'}, + {"no-summary", no_argument, 0, '3'}, + {"no-index", no_argument, 0, '4'}, + {"with-zeros", no_argument, 0, '5'}, + {"trigger", required_argument, 0, '6'}, + {"filter", required_argument, 0, '7'}, + {"dma-latency", required_argument, 0, '8'}, + {"no-aa", no_argument, 0, '9'}, + {"dump-task", no_argument, 0, '\1'}, + {"warm-up", required_argument, 0, '\2'}, + {"trace-buffer-size", required_argument, 0, '\3'}, + {"deepest-idle-state", required_argument, 0, '\4'}, + {"on-threshold", required_argument, 0, '\5'}, + {"on-end", required_argument, 0, '\6'}, + {0, 0, 0, 0} + }; + + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3:", + long_options, NULL); + + /* detect the end of the options. 
*/
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'a':
+			auto_thresh = get_llong_from_str(optarg);
+
+			/* set thread stop to auto_thresh */
+			params->common.stop_total_us = auto_thresh;
+			params->common.stop_us = auto_thresh;
+
+			/* get stack trace */
+			params->print_stack = auto_thresh;
+
+			/* set trace */
+			if (!trace_output)
+				trace_output = "timerlat_trace.txt";
+
+			break;
+		case 'c':
+			retval = parse_cpu_set(optarg, &params->common.monitored_cpus);
+			if (retval)
+				fatal("Invalid -c cpu list");
+			params->common.cpus = optarg;
+			break;
+		case 'C':
+			params->common.cgroup = 1;
+			params->common.cgroup_name = parse_optional_arg(argc, argv);
+			break;
+		case 'b':
+			params->common.hist.bucket_size = get_llong_from_str(optarg);
+			/* valid range is 1..999999: message must match the check below */
+			if (params->common.hist.bucket_size == 0 ||
+			    params->common.hist.bucket_size >= 1000000)
+				fatal("Bucket size needs to be > 0 and < 1000000");
+			break;
+		case 'D':
+			config_debug = 1;
+			break;
+		case 'd':
+			params->common.duration = parse_seconds_duration(optarg);
+			/* this is option -d (duration), not -D (debug) */
+			if (!params->common.duration)
+				fatal("Invalid -d duration");
+			break;
+		case 'e':
+			tevent = trace_event_alloc(optarg);
+			if (!tevent)
+				fatal("Error alloc trace event");
+
+			if (params->common.events)
+				tevent->next = params->common.events;
+
+			params->common.events = tevent;
+			break;
+		case 'E':
+			params->common.hist.entries = get_llong_from_str(optarg);
+			/* bounds are inclusive: 10 and 9999999 are accepted */
+			if (params->common.hist.entries < 10 ||
+			    params->common.hist.entries > 9999999)
+				fatal("Entries must be >= 10 and <= 9999999");
+			break;
+		case 'h':
+		case '?':
+			timerlat_hist_usage();
+			break;
+		case 'H':
+			params->common.hk_cpus = 1;
+			retval = parse_cpu_set(optarg, &params->common.hk_cpu_set);
+			if (retval)
+				fatal("Error parsing house keeping CPUs");
+			break;
+		case 'i':
+			params->common.stop_us = get_llong_from_str(optarg);
+			break;
+		case 'k':
+			params->common.kernel_workload = 1;
+			break;
+		case 'n':
+			/* report raw nanoseconds instead of the default microseconds */
+			params->common.output_divisor = 1;
+			break;
+		case 'p':
+			params->timerlat_period_us = get_llong_from_str(optarg);
+			if
(params->timerlat_period_us > 1000000) + fatal("Period longer than 1 s"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->common.sched_param); + if (retval == -1) + fatal("Invalid -P priority"); + params->common.set_sched = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->common.stop_total_us = get_llong_from_str(optarg); + break; + case 't': + trace_output = parse_optional_arg(argc, argv); + if (!trace_output) + trace_output = "timerlat_trace.txt"; + break; + case 'u': + params->common.user_workload = 1; + /* fallback: -u implies in -U */ + case 'U': + params->common.user_data = 1; + break; + case '0': /* no irq */ + params->common.hist.no_irq = 1; + break; + case '1': /* no thread */ + params->common.hist.no_thread = 1; + break; + case '2': /* no header */ + params->common.hist.no_header = 1; + break; + case '3': /* no summary */ + params->common.hist.no_summary = 1; + break; + case '4': /* no index */ + params->common.hist.no_index = 1; + break; + case '5': /* with zeros */ + params->common.hist.with_zeros = 1; + break; + case '6': /* trigger */ + if (params->common.events) { + retval = trace_event_add_trigger(params->common.events, optarg); + if (retval) + fatal("Error adding trigger %s", optarg); + } else { + fatal("--trigger requires a previous -e"); + } + break; + case '7': /* filter */ + if (params->common.events) { + retval = trace_event_add_filter(params->common.events, optarg); + if (retval) + fatal("Error adding filter %s", optarg); + } else { + fatal("--filter requires a previous -e"); + } + break; + case '8': + params->dma_latency = get_llong_from_str(optarg); + if (params->dma_latency < 0 || params->dma_latency > 10000) + fatal("--dma-latency needs to be >= 0 and < 10000"); + break; + case '9': + params->no_aa = 1; + break; + case '\1': + params->dump_tasks = 1; + break; + case '\2': + params->common.warmup = get_llong_from_str(optarg); + break; + case '\3': + 
params->common.buffer_size = get_llong_from_str(optarg); + break; + case '\4': + params->deepest_idle_state = get_llong_from_str(optarg); + break; + case '\5': + retval = actions_parse(¶ms->common.threshold_actions, optarg, + "timerlat_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + case '\6': + retval = actions_parse(¶ms->common.end_actions, optarg, + "timerlat_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + default: + fatal("Invalid option"); + } + } + + if (trace_output) + actions_add_trace_output(¶ms->common.threshold_actions, trace_output); + + if (geteuid()) + fatal("rtla needs root permission"); + + if (params->common.hist.no_irq && params->common.hist.no_thread) + fatal("no-irq and no-thread set, there is nothing to do here"); + + if (params->common.hist.no_index && !params->common.hist.with_zeros) + fatal("no-index set with with-zeros is not set - it does not make sense"); + + /* + * Auto analysis only happens if stop tracing, thus: + */ + if (!params->common.stop_us && !params->common.stop_total_us) + params->no_aa = 1; + + if (params->common.kernel_workload && params->common.user_workload) + fatal("--kernel-threads and --user-threads are mutually exclusive!"); + + /* + * If auto-analysis or trace output is enabled, switch from BPF mode to + * mixed mode + */ + if (params->mode == TRACING_MODE_BPF && + (params->common.threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->common.end_actions.present[ACTION_TRACE_OUTPUT] || + !params->no_aa)) + params->mode = TRACING_MODE_MIXED; + + return ¶ms->common; +} + +/* + * timerlat_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +timerlat_hist_apply_config(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval; + + retval = timerlat_apply_config(tool, params); + if (retval) + goto out_err; + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_hist - 
initialize a timerlat hist tool with parameters + */ +static struct osnoise_tool +*timerlat_init_hist(struct common_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("timerlat_hist"); + if (!tool) + return NULL; + + tool->data = timerlat_alloc_histogram(nr_cpus, params->hist.entries, + params->hist.bucket_size); + if (!tool->data) + goto out_err; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_hist_handler, tool); + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int timerlat_hist_bpf_main_loop(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval; + + while (!stop_tracing) { + timerlat_bpf_wait(-1); + + if (!stop_tracing) { + /* Threshold overflow, perform actions on threshold */ + actions_perform(¶ms->common.threshold_actions); + + if (!params->common.threshold_actions.continue_flag) + /* continue flag not set, break */ + break; + + /* continue action reached, re-enable tracing */ + if (tool->record) + trace_instance_start(&tool->record->trace); + if (tool->aa) + trace_instance_start(&tool->aa->trace); + timerlat_bpf_restart_tracing(); + } + } + timerlat_bpf_detach(); + + retval = timerlat_hist_bpf_pull_data(tool); + if (retval) + err_msg("Error pulling BPF data\n"); + + return retval; +} + +static int timerlat_hist_main(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval; + + if (params->mode == TRACING_MODE_TRACEFS) + retval = hist_main_loop(tool); + else + retval = timerlat_hist_bpf_main_loop(tool); + + return retval; +} + +struct tool_ops timerlat_hist_ops = { + .tracer = "timerlat", + .comm_prefix = "timerlat/", + .parse_args = timerlat_hist_parse_args, + .init_tool = timerlat_init_hist, + .apply_config = timerlat_hist_apply_config, + .enable = timerlat_enable, + .main = 
timerlat_hist_main,
+	.print_stats = timerlat_print_stats,
+	.analyze = timerlat_analyze,
+	.free = timerlat_free_histogram_tool,
+};
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
new file mode 100644
index 000000000000..29c2c1f717ed
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -0,0 +1,935 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+#include <errno.h>
+#include <sched.h>
+#include <pthread.h>
+
+#include "timerlat.h"
+#include "timerlat_aa.h"
+#include "timerlat_bpf.h"
+
+/* per-CPU counters and current/min/sum/max latencies for irq, thread and user */
+struct timerlat_top_cpu {
+	unsigned long long irq_count;
+	unsigned long long thread_count;
+	unsigned long long user_count;
+
+	unsigned long long cur_irq;
+	unsigned long long min_irq;
+	unsigned long long sum_irq;
+	unsigned long long max_irq;
+
+	unsigned long long cur_thread;
+	unsigned long long min_thread;
+	unsigned long long sum_thread;
+	unsigned long long max_thread;
+
+	unsigned long long cur_user;
+	unsigned long long min_user;
+	unsigned long long sum_user;
+	unsigned long long max_user;
+};
+
+struct timerlat_top_data {
+	struct timerlat_top_cpu *cpu_data;
+	int nr_cpus;
+};
+
+/*
+ * timerlat_free_top - free runtime data
+ */
+static void timerlat_free_top(struct timerlat_top_data *data)
+{
+	free(data->cpu_data);
+	free(data);
+}
+
+static void timerlat_free_top_tool(struct osnoise_tool *tool)
+{
+	timerlat_free_top(tool->data);
+	timerlat_free(tool);
+}
+
+/*
+ * timerlat_alloc_top - alloc runtime data
+ */
+static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus)
+{
+	struct timerlat_top_data *data;
+	int cpu;
+
+	data = calloc(1, sizeof(*data));
+	if (!data)
+		return NULL;
+
+	data->nr_cpus = nr_cpus;
+
+	/* one set of summary counters per CPU */
+
data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus);
+	if (!data->cpu_data)
+		goto cleanup;
+
+	/* set the min to max */
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		data->cpu_data[cpu].min_irq = ~0;
+		data->cpu_data[cpu].min_thread = ~0;
+		data->cpu_data[cpu].min_user = ~0;
+	}
+
+	return data;
+
+cleanup:
+	timerlat_free_top(data);
+	return NULL;
+}
+
+static void
+timerlat_top_reset_sum(struct timerlat_top_cpu *summary)
+{
+	memset(summary, 0, sizeof(*summary));
+	summary->min_irq = ~0;
+	summary->min_thread = ~0;
+	summary->min_user = ~0;
+}
+
+/* fold one CPU's counters and min/sum/max latencies into the summary row */
+static void
+timerlat_top_update_sum(struct osnoise_tool *tool, int cpu, struct timerlat_top_cpu *sum)
+{
+	struct timerlat_top_data *data = tool->data;
+	struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+
+	sum->irq_count += cpu_data->irq_count;
+	update_min(&sum->min_irq, &cpu_data->min_irq);
+	update_sum(&sum->sum_irq, &cpu_data->sum_irq);
+	update_max(&sum->max_irq, &cpu_data->max_irq);
+
+	sum->thread_count += cpu_data->thread_count;
+	update_min(&sum->min_thread, &cpu_data->min_thread);
+	update_sum(&sum->sum_thread, &cpu_data->sum_thread);
+	update_max(&sum->max_thread, &cpu_data->max_thread);
+
+	sum->user_count += cpu_data->user_count;
+	update_min(&sum->min_user, &cpu_data->min_user);
+	update_sum(&sum->sum_user, &cpu_data->sum_user);
+	update_max(&sum->max_user, &cpu_data->max_user);
+}
+
+/*
+ * timerlat_top_update - record a new timerlat occurrence on cpu, updating data
+ */
+static void
+timerlat_top_update(struct osnoise_tool *tool, int cpu,
+		    unsigned long long thread,
+		    unsigned long long latency)
+{
+	struct timerlat_params *params = to_timerlat_params(tool->params);
+	struct timerlat_top_data *data = tool->data;
+	struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+
+	/* output_divisor converts ns to the display unit (1000 = us, 1 = ns) */
+	if (params->common.output_divisor)
+		latency = latency / params->common.output_divisor;
+
+	/* thread context: 0 = irq, 1 = kernel thread, other = user return */
+	if (!thread) {
+		cpu_data->irq_count++;
+		cpu_data->cur_irq = latency;
+		update_min(&cpu_data->min_irq, &latency);
+
update_sum(&cpu_data->sum_irq, &latency); + update_max(&cpu_data->max_irq, &latency); + } else if (thread == 1) { + cpu_data->thread_count++; + cpu_data->cur_thread = latency; + update_min(&cpu_data->min_thread, &latency); + update_sum(&cpu_data->sum_thread, &latency); + update_max(&cpu_data->max_thread, &latency); + } else { + cpu_data->user_count++; + cpu_data->cur_user = latency; + update_min(&cpu_data->min_user, &latency); + update_sum(&cpu_data->sum_user, &latency); + update_max(&cpu_data->max_user, &latency); + } +} + +/* + * timerlat_top_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + unsigned long long latency, thread; + struct osnoise_tool *top; + int cpu = record->cpu; + + top = container_of(trace, struct osnoise_tool, trace); + + if (!top->params->aa_only) { + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_top_update(top, cpu, thread, latency); + } + + return 0; +} + +/* + * timerlat_top_bpf_pull_data - copy data from BPF maps into userspace + */ +static int timerlat_top_bpf_pull_data(struct osnoise_tool *tool) +{ + struct timerlat_top_data *data = tool->data; + int i, err; + long long value_irq[data->nr_cpus], + value_thread[data->nr_cpus], + value_user[data->nr_cpus]; + + /* Pull summary */ + err = timerlat_bpf_get_summary_value(SUMMARY_CURRENT, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->cpu_data[i].cur_irq = value_irq[i]; + data->cpu_data[i].cur_thread = value_thread[i]; + data->cpu_data[i].cur_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_COUNT, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; 
i++) { + data->cpu_data[i].irq_count = value_irq[i]; + data->cpu_data[i].thread_count = value_thread[i]; + data->cpu_data[i].user_count = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_MIN, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->cpu_data[i].min_irq = value_irq[i]; + data->cpu_data[i].min_thread = value_thread[i]; + data->cpu_data[i].min_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_MAX, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->cpu_data[i].max_irq = value_irq[i]; + data->cpu_data[i].max_thread = value_thread[i]; + data->cpu_data[i].max_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_SUM, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->cpu_data[i].sum_irq = value_irq[i]; + data->cpu_data[i].sum_thread = value_thread[i]; + data->cpu_data[i].sum_user = value_user[i]; + } + + return 0; +} + +/* + * timerlat_top_header - print the header of the tool output + */ +static void timerlat_top_header(struct timerlat_params *params, struct osnoise_tool *top) +{ + struct trace_seq *s = top->trace.seq; + bool pretty = params->common.pretty_output; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + if (pretty) + trace_seq_printf(s, "\033[2;37;40m"); + + trace_seq_printf(s, " Timer Latency "); + if (params->common.user_data) + trace_seq_printf(s, " "); + + if (pretty) + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)", duration, + params->common.output_divisor == 1 ? "ns" : "us", + params->common.output_divisor == 1 ? 
"ns" : "us"); + + if (params->common.user_data) { + trace_seq_printf(s, " | Ret user Timer Latency (%s)", + params->common.output_divisor == 1 ? "ns" : "us"); + } + + trace_seq_printf(s, "\n"); + if (pretty) + trace_seq_printf(s, "\033[2;30;47m"); + + trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); + if (params->common.user_data) + trace_seq_printf(s, " | cur min avg max"); + + if (pretty) + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +static const char *no_value = " -"; + +/* + * timerlat_top_print - prints the output of a given CPU + */ +static void timerlat_top_print(struct osnoise_tool *top, int cpu) +{ + struct timerlat_params *params = to_timerlat_params(top->params); + struct timerlat_top_data *data = top->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + struct trace_seq *s = top->trace.seq; + + /* + * Skip if no data is available: is this cpu offline? + */ + if (!cpu_data->irq_count && !cpu_data->thread_count) + return; + + /* + * Unless trace is being lost, IRQ counter is always the max. 
+ */ + trace_seq_printf(s, "%3d #%-9llu |", cpu, cpu_data->irq_count); + + if (!cpu_data->irq_count) { + trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_irq); + trace_seq_printf(s, "%9llu ", cpu_data->min_irq); + trace_seq_printf(s, "%9llu ", cpu_data->sum_irq / cpu_data->irq_count); + trace_seq_printf(s, "%9llu |", cpu_data->max_irq); + } + + if (!cpu_data->thread_count) { + trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_thread); + trace_seq_printf(s, "%9llu ", cpu_data->min_thread); + trace_seq_printf(s, "%9llu ", + cpu_data->sum_thread / cpu_data->thread_count); + trace_seq_printf(s, "%9llu", cpu_data->max_thread); + } + + if (!params->common.user_data) { + trace_seq_printf(s, "\n"); + return; + } + + trace_seq_printf(s, " |"); + + if (!cpu_data->user_count) { + trace_seq_printf(s, "%s %s %s %s\n", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_user); + trace_seq_printf(s, "%9llu ", cpu_data->min_user); + trace_seq_printf(s, "%9llu ", + cpu_data->sum_user / cpu_data->user_count); + trace_seq_printf(s, "%9llu\n", cpu_data->max_user); + } +} + +/* + * timerlat_top_print_sum - prints the summary output + */ +static void +timerlat_top_print_sum(struct osnoise_tool *top, struct timerlat_top_cpu *summary) +{ + const char *split = "----------------------------------------"; + struct timerlat_params *params = to_timerlat_params(top->params); + unsigned long long count = summary->irq_count; + struct trace_seq *s = top->trace.seq; + int e = 0; + + /* + * Skip if no data is available: is this cpu offline? 
+ */ + if (!summary->irq_count && !summary->thread_count) + return; + + while (count > 999999) { + e++; + count /= 10; + } + + trace_seq_printf(s, "%.*s|%.*s|%.*s", 15, split, 40, split, 39, split); + if (params->common.user_data) + trace_seq_printf(s, "-|%.*s", 39, split); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "ALL #%-6llu e%d |", count, e); + + if (!summary->irq_count) { + trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value); + } else { + trace_seq_printf(s, " "); + trace_seq_printf(s, "%9llu ", summary->min_irq); + trace_seq_printf(s, "%9llu ", summary->sum_irq / summary->irq_count); + trace_seq_printf(s, "%9llu |", summary->max_irq); + } + + if (!summary->thread_count) { + trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, " "); + trace_seq_printf(s, "%9llu ", summary->min_thread); + trace_seq_printf(s, "%9llu ", + summary->sum_thread / summary->thread_count); + trace_seq_printf(s, "%9llu", summary->max_thread); + } + + if (!params->common.user_data) { + trace_seq_printf(s, "\n"); + return; + } + + trace_seq_printf(s, " |"); + + if (!summary->user_count) { + trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value); + } else { + trace_seq_printf(s, " "); + trace_seq_printf(s, "%9llu ", summary->min_user); + trace_seq_printf(s, "%9llu ", + summary->sum_user / summary->user_count); + trace_seq_printf(s, "%9llu\n", summary->max_user); + } +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * timerlat_print_stats - print data for all cpus + */ +static void +timerlat_print_stats(struct osnoise_tool *top) +{ + struct timerlat_params *params = to_timerlat_params(top->params); + struct trace_instance *trace = &top->trace; + struct timerlat_top_cpu summary; + static int nr_cpus = -1; + int i; + + if (params->common.aa_only) + return; + + if (nr_cpus == 
-1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->common.quiet) + clear_terminal(trace->seq); + + timerlat_top_reset_sum(&summary); + + timerlat_top_header(params, top); + + for_each_monitored_cpu(i, nr_cpus, ¶ms->common) { + timerlat_top_print(top, i); + timerlat_top_update_sum(top, i, &summary); + } + + timerlat_top_print_sum(top, &summary); + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + osnoise_report_missed_events(top); +} + +/* + * timerlat_top_usage - prints timerlat top usage message + */ +static void timerlat_top_usage(void) +{ + int i; + + static const char *const msg[] = { + "", + " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", + " [[-t [file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", + " [-P priority] [--dma-latency us] [--aa-only us] [-C [cgroup_name]] [-u|-k] [--warm-up s] [--deepest-idle-state n]", + "", + " -h/--help: print this menu", + " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", + " --aa-only us: stop if <us> latency is hit, only printing the auto analysis (reduces CPU usage)", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup [cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", + " -t/--trace [file]: save 
the stopped trace to [file|timerlat_trace.txt]", + " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", + " --filter <command>: enable a trace event filter to the previous -e event", + " --trigger <command>: enable a trace event trigger to the previous -e event", + " -n/--nano: display data in nanoseconds", + " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage", + " -q/--quiet print only a summary at the end", + " --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads", + " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads", + " -U/--user-load: enable timerlat for user-defined user-space workload", + " --warm-up s: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", + " --on-threshold <action>: define action to be executed at latency threshold, multiple are allowed", + " --on-end: define action to be executed at measurement end, multiple are allowed", + NULL, + }; + + fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + + exit(EXIT_SUCCESS); +} + +/* + * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct common_params +*timerlat_top_parse_args(int argc, char **argv) +{ + struct 
timerlat_params *params; + struct trace_events *tevent; + long long auto_thresh; + int retval; + int c; + char *trace_output = NULL; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + actions_init(¶ms->common.threshold_actions); + actions_init(¶ms->common.end_actions); + + /* disabled by default */ + params->dma_latency = -1; + + /* disabled by default */ + params->deepest_idle_state = -2; + + /* display data in microseconds */ + params->common.output_divisor = 1000; + + /* default to BPF mode */ + params->mode = TRACING_MODE_BPF; + + while (1) { + static struct option long_options[] = { + {"auto", required_argument, 0, 'a'}, + {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"event", required_argument, 0, 'e'}, + {"help", no_argument, 0, 'h'}, + {"house-keeping", required_argument, 0, 'H'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"user-threads", no_argument, 0, 'u'}, + {"kernel-threads", no_argument, 0, 'k'}, + {"user-load", no_argument, 0, 'U'}, + {"trigger", required_argument, 0, '0'}, + {"filter", required_argument, 0, '1'}, + {"dma-latency", required_argument, 0, '2'}, + {"no-aa", no_argument, 0, '3'}, + {"dump-tasks", no_argument, 0, '4'}, + {"aa-only", required_argument, 0, '5'}, + {"warm-up", required_argument, 0, '6'}, + {"trace-buffer-size", required_argument, 0, '7'}, + {"deepest-idle-state", required_argument, 0, '8'}, + {"on-threshold", required_argument, 0, '9'}, + {"on-end", required_argument, 0, '\1'}, + {0, 0, 0, 0} + }; + + c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:knp:P:qs:t::T:uU0:1:2:345:6:7:", + long_options, NULL); + + /* detect the end of the 
options. */ + if (c == -1) + break; + + switch (c) { + case 'a': + auto_thresh = get_llong_from_str(optarg); + + /* set thread stop to auto_thresh */ + params->common.stop_total_us = auto_thresh; + params->common.stop_us = auto_thresh; + + /* get stack trace */ + params->print_stack = auto_thresh; + + /* set trace */ + if (!trace_output) + trace_output = "timerlat_trace.txt"; + + break; + case '5': + /* it is here because it is similar to -a */ + auto_thresh = get_llong_from_str(optarg); + + /* set thread stop to auto_thresh */ + params->common.stop_total_us = auto_thresh; + params->common.stop_us = auto_thresh; + + /* get stack trace */ + params->print_stack = auto_thresh; + + /* set aa_only to avoid parsing the trace */ + params->common.aa_only = 1; + break; + case 'c': + retval = parse_cpu_set(optarg, ¶ms->common.monitored_cpus); + if (retval) + fatal("Invalid -c cpu list"); + params->common.cpus = optarg; + break; + case 'C': + params->common.cgroup = 1; + params->common.cgroup_name = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->common.duration = parse_seconds_duration(optarg); + if (!params->common.duration) + fatal("Invalid -d duration"); + break; + case 'e': + tevent = trace_event_alloc(optarg); + if (!tevent) + fatal("Error alloc trace event"); + + if (params->common.events) + tevent->next = params->common.events; + params->common.events = tevent; + break; + case 'h': + case '?': + timerlat_top_usage(); + break; + case 'H': + params->common.hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->common.hk_cpu_set); + if (retval) + fatal("Error parsing house keeping CPUs"); + break; + case 'i': + params->common.stop_us = get_llong_from_str(optarg); + break; + case 'k': + params->common.kernel_workload = true; + break; + case 'n': + params->common.output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + fatal("Period longer than 1 s"); + 
break; + case 'P': + retval = parse_prio(optarg, ¶ms->common.sched_param); + if (retval == -1) + fatal("Invalid -P priority"); + params->common.set_sched = 1; + break; + case 'q': + params->common.quiet = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->common.stop_total_us = get_llong_from_str(optarg); + break; + case 't': + trace_output = parse_optional_arg(argc, argv); + if (!trace_output) + trace_output = "timerlat_trace.txt"; + break; + case 'u': + params->common.user_workload = true; + /* fallback: -u implies -U */ + case 'U': + params->common.user_data = true; + break; + case '0': /* trigger */ + if (params->common.events) { + retval = trace_event_add_trigger(params->common.events, optarg); + if (retval) + fatal("Error adding trigger %s", optarg); + } else { + fatal("--trigger requires a previous -e"); + } + break; + case '1': /* filter */ + if (params->common.events) { + retval = trace_event_add_filter(params->common.events, optarg); + if (retval) + fatal("Error adding filter %s", optarg); + } else { + fatal("--filter requires a previous -e"); + } + break; + case '2': /* dma-latency */ + params->dma_latency = get_llong_from_str(optarg); + if (params->dma_latency < 0 || params->dma_latency > 10000) + fatal("--dma-latency needs to be >= 0 and < 10000"); + break; + case '3': /* no-aa */ + params->no_aa = 1; + break; + case '4': + params->dump_tasks = 1; + break; + case '6': + params->common.warmup = get_llong_from_str(optarg); + break; + case '7': + params->common.buffer_size = get_llong_from_str(optarg); + break; + case '8': + params->deepest_idle_state = get_llong_from_str(optarg); + break; + case '9': + retval = actions_parse(¶ms->common.threshold_actions, optarg, + "timerlat_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + case '\1': + retval = actions_parse(¶ms->common.end_actions, optarg, + "timerlat_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + 
break; + default: + fatal("Invalid option"); + } + } + + if (trace_output) + actions_add_trace_output(¶ms->common.threshold_actions, trace_output); + + if (geteuid()) + fatal("rtla needs root permission"); + + /* + * Auto analysis only happens if stop tracing, thus: + */ + if (!params->common.stop_us && !params->common.stop_total_us) + params->no_aa = 1; + + if (params->no_aa && params->common.aa_only) + fatal("--no-aa and --aa-only are mutually exclusive!"); + + if (params->common.kernel_workload && params->common.user_workload) + fatal("--kernel-threads and --user-threads are mutually exclusive!"); + + /* + * If auto-analysis or trace output is enabled, switch from BPF mode to + * mixed mode + */ + if (params->mode == TRACING_MODE_BPF && + (params->common.threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->common.end_actions.present[ACTION_TRACE_OUTPUT] || + !params->no_aa)) + params->mode = TRACING_MODE_MIXED; + + return ¶ms->common; +} + +/* + * timerlat_top_apply_config - apply the top configs to the initialized tool + */ +static int +timerlat_top_apply_config(struct osnoise_tool *top) +{ + struct timerlat_params *params = to_timerlat_params(top->params); + int retval; + + retval = timerlat_apply_config(top, params); + if (retval) + goto out_err; + + if (isatty(STDOUT_FILENO) && !params->common.quiet) + params->common.pretty_output = 1; + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_top - initialize a timerlat top tool with parameters + */ +static struct osnoise_tool +*timerlat_init_top(struct common_params *params) +{ + struct osnoise_tool *top; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + top = osnoise_init_tool("timerlat_top"); + if (!top) + return NULL; + + top->data = timerlat_alloc_top(nr_cpus); + if (!top->data) + goto out_err; + + tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat", + timerlat_top_handler, top); + + return top; + +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +/* 
+ * timerlat_top_bpf_main_loop - main loop to process events (BPF variant) + */ +static int +timerlat_top_bpf_main_loop(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval, wait_retval; + + if (params->common.aa_only) { + /* Auto-analysis only, just wait for stop tracing */ + timerlat_bpf_wait(-1); + return 0; + } + + /* Pull and display data in a loop */ + while (!stop_tracing) { + wait_retval = timerlat_bpf_wait(params->common.quiet ? -1 : + params->common.sleep_time); + + retval = timerlat_top_bpf_pull_data(tool); + if (retval) { + err_msg("Error pulling BPF data\n"); + return retval; + } + + if (!params->common.quiet) + timerlat_print_stats(tool); + + if (wait_retval != 0) { + /* Stopping requested by tracer */ + actions_perform(¶ms->common.threshold_actions); + + if (!params->common.threshold_actions.continue_flag) + /* continue flag not set, break */ + break; + + /* continue action reached, re-enable tracing */ + if (tool->record) + trace_instance_start(&tool->record->trace); + if (tool->aa) + trace_instance_start(&tool->aa->trace); + timerlat_bpf_restart_tracing(); + } + + /* is there still any user-threads ? 
*/ + if (params->common.user_workload) { + if (params->common.user.stopped_running) { + debug_msg("timerlat user space threads stopped!\n"); + break; + } + } + } + + return 0; +} + +static int timerlat_top_main_loop(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval; + + if (params->mode == TRACING_MODE_TRACEFS) { + retval = top_main_loop(tool); + } else { + retval = timerlat_top_bpf_main_loop(tool); + timerlat_bpf_detach(); + } + + return retval; +} + +struct tool_ops timerlat_top_ops = { + .tracer = "timerlat", + .comm_prefix = "timerlat/", + .parse_args = timerlat_top_parse_args, + .init_tool = timerlat_init_top, + .apply_config = timerlat_top_apply_config, + .enable = timerlat_enable, + .main = timerlat_top_main_loop, + .print_stats = timerlat_print_stats, + .analyze = timerlat_analyze, + .free = timerlat_free_top_tool, +}; diff --git a/tools/tracing/rtla/src/timerlat_u.c b/tools/tracing/rtla/src/timerlat_u.c new file mode 100644 index 000000000000..ce68e39d25fd --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_u.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <sched.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <tracefs.h> +#include <pthread.h> +#include <sys/wait.h> +#include <sys/prctl.h> + +#include "utils.h" +#include "timerlat_u.h" + +/* + * This is the user-space main for the tool timerlatu/ threads. + * + * It is as simple as this: + * - set affinity + * - set priority + * - open tracer fd + * - spin + * - close + */ +static int timerlat_u_main(int cpu, struct timerlat_u_params *params) +{ + struct sched_param sp = { .sched_priority = 95 }; + char buffer[1024]; + int timerlat_fd; + cpu_set_t set; + int retval; + + /* + * This all is only setting up the tool. 
+ */ + CPU_ZERO(&set); + CPU_SET(cpu, &set); + + retval = sched_setaffinity(gettid(), sizeof(set), &set); + if (retval == -1) { + debug_msg("Error setting user thread affinity %d, is the CPU online?\n", cpu); + exit(1); + } + + if (!params->sched_param) { + retval = sched_setscheduler(0, SCHED_FIFO, &sp); + if (retval < 0) + fatal("Error setting timerlat u default priority: %s", strerror(errno)); + } else { + retval = __set_sched_attr(getpid(), params->sched_param); + if (retval) { + /* __set_sched_attr prints an error message, so */ + exit(0); + } + } + + if (params->cgroup_name) { + retval = set_pid_cgroup(gettid(), params->cgroup_name); + if (!retval) { + err_msg("Error setting timerlat u cgroup pid\n"); + pthread_exit(&retval); + } + } + + /* + * This is the tool's loop. If you want to use as base for your own tool... + * go ahead. + */ + snprintf(buffer, sizeof(buffer), "osnoise/per_cpu/cpu%d/timerlat_fd", cpu); + + timerlat_fd = tracefs_instance_file_open(NULL, buffer, O_RDONLY); + if (timerlat_fd < 0) + fatal("Error opening %s:%s", buffer, strerror(errno)); + + debug_msg("User-space timerlat pid %d on cpu %d\n", gettid(), cpu); + + /* add should continue with a signal handler */ + while (true) { + retval = read(timerlat_fd, buffer, 1024); + if (retval < 0) + break; + } + + close(timerlat_fd); + + debug_msg("Leaving timerlat pid %d on cpu %d\n", gettid(), cpu); + exit(0); +} + +/* + * timerlat_u_send_kill - send a kill signal for all processes + * + * Return the number of processes that received the kill. 
+ */ +static int timerlat_u_send_kill(pid_t *procs, int nr_cpus) +{ + int killed = 0; + int i, retval; + + for (i = 0; i < nr_cpus; i++) { + if (!procs[i]) + continue; + retval = kill(procs[i], SIGKILL); + if (!retval) + killed++; + else + err_msg("Error killing child process %d\n", procs[i]); + } + + return killed; +} + +/** + * timerlat_u_dispatcher - dispatch one timerlatu/ process per monitored CPU + * + * This is a thread main that will fork one new process for each monitored + * CPU. It will wait for: + * + * - rtla to tell to kill the child processes + * - some child process to die, and the cleanup all the processes + * + * whichever comes first. + * + */ +void *timerlat_u_dispatcher(void *data) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + struct timerlat_u_params *params = data; + char proc_name[128]; + int procs_count = 0; + int retval = 1; + pid_t *procs; + int wstatus; + pid_t pid; + int i; + + debug_msg("Dispatching timerlat u procs\n"); + + procs = calloc(nr_cpus, sizeof(pid_t)); + if (!procs) + pthread_exit(&retval); + + for (i = 0; i < nr_cpus; i++) { + if (params->set && !CPU_ISSET(i, params->set)) + continue; + + pid = fork(); + + /* child */ + if (!pid) { + + /* + * rename the process + */ + snprintf(proc_name, sizeof(proc_name), "timerlatu/%d", i); + pthread_setname_np(pthread_self(), proc_name); + prctl(PR_SET_NAME, (unsigned long)proc_name, 0, 0, 0); + + timerlat_u_main(i, params); + /* timerlat_u_main should exit()! Anyways... 
*/ + pthread_exit(&retval); + } + + /* parent */ + if (pid == -1) { + timerlat_u_send_kill(procs, nr_cpus); + debug_msg("Failed to create child processes"); + pthread_exit(&retval); + } + + procs_count++; + procs[i] = pid; + } + + while (params->should_run) { + /* check if processes died */ + pid = waitpid(-1, &wstatus, WNOHANG); + if (pid != 0) { + for (i = 0; i < nr_cpus; i++) { + if (procs[i] == pid) { + procs[i] = 0; + procs_count--; + } + } + + if (!procs_count) + break; + } + + sleep(1); + } + + timerlat_u_send_kill(procs, nr_cpus); + + while (procs_count) { + pid = waitpid(-1, &wstatus, 0); + if (pid == -1) { + err_msg("Failed to monitor child processes"); + pthread_exit(&retval); + } + for (i = 0; i < nr_cpus; i++) { + if (procs[i] == pid) { + procs[i] = 0; + procs_count--; + } + } + } + + params->stopped_running = 1; + + free(procs); + retval = 0; + pthread_exit(&retval); + +} diff --git a/tools/tracing/rtla/src/timerlat_u.h b/tools/tracing/rtla/src/timerlat_u.h new file mode 100644 index 000000000000..661511908957 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_u.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +struct timerlat_u_params { + /* timerlat -> timerlat_u: user-space threads can keep running */ + int should_run; + /* timerlat_u -> timerlat: all timerlat_u threads left, no reason to continue */ + int stopped_running; + + /* threads config */ + cpu_set_t *set; + char *cgroup_name; + struct sched_attr *sched_param; +}; + +void *timerlat_u_dispatcher(void *data); diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c new file mode 100644 index 000000000000..69cbc48d53d3 --- /dev/null +++ b/tools/tracing/rtla/src/trace.c @@ -0,0 +1,588 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/sendfile.h> +#include <tracefs.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include 
<errno.h> + +#include "trace.h" +#include "utils.h" + +/* + * enable_tracer_by_name - enable a tracer on the given instance + */ +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name) +{ + enum tracefs_tracers tracer; + int retval; + + tracer = TRACEFS_TRACER_CUSTOM; + + debug_msg("Enabling %s tracer\n", tracer_name); + + retval = tracefs_tracer_set(inst, tracer, tracer_name); + if (retval < 0) { + if (errno == ENODEV) + err_msg("Tracer %s not found!\n", tracer_name); + + err_msg("Failed to enable the %s tracer\n", tracer_name); + return -1; + } + + return 0; +} + +/* + * disable_tracer - set nop tracer to the insta + */ +void disable_tracer(struct tracefs_instance *inst) +{ + enum tracefs_tracers t = TRACEFS_TRACER_NOP; + int retval; + + retval = tracefs_tracer_set(inst, t); + if (retval < 0) + err_msg("Oops, error disabling tracer\n"); +} + +/* + * create_instance - create a trace instance with *instance_name + */ +struct tracefs_instance *create_instance(char *instance_name) +{ + return tracefs_instance_create(instance_name); +} + +/* + * destroy_instance - remove a trace instance and free the data + */ +void destroy_instance(struct tracefs_instance *inst) +{ + tracefs_instance_destroy(inst); + tracefs_instance_free(inst); +} + +/* + * save_trace_to_file - save the trace output of the instance to the file + */ +int save_trace_to_file(struct tracefs_instance *inst, const char *filename) +{ + const char *file = "trace"; + mode_t mode = 0644; + char buffer[4096]; + int out_fd, in_fd; + int retval = -1; + + if (!inst || !filename) + return 0; + + in_fd = tracefs_instance_file_open(inst, file, O_RDONLY); + if (in_fd < 0) { + err_msg("Failed to open trace file\n"); + return -1; + } + + printf(" Saving trace to %s\n", filename); + out_fd = creat(filename, mode); + if (out_fd < 0) { + err_msg("Failed to create output file %s\n", filename); + goto out_close_in; + } + + do { + retval = read(in_fd, buffer, sizeof(buffer)); + if (retval <= 0) + 
goto out_close; + + retval = write(out_fd, buffer, retval); + if (retval < 0) + goto out_close; + } while (retval > 0); + + retval = 0; +out_close: + close(out_fd); +out_close_in: + close(in_fd); + return retval; +} + +/* + * collect_registered_events - call the existing callback function for the event + * + * If an event has a registered callback function, call it. + * Otherwise, ignore the event. + */ +int +collect_registered_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + struct trace_seq *s = trace->seq; + + trace->processed_events++; + + if (!event->handler) + return 0; + + event->handler(s, record, event, context); + + return 0; +} + +/* + * collect_missed_events - record number of missed events + * + * If rtla cannot keep up with events generated by tracer, events are going + * to fall out of the ring buffer. + * Collect how many events were missed so it can be reported to the user. + */ +static int +collect_missed_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + + if (trace->missed_events == UINT64_MAX) + return 0; + + if (record->missed_events > 0) + trace->missed_events += record->missed_events; + else + /* Events missed but no data on how many */ + trace->missed_events = UINT64_MAX; + + return 0; +} + +/* + * trace_instance_destroy - destroy and free a rtla trace instance + */ +void trace_instance_destroy(struct trace_instance *trace) +{ + if (trace->inst) { + disable_tracer(trace->inst); + destroy_instance(trace->inst); + trace->inst = NULL; + } + + if (trace->seq) { + free(trace->seq); + trace->seq = NULL; + } + + if (trace->tep) { + tep_free(trace->tep); + trace->tep = NULL; + } +} + +/* + * trace_instance_init - create an rtla trace instance + * + * It is more than the tracefs instance, as it contains other + * things required for the tracing, such as the local events and + * a seq 
file. + * + * Note that the trace instance is returned disabled. This allows + * the tool to apply some other configs, like setting priority + * to the kernel threads, before starting generating trace entries. + */ +int trace_instance_init(struct trace_instance *trace, char *tool_name) +{ + trace->seq = calloc(1, sizeof(*trace->seq)); + if (!trace->seq) + goto out_err; + + trace_seq_init(trace->seq); + + trace->inst = create_instance(tool_name); + if (!trace->inst) + goto out_err; + + trace->tep = tracefs_local_events(NULL); + if (!trace->tep) + goto out_err; + + /* + * Let the main enable the record after setting some other + * things such as the priority of the tracer's threads. + */ + tracefs_trace_off(trace->inst); + + /* + * Collect the number of events missed due to tracefs buffer + * overflow. + */ + trace->missed_events = 0; + tracefs_follow_missed_events(trace->inst, + collect_missed_events, + trace); + + trace->processed_events = 0; + + return 0; + +out_err: + trace_instance_destroy(trace); + return 1; +} + +/* + * trace_instance_start - start tracing a given rtla instance + */ +int trace_instance_start(struct trace_instance *trace) +{ + return tracefs_trace_on(trace->inst); +} + +/* + * trace_instance_stop - stop tracing a given rtla instance + */ +int trace_instance_stop(struct trace_instance *trace) +{ + return tracefs_trace_off(trace->inst); +} + +/* + * trace_events_free - free a list of trace events + */ +static void trace_events_free(struct trace_events *events) +{ + struct trace_events *tevent = events; + struct trace_events *free_event; + + while (tevent) { + free_event = tevent; + + tevent = tevent->next; + + if (free_event->filter) + free(free_event->filter); + if (free_event->trigger) + free(free_event->trigger); + free(free_event->system); + free(free_event); + } +} + +/* + * trace_event_alloc - alloc and parse a single trace event + */ +struct trace_events *trace_event_alloc(const char *event_string) +{ + struct trace_events *tevent; + + 
tevent = calloc(1, sizeof(*tevent)); + if (!tevent) + return NULL; + + tevent->system = strdup(event_string); + if (!tevent->system) { + free(tevent); + return NULL; + } + + tevent->event = strstr(tevent->system, ":"); + if (tevent->event) { + *tevent->event = '\0'; + tevent->event = &tevent->event[1]; + } + + return tevent; +} + +/* + * trace_event_add_filter - record an event filter + */ +int trace_event_add_filter(struct trace_events *event, char *filter) +{ + if (event->filter) + free(event->filter); + + event->filter = strdup(filter); + if (!event->filter) + return 1; + + return 0; +} + +/* + * trace_event_add_trigger - record an event trigger action + */ +int trace_event_add_trigger(struct trace_events *event, char *trigger) +{ + if (event->trigger) + free(event->trigger); + + event->trigger = strdup(trigger); + if (!event->trigger) + return 1; + + return 0; +} + +/* + * trace_event_disable_filter - disable an event filter + */ +static void trace_event_disable_filter(struct trace_instance *instance, + struct trace_events *tevent) +{ + char filter[1024]; + int retval; + + if (!tevent->filter) + return; + + if (!tevent->filter_enabled) + return; + + debug_msg("Disabling %s:%s filter %s\n", tevent->system, + tevent->event ? : "*", tevent->filter); + + snprintf(filter, 1024, "!%s\n", tevent->filter); + + retval = tracefs_event_file_write(instance->inst, tevent->system, + tevent->event, "filter", filter); + if (retval < 0) + err_msg("Error disabling %s:%s filter %s\n", tevent->system, + tevent->event ? : "*", tevent->filter); +} + +/* + * trace_event_save_hist - save the content of an event hist + * + * If the trigger is a hist: one, save the content of the hist file. 
+ */ +static void trace_event_save_hist(struct trace_instance *instance, + struct trace_events *tevent) +{ + int retval, index, out_fd; + mode_t mode = 0644; + char path[1024]; + char *hist; + + if (!tevent) + return; + + /* trigger enables hist */ + if (!tevent->trigger) + return; + + /* is this a hist: trigger? */ + retval = strncmp(tevent->trigger, "hist:", strlen("hist:")); + if (retval) + return; + + snprintf(path, 1024, "%s_%s_hist.txt", tevent->system, tevent->event); + + printf(" Saving event %s:%s hist to %s\n", tevent->system, tevent->event, path); + + out_fd = creat(path, mode); + if (out_fd < 0) { + err_msg(" Failed to create %s output file\n", path); + return; + } + + hist = tracefs_event_file_read(instance->inst, tevent->system, tevent->event, "hist", 0); + if (!hist) { + err_msg(" Failed to read %s:%s hist file\n", tevent->system, tevent->event); + goto out_close; + } + + index = 0; + do { + index += write(out_fd, &hist[index], strlen(hist) - index); + } while (index < strlen(hist)); + + free(hist); +out_close: + close(out_fd); +} + +/* + * trace_event_disable_trigger - disable an event trigger + */ +static void trace_event_disable_trigger(struct trace_instance *instance, + struct trace_events *tevent) +{ + char trigger[1024]; + int retval; + + if (!tevent->trigger) + return; + + if (!tevent->trigger_enabled) + return; + + debug_msg("Disabling %s:%s trigger %s\n", tevent->system, + tevent->event ? : "*", tevent->trigger); + + trace_event_save_hist(instance, tevent); + + snprintf(trigger, 1024, "!%s\n", tevent->trigger); + + retval = tracefs_event_file_write(instance->inst, tevent->system, + tevent->event, "trigger", trigger); + if (retval < 0) + err_msg("Error disabling %s:%s trigger %s\n", tevent->system, + tevent->event ? 
: "*", tevent->trigger); +} + +/* + * trace_events_disable - disable all trace events + */ +void trace_events_disable(struct trace_instance *instance, + struct trace_events *events) +{ + struct trace_events *tevent = events; + + if (!events) + return; + + while (tevent) { + debug_msg("Disabling event %s:%s\n", tevent->system, tevent->event ? : "*"); + if (tevent->enabled) { + trace_event_disable_filter(instance, tevent); + trace_event_disable_trigger(instance, tevent); + tracefs_event_disable(instance->inst, tevent->system, tevent->event); + } + + tevent->enabled = 0; + tevent = tevent->next; + } +} + +/* + * trace_event_enable_filter - enable an event filter associated with an event + */ +static int trace_event_enable_filter(struct trace_instance *instance, + struct trace_events *tevent) +{ + char filter[1024]; + int retval; + + if (!tevent->filter) + return 0; + + if (!tevent->event) { + err_msg("Filter %s applies only for single events, not for all %s:* events\n", + tevent->filter, tevent->system); + return 1; + } + + snprintf(filter, 1024, "%s\n", tevent->filter); + + debug_msg("Enabling %s:%s filter %s\n", tevent->system, + tevent->event ? : "*", tevent->filter); + + retval = tracefs_event_file_write(instance->inst, tevent->system, + tevent->event, "filter", filter); + if (retval < 0) { + err_msg("Error enabling %s:%s filter %s\n", tevent->system, + tevent->event ? 
: "*", tevent->filter); + return 1; + } + + tevent->filter_enabled = 1; + return 0; +} + +/* + * trace_event_enable_trigger - enable an event trigger associated with an event + */ +static int trace_event_enable_trigger(struct trace_instance *instance, + struct trace_events *tevent) +{ + char trigger[1024]; + int retval; + + if (!tevent->trigger) + return 0; + + if (!tevent->event) { + err_msg("Trigger %s applies only for single events, not for all %s:* events\n", + tevent->trigger, tevent->system); + return 1; + } + + snprintf(trigger, 1024, "%s\n", tevent->trigger); + + debug_msg("Enabling %s:%s trigger %s\n", tevent->system, + tevent->event ? : "*", tevent->trigger); + + retval = tracefs_event_file_write(instance->inst, tevent->system, + tevent->event, "trigger", trigger); + if (retval < 0) { + err_msg("Error enabling %s:%s trigger %s\n", tevent->system, + tevent->event ? : "*", tevent->trigger); + return 1; + } + + tevent->trigger_enabled = 1; + + return 0; +} + +/* + * trace_events_enable - enable all events + */ +int trace_events_enable(struct trace_instance *instance, + struct trace_events *events) +{ + struct trace_events *tevent = events; + int retval; + + while (tevent) { + debug_msg("Enabling event %s:%s\n", tevent->system, tevent->event ? : "*"); + retval = tracefs_event_enable(instance->inst, tevent->system, tevent->event); + if (retval < 0) { + err_msg("Error enabling event %s:%s\n", tevent->system, + tevent->event ? 
: "*"); + return 1; + } + + retval = trace_event_enable_filter(instance, tevent); + if (retval) + return 1; + + retval = trace_event_enable_trigger(instance, tevent); + if (retval) + return 1; + + tevent->enabled = 1; + tevent = tevent->next; + } + + return 0; +} + +/* + * trace_events_destroy - disable and free all trace events + */ +void trace_events_destroy(struct trace_instance *instance, + struct trace_events *events) +{ + if (!events) + return; + + trace_events_disable(instance, events); + trace_events_free(events); +} + +/* + * trace_set_buffer_size - set the per-cpu tracing buffer size. + */ +int trace_set_buffer_size(struct trace_instance *trace, int size) +{ + int retval; + + debug_msg("Setting trace buffer size to %d Kb\n", size); + retval = tracefs_instance_set_buffer_size(trace->inst, size, -1); + if (retval) + err_msg("Error setting trace buffer size\n"); + + return retval; +} diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h new file mode 100644 index 000000000000..1e5aee4b828d --- /dev/null +++ b/tools/tracing/rtla/src/trace.h @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <tracefs.h> +#include <stddef.h> + +struct trace_events { + struct trace_events *next; + char *system; + char *event; + char *filter; + char *trigger; + char enabled; + char filter_enabled; + char trigger_enabled; +}; + +struct trace_instance { + struct tracefs_instance *inst; + struct tep_handle *tep; + struct trace_seq *seq; + unsigned long long missed_events; + unsigned long long processed_events; +}; + +int trace_instance_init(struct trace_instance *trace, char *tool_name); +int trace_instance_start(struct trace_instance *trace); +int trace_instance_stop(struct trace_instance *trace); +void trace_instance_destroy(struct trace_instance *trace); + +struct trace_seq *get_trace_seq(void); +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name); +void disable_tracer(struct tracefs_instance *inst); + +struct 
tracefs_instance *create_instance(char *instance_name); +void destroy_instance(struct tracefs_instance *inst); + +int save_trace_to_file(struct tracefs_instance *inst, const char *filename); +int collect_registered_events(struct tep_event *tep, struct tep_record *record, + int cpu, void *context); + +struct trace_events *trace_event_alloc(const char *event_string); +void trace_events_disable(struct trace_instance *instance, + struct trace_events *events); +void trace_events_destroy(struct trace_instance *instance, + struct trace_events *events); +int trace_events_enable(struct trace_instance *instance, + struct trace_events *events); + +int trace_event_add_filter(struct trace_events *event, char *filter); +int trace_event_add_trigger(struct trace_events *event, char *trigger); +int trace_set_buffer_size(struct trace_instance *trace, int size); diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c new file mode 100644 index 000000000000..9cf5a0098e9a --- /dev/null +++ b/tools/tracing/rtla/src/utils.c @@ -0,0 +1,1002 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#ifdef HAVE_LIBCPUPOWER_SUPPORT +#include <cpuidle.h> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ +#include <dirent.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> + +#include "utils.h" + +#define MAX_MSG_LENGTH 1024 +int config_debug; + +/* + * err_msg - print an error message to the stderr + */ +void err_msg(const char *fmt, ...) +{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * debug_msg - print a debug message to stderr if debug is set + */ +void debug_msg(const char *fmt, ...) 
+{ + char message[MAX_MSG_LENGTH]; + va_list ap; + + if (!config_debug) + return; + + va_start(ap, fmt); + vsnprintf(message, sizeof(message), fmt, ap); + va_end(ap); + + fprintf(stderr, "%s", message); +} + +/* + * fatal - print an error message and EOL to stderr and exit with ERROR + */ +void fatal(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "\n"); + + exit(ERROR); +} + +/* + * get_llong_from_str - get a long long int from a string + */ +long long get_llong_from_str(char *start) +{ + long long value; + char *end; + + errno = 0; + value = strtoll(start, &end, 10); + if (errno || start == end) + return -1; + + return value; +} + +/* + * get_duration - fill output with a human readable duration since start_time + */ +void get_duration(time_t start_time, char *output, int output_size) +{ + time_t now = time(NULL); + struct tm *tm_info; + time_t duration; + + duration = difftime(now, start_time); + tm_info = gmtime(&duration); + + snprintf(output, output_size, "%3d %02d:%02d:%02d", + tm_info->tm_yday, + tm_info->tm_hour, + tm_info->tm_min, + tm_info->tm_sec); +} + +/* + * parse_cpu_set - parse a cpu_list filling cpu_set_t argument + * + * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set + * filling cpu_set_t argument. + * + * Returns 0 on success, 1 otherwise. 
+ */ +int parse_cpu_set(char *cpu_list, cpu_set_t *set) +{ + const char *p; + int end_cpu; + int nr_cpus; + int cpu; + int i; + + CPU_ZERO(set); + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (p = cpu_list; *p; ) { + cpu = atoi(p); + if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) + goto err; + + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + end_cpu = atoi(p); + if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) + goto err; + while (isdigit(*p)) + p++; + } else + end_cpu = cpu; + + if (cpu == end_cpu) { + debug_msg("cpu_set: adding cpu %d\n", cpu); + CPU_SET(cpu, set); + } else { + for (i = cpu; i <= end_cpu; i++) { + debug_msg("cpu_set: adding cpu %d\n", i); + CPU_SET(i, set); + } + } + + if (*p == ',') + p++; + } + + return 0; +err: + debug_msg("Error parsing the cpu set %s\n", cpu_list); + return 1; +} + +/* + * parse_duration - parse duration with s/m/h/d suffix converting it to seconds + */ +long parse_seconds_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + switch (*end) { + case 's': + case 'S': + break; + case 'm': + case 'M': + t *= 60; + break; + case 'h': + case 'H': + t *= 60 * 60; + break; + + case 'd': + case 'D': + t *= 24 * 60 * 60; + break; + } + } + + return t; +} + +/* + * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds + */ +long parse_ns_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + if (!strncmp(end, "ns", 2)) { + return t; + } else if (!strncmp(end, "us", 2)) { + t *= 1000; + return t; + } else if (!strncmp(end, "ms", 2)) { + t *= 1000 * 1000; + return t; + } else if (!strncmp(end, "s", 1)) { + t *= 1000 * 1000 * 1000; + return t; + } + return -1; + } + + return t; +} + +/* + * This is a set of helper functions to use SCHED_DEADLINE. 
+ */ +#ifndef __NR_sched_setattr +# ifdef __x86_64__ +# define __NR_sched_setattr 314 +# elif __i386__ +# define __NR_sched_setattr 351 +# elif __arm__ +# define __NR_sched_setattr 380 +# elif __aarch64__ || __riscv +# define __NR_sched_setattr 274 +# elif __powerpc__ +# define __NR_sched_setattr 355 +# elif __s390x__ +# define __NR_sched_setattr 345 +# elif __loongarch__ +# define __NR_sched_setattr 274 +# endif +#endif + +#define SCHED_DEADLINE 6 + +static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, + unsigned int flags) { + return syscall(__NR_sched_setattr, pid, attr, flags); +} + +int __set_sched_attr(int pid, struct sched_attr *attr) +{ + int flags = 0; + int retval; + + retval = syscall_sched_setattr(pid, attr, flags); + if (retval < 0) { + err_msg("Failed to set sched attributes to the pid %d: %s\n", + pid, strerror(errno)); + return 1; + } + + return 0; +} + +/* + * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm + * + * Check if the procfs entry is a directory of a process, and then check if the + * process has a comm with the prefix set in char *comm_prefix. As the + * current users of this function only check for kernel threads, there is no + * need to check for the threads for the process. + * + * Return: True if the proc_entry contains a comm file with comm_prefix*. + * Otherwise returns false. 
+ */ +static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) +{ + char buffer[MAX_PATH]; + int comm_fd, retval; + char *t_name; + + if (proc_entry->d_type != DT_DIR) + return 0; + + if (*proc_entry->d_name == '.') + return 0; + + /* check if the string is a pid */ + for (t_name = proc_entry->d_name; t_name; t_name++) { + if (!isdigit(*t_name)) + break; + } + + if (*t_name != '\0') + return 0; + + snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); + comm_fd = open(buffer, O_RDONLY); + if (comm_fd < 0) + return 0; + + memset(buffer, 0, MAX_PATH); + retval = read(comm_fd, buffer, MAX_PATH); + + close(comm_fd); + + if (retval <= 0) + return 0; + + retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); + if (retval) + return 0; + + /* comm already have \n */ + debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); + + return 1; +} + +/* + * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix + * + * This function uses procfs to list the currently running threads and then set the + * sched_attr *attr to the threads that start with char *comm_prefix. It is + * mainly used to set the priority to the kernel threads created by the + * tracers. 
+ */ +int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) +{ + struct dirent *proc_entry; + DIR *procfs; + int retval; + + if (strlen(comm_prefix) >= MAX_PATH) { + err_msg("Command prefix is too long: %d < strlen(%s)\n", + MAX_PATH, comm_prefix); + return 1; + } + + procfs = opendir("/proc"); + if (!procfs) { + err_msg("Could not open procfs\n"); + return 1; + } + + while ((proc_entry = readdir(procfs))) { + + retval = procfs_is_workload_pid(comm_prefix, proc_entry); + if (!retval) + continue; + + /* procfs_is_workload_pid confirmed it is a pid */ + retval = __set_sched_attr(atoi(proc_entry->d_name), attr); + if (retval) { + err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); + goto out_err; + } + + debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); + } + return 0; + +out_err: + closedir(procfs); + return 1; +} + +#define INVALID_VAL (~0L) +static long get_long_ns_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = parse_ns_duration(start); + + return val; +} + +static long get_long_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = get_llong_from_str(start); + + return val; +} + +/* + * parse priority in the format: + * SCHED_OTHER: + * o:<prio> + * O:<prio> + * SCHED_RR: + * r:<prio> + * R:<prio> + * SCHED_FIFO: + * f:<prio> + * F:<prio> + * SCHED_DEADLINE: + * d:runtime:period + * D:runtime:period + */ +int parse_prio(char *arg, struct sched_attr *sched_param) +{ + long prio; + long runtime; + long period; + + memset(sched_param, 0, sizeof(*sched_param)); + sched_param->size = sizeof(*sched_param); + + switch (arg[0]) { + case 'd': + case 'D': + /* d:runtime:period */ + if (strlen(arg) < 4) + return -1; + + runtime = get_long_ns_after_colon(arg); + if (runtime == 
INVALID_VAL) + return -1; + + period = get_long_ns_after_colon(&arg[2]); + if (period == INVALID_VAL) + return -1; + + if (runtime > period) + return -1; + + sched_param->sched_policy = SCHED_DEADLINE; + sched_param->sched_runtime = runtime; + sched_param->sched_deadline = period; + sched_param->sched_period = period; + break; + case 'f': + case 'F': + /* f:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_FIFO)) + return -1; + if (prio > sched_get_priority_max(SCHED_FIFO)) + return -1; + + sched_param->sched_policy = SCHED_FIFO; + sched_param->sched_priority = prio; + break; + case 'r': + case 'R': + /* r:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_RR)) + return -1; + if (prio > sched_get_priority_max(SCHED_RR)) + return -1; + + sched_param->sched_policy = SCHED_RR; + sched_param->sched_priority = prio; + break; + case 'o': + case 'O': + /* o:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < MIN_NICE) + return -1; + if (prio > MAX_NICE) + return -1; + + sched_param->sched_policy = SCHED_OTHER; + sched_param->sched_nice = prio; + break; + default: + return -1; + } + return 0; +} + +/* + * set_cpu_dma_latency - set the /dev/cpu_dma_latecy + * + * This is used to reduce the exit from idle latency. The value + * will be reset once the file descriptor of /dev/cpu_dma_latecy + * is closed. 
+ * + * Return: the /dev/cpu_dma_latecy file descriptor + */ +int set_cpu_dma_latency(int32_t latency) +{ + int retval; + int fd; + + fd = open("/dev/cpu_dma_latency", O_RDWR); + if (fd < 0) { + err_msg("Error opening /dev/cpu_dma_latency\n"); + return -1; + } + + retval = write(fd, &latency, 4); + if (retval < 1) { + err_msg("Error setting /dev/cpu_dma_latency\n"); + close(fd); + return -1; + } + + debug_msg("Set /dev/cpu_dma_latency to %d\n", latency); + + return fd; +} + +#ifdef HAVE_LIBCPUPOWER_SUPPORT +static unsigned int **saved_cpu_idle_disable_state; +static size_t saved_cpu_idle_disable_state_alloc_ctr; + +/* + * save_cpu_idle_state_disable - save disable for all idle states of a cpu + * + * Saves the current disable of all idle states of a cpu, to be subsequently + * restored via restore_cpu_idle_disable_state. + * + * Return: idle state count on success, negative on error + */ +int save_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int nr_cpus; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (saved_cpu_idle_disable_state == NULL) { + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); + if (!saved_cpu_idle_disable_state) + return -1; + } + + saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + saved_cpu_idle_disable_state_alloc_ctr++; + + for (state = 0; state < nr_states; state++) { + disabled = cpuidle_is_state_disabled(cpu, state); + if (disabled < 0) + return disabled; + saved_cpu_idle_disable_state[cpu][state] = disabled; + } + + return nr_states; +} + +/* + * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu + * + * Restores the current disable state of all idle states of a cpu that was + * previously saved by save_cpu_idle_disable_state. 
+ * + * Return: idle state count on success, negative on error + */ +int restore_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int result; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (!saved_cpu_idle_disable_state) + return -1; + + for (state = 0; state < nr_states; state++) { + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + disabled = saved_cpu_idle_disable_state[cpu][state]; + result = cpuidle_state_disable(cpu, state, disabled); + if (result < 0) + return result; + } + + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + saved_cpu_idle_disable_state_alloc_ctr--; + if (saved_cpu_idle_disable_state_alloc_ctr == 0) { + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; + } + + return nr_states; +} + +/* + * free_cpu_idle_disable_states - free saved idle state disable for all cpus + * + * Frees the memory used for storing cpu idle state disable for all cpus + * and states. + * + * Normally, the memory is freed automatically in + * restore_cpu_idle_disable_state; this is mostly for cleaning up after an + * error. + */ +void free_cpu_idle_disable_states(void) +{ + int cpu; + int nr_cpus; + + if (!saved_cpu_idle_disable_state) + return; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (cpu = 0; cpu < nr_cpus; cpu++) { + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + } + + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; +} + +/* + * set_deepest_cpu_idle_state - limit idle state of cpu + * + * Disables all idle states deeper than the one given in + * deepest_state (assuming states with higher number are deeper). + * + * This is used to reduce the exit from idle latency. Unlike + * set_cpu_dma_latency, it can disable idle states per cpu. 
+ * + * Return: idle state count on success, negative on error + */ +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) +{ + unsigned int nr_states; + unsigned int state; + int result; + + nr_states = cpuidle_state_count(cpu); + + for (state = deepest_state + 1; state < nr_states; state++) { + result = cpuidle_state_disable(cpu, state, 1); + if (result < 0) + return result; + } + + return nr_states; +} +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * find_mount - find the mount point of a given fs + * + * Returns 0 if mount is not found, otherwise return 1 and fill mp + * with the mount point. + */ +static const int find_mount(const char *fs, char *mp, int sizeof_mp) +{ + char mount_point[MAX_PATH+1]; + char type[100]; + int found = 0; + FILE *fp; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return 0; + + while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { + if (strcmp(type, fs) == 0) { + found = 1; + break; + } + } + fclose(fp); + + if (!found) + return 0; + + memset(mp, 0, sizeof_mp); + strncpy(mp, mount_point, sizeof_mp - 1); + + debug_msg("Fs %s found at %s\n", fs, mp); + return 1; +} + +/* + * get_self_cgroup - get the current thread cgroup path + * + * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: + * + * 0::/user.slice/user-0.slice/session-3.scope'\n' + * + * This function is interested in the content after the second : and before the '\n'. + * + * Returns 1 if a string was found, 0 otherwise. 
+ */ +static int get_self_cgroup(char *self_cg, int sizeof_self_cg) +{ + char path[MAX_PATH], *start; + int fd, retval; + + snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); + + fd = open(path, O_RDONLY); + if (fd < 0) + return 0; + + retval = read(fd, path, MAX_PATH); + + close(fd); + + if (retval <= 0) + return 0; + + start = path; + + start = strstr(start, ":"); + if (!start) + return 0; + + /* skip ":" */ + start++; + + start = strstr(start, ":"); + if (!start) + return 0; + + /* skip ":" */ + start++; + + if (strlen(start) >= sizeof_self_cg) + return 0; + + snprintf(self_cg, sizeof_self_cg, "%s", start); + + /* Swap '\n' with '\0' */ + start = strstr(self_cg, "\n"); + + /* there must be '\n' */ + if (!start) + return 0; + + /* ok, it found a string after the second : and before the \n */ + *start = '\0'; + + return 1; +} + +/* + * set_pid_cgroup - Set cgroup to pid_t pid + * + * If cgroup argument is not NULL, the threads will move to the given cgroup. + * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. + * + * Supports cgroup v2. + * + * Returns 1 on success, 0 otherwise. 
+ */ +int set_pid_cgroup(pid_t pid, const char *cgroup) +{ + char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; + char cgroup_procs[MAX_PATH]; + char pid_str[24]; + int retval; + int cg_fd; + + retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); + if (!retval) { + err_msg("Did not find cgroupv2 mount point\n"); + return 0; + } + + if (!cgroup) { + retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path)); + if (!retval) { + err_msg("Did not find self cgroup\n"); + return 0; + } + } else { + snprintf(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); + } + + snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); + + debug_msg("Using cgroup path at: %s\n", cgroup_procs); + + cg_fd = open(cgroup_procs, O_RDWR); + if (cg_fd < 0) + return 0; + + snprintf(pid_str, sizeof(pid_str), "%d\n", pid); + + retval = write(cg_fd, pid_str, strlen(pid_str)); + if (retval < 0) + err_msg("Error setting cgroup attributes for pid:%s - %s\n", + pid_str, strerror(errno)); + else + debug_msg("Set cgroup attributes for pid:%s\n", pid_str); + + close(cg_fd); + + return (retval >= 0); +} + +/** + * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix + * + * If cgroup argument is not NULL, the threads will move to the given cgroup. + * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. + * + * Supports cgroup v2. + * + * Returns 1 on success, 0 otherwise. 
+ */ +int set_comm_cgroup(const char *comm_prefix, const char *cgroup) +{ + char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; + char cgroup_procs[MAX_PATH]; + struct dirent *proc_entry; + DIR *procfs; + int retval; + int cg_fd; + + if (strlen(comm_prefix) >= MAX_PATH) { + err_msg("Command prefix is too long: %d < strlen(%s)\n", + MAX_PATH, comm_prefix); + return 0; + } + + retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); + if (!retval) { + err_msg("Did not find cgroupv2 mount point\n"); + return 0; + } + + if (!cgroup) { + retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path)); + if (!retval) { + err_msg("Did not find self cgroup\n"); + return 0; + } + } else { + snprintf(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); + } + + snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); + + debug_msg("Using cgroup path at: %s\n", cgroup_procs); + + cg_fd = open(cgroup_procs, O_RDWR); + if (cg_fd < 0) + return 0; + + procfs = opendir("/proc"); + if (!procfs) { + err_msg("Could not open procfs\n"); + goto out_cg; + } + + while ((proc_entry = readdir(procfs))) { + + retval = procfs_is_workload_pid(comm_prefix, proc_entry); + if (!retval) + continue; + + retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); + if (retval < 0) { + err_msg("Error setting cgroup attributes for pid:%s - %s\n", + proc_entry->d_name, strerror(errno)); + goto out_procfs; + } + + debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); + } + + closedir(procfs); + close(cg_fd); + return 1; + +out_procfs: + closedir(procfs); +out_cg: + close(cg_fd); + return 0; +} + +/** + * auto_house_keeping - Automatically move rtla out of measurement threads + * + * Try to move rtla away from the tracer, if possible. + * + * Returns 1 on success, 0 otherwise. 
+ */ +int auto_house_keeping(cpu_set_t *monitored_cpus) +{ + cpu_set_t rtla_cpus, house_keeping_cpus; + int retval; + + /* first get the CPUs in which rtla can actually run. */ + retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); + if (retval == -1) { + debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); + return 0; + } + + /* then check if the existing setup is already good. */ + CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); + if (!CPU_COUNT(&house_keeping_cpus)) { + debug_msg("rtla and the monitored CPUs do not share CPUs."); + debug_msg("Skipping auto house-keeping\n"); + return 1; + } + + /* remove the intersection */ + CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); + + /* get only those that rtla can run */ + CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); + + /* is there any cpu left? */ + if (!CPU_COUNT(&house_keeping_cpus)) { + debug_msg("Could not find any CPU for auto house-keeping\n"); + return 0; + } + + retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); + if (retval == -1) { + debug_msg("Could not set affinity for auto house-keeping\n"); + return 0; + } + + debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); + + return 1; +} + +/** + * parse_optional_arg - Parse optional argument value + * + * Parse optional argument value, which can be in the form of: + * -sarg, -s/--long=arg, -s/--long arg + * + * Returns arg value if found, NULL otherwise. 
+ */ +char *parse_optional_arg(int argc, char **argv) +{ + if (optarg) { + if (optarg[0] == '=') { + /* skip the = */ + return &optarg[1]; + } else { + return optarg; + } + /* parse argument of form -s [arg] and --long [arg]*/ + } else if (optind < argc && argv[optind][0] != '-') { + /* consume optind */ + return argv[optind++]; + } else { + return NULL; + } +} diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h new file mode 100644 index 000000000000..091df4ba4587 --- /dev/null +++ b/tools/tracing/rtla/src/utils.h @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdint.h> +#include <time.h> +#include <sched.h> + +/* + * '18446744073709551615\0' + */ +#define BUFF_U64_STR_SIZE 24 +#define MAX_PATH 1024 +#define MAX_NICE 20 +#define MIN_NICE -19 + +#define container_of(ptr, type, member)({ \ + const typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)) ; }) + +extern int config_debug; +void debug_msg(const char *fmt, ...); +void err_msg(const char *fmt, ...); +void fatal(const char *fmt, ...); + +long parse_seconds_duration(char *val); +void get_duration(time_t start_time, char *output, int output_size); + +int parse_cpu_list(char *cpu_list, char **monitored_cpus); +char *parse_optional_arg(int argc, char **argv); +long long get_llong_from_str(char *start); + +static inline void +update_min(unsigned long long *a, unsigned long long *b) +{ + if (*a > *b) + *a = *b; +} + +static inline void +update_max(unsigned long long *a, unsigned long long *b) +{ + if (*a < *b) + *a = *b; +} + +static inline void +update_sum(unsigned long long *a, unsigned long long *b) +{ + *a += *b; +} + +#ifndef SCHED_ATTR_SIZE_VER0 +struct sched_attr { + uint32_t size; + uint32_t sched_policy; + uint64_t sched_flags; + int32_t sched_nice; + uint32_t sched_priority; + uint64_t sched_runtime; + uint64_t sched_deadline; + uint64_t sched_period; +}; +#endif /* SCHED_ATTR_SIZE_VER0 */ + +int parse_prio(char 
*arg, struct sched_attr *sched_param); +int parse_cpu_set(char *cpu_list, cpu_set_t *set); +int __set_sched_attr(int pid, struct sched_attr *attr); +int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr); +int set_comm_cgroup(const char *comm_prefix, const char *cgroup); +int set_pid_cgroup(pid_t pid, const char *cgroup); +int set_cpu_dma_latency(int32_t latency); +#ifdef HAVE_LIBCPUPOWER_SUPPORT +int save_cpu_idle_disable_state(unsigned int cpu); +int restore_cpu_idle_disable_state(unsigned int cpu); +void free_cpu_idle_disable_states(void); +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state); +static inline int have_libcpupower_support(void) { return 1; } +#else +static inline int save_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline int restore_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline void free_cpu_idle_disable_states(void) { } +static inline int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state) { return -1; } +static inline int have_libcpupower_support(void) { return 0; } +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ +int auto_house_keeping(cpu_set_t *monitored_cpus); + +#define ns_to_usf(x) (((double)x/1000)) +#define ns_to_per(total, part) ((part * 100) / (double)total) + +enum result { + PASSED = 0, /* same as EXIT_SUCCESS */ + ERROR = 1, /* same as EXIT_FAILURE, an error in arguments */ + FAILED = 2, /* test hit the stop tracing condition */ +}; diff --git a/tools/tracing/rtla/tests/engine.sh b/tools/tracing/rtla/tests/engine.sh new file mode 100644 index 000000000000..c7de3d6ed6a8 --- /dev/null +++ b/tools/tracing/rtla/tests/engine.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +test_begin() { + # Count tests to allow the test harness to double-check if all were + # included correctly. 
+ ctr=0 + [ -z "$RTLA" ] && RTLA="./rtla" + [ -n "$TEST_COUNT" ] && echo "1..$TEST_COUNT" +} + +reset_osnoise() { + # Reset osnoise options to default and remove any dangling instances created + # by improperly exited rtla runs. + pushd /sys/kernel/tracing >/dev/null || return 1 + + # Remove dangling instances created by previous rtla run + echo 0 > tracing_thresh + cd instances + for i in osnoise_top osnoise_hist timerlat_top timerlat_hist + do + [ ! -d "$i" ] && continue + rmdir "$i" + done + + # Reset options to default + # Note: those are copied from the default values of osnoise_data + # in kernel/trace/trace_osnoise.c + cd ../osnoise + echo all > cpus + echo DEFAULTS > options + echo 1000000 > period_us + echo 0 > print_stack + echo 1000000 > runtime_us + echo 0 > stop_tracing_total_us + echo 0 > stop_tracing_us + echo 1000 > timerlat_period_us + + popd >/dev/null +} + +check() { + test_name=$0 + tested_command=$1 + expected_exitcode=${3:-0} + expected_output=$4 + unexpected_output=$5 + # Simple check: run rtla with given arguments and test exit code. + # If TEST_COUNT is set, run the test. Otherwise, just count. + ctr=$(($ctr + 1)) + if [ -n "$TEST_COUNT" ] + then + # Reset osnoise options before running test. + [ "$NO_RESET_OSNOISE" == 1 ] || reset_osnoise + # Run rtla; in case of failure, include its output as comment + # in the test results. + result=$(eval stdbuf -oL $TIMEOUT "$RTLA" $2 2>&1); exitcode=$? + failbuf='' + fail=0 + + # Test if the results matches if requested + if [ -n "$expected_output" ] && ! 
grep -qE "$expected_output" <<< "$result" + then + fail=1 + failbuf+=$(printf "# Output match failed: \"%s\"" "$expected_output") + failbuf+=$'\n' + fi + + if [ -n "$unexpected_output" ] && grep -qE "$unexpected_output" <<< "$result" + then + fail=1 + failbuf+=$(printf "# Output non-match failed: \"%s\"" "$unexpected_output") + failbuf+=$'\n' + fi + + if [ $exitcode -eq $expected_exitcode ] && [ $fail -eq 0 ] + then + echo "ok $ctr - $1" + else + # Add rtla output and exit code as comments in case of failure + echo "not ok $ctr - $1" + echo -n "$failbuf" + echo "$result" | col -b | while read line; do echo "# $line"; done + printf "#\n# exit code %s\n" $exitcode + fi + fi +} + +check_with_osnoise_options() { + # Do the same as "check", but with pre-set osnoise options. + # Note: rtla should reset the osnoise options, this is used to test + # if it indeed does so. + # Save original arguments + arg1=$1 + arg2=$2 + arg3=$3 + + # Apply osnoise options (if not dry run) + if [ -n "$TEST_COUNT" ] + then + [ "$NO_RESET_OSNOISE" == 1 ] || reset_osnoise + shift + shift + while shift + do + [ "$1" == "" ] && continue + option=$(echo $1 | cut -d '=' -f 1) + value=$(echo $1 | cut -d '=' -f 2) + echo "option: $option, value: $value" + echo "$value" > "/sys/kernel/tracing/osnoise/$option" || return 1 + done + fi + + NO_RESET_OSNOISE=1 check "$arg1" "$arg2" "$arg3" +} + +set_timeout() { + TIMEOUT="timeout -v -k 15s $1" +} + +unset_timeout() { + unset TIMEOUT +} + +set_no_reset_osnoise() { + NO_RESET_OSNOISE=1 +} + +unset_no_reset_osnoise() { + unset NO_RESET_OSNOISE +} + +test_end() { + # If running without TEST_COUNT, tests are not actually run, just + # counted. In that case, re-run the test with the correct count. 
+ [ -z "$TEST_COUNT" ] && TEST_COUNT=$ctr exec bash $0 || true +} + +# Avoid any environmental discrepancies +export LC_ALL=C +unset_timeout diff --git a/tools/tracing/rtla/tests/hwnoise.t b/tools/tracing/rtla/tests/hwnoise.t new file mode 100644 index 000000000000..23ce250a6852 --- /dev/null +++ b/tools/tracing/rtla/tests/hwnoise.t @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source tests/engine.sh +test_begin + +set_timeout 2m + +check "verify help page" \ + "hwnoise --help" 0 "summary of hardware-related noise" +check "detect noise higher than one microsecond" \ + "hwnoise -c 0 -T 1 -d 5s -q" 0 +check "set the automatic trace mode" \ + "hwnoise -a 5 -d 10s" 2 "osnoise hit stop tracing" +check "set scheduling param to the osnoise tracer threads" \ + "hwnoise -P F:1 -c 0 -r 900000 -d 10s -q" +check "stop the trace if a single sample is higher than 1 us" \ + "hwnoise -s 1 -T 1 -t -d 10s" 2 "Saving trace to osnoise_trace.txt" +check "enable a trace event trigger" \ + "hwnoise -t -e osnoise:irq_noise --trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 10s" \ + 0 "Saving event osnoise:irq_noise hist to osnoise_irq_noise_hist.txt" + +test_end diff --git a/tools/tracing/rtla/tests/osnoise.t b/tools/tracing/rtla/tests/osnoise.t new file mode 100644 index 000000000000..396334608920 --- /dev/null +++ b/tools/tracing/rtla/tests/osnoise.t @@ -0,0 +1,50 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source tests/engine.sh +test_begin + +set_timeout 2m + +check "verify help page" \ + "osnoise --help" 0 "osnoise version" +check "verify the --priority/-P param" \ + "osnoise top -P F:1 -c 0 -r 900000 -d 10s -q -S 1 --on-threshold shell,command=\"tests/scripts/check-priority.sh osnoise/ SCHED_FIFO 1\"" \ + 2 "Priorities are set correctly" +check "verify the --stop/-s param" \ + "osnoise top -s 30 -T 1" 2 "osnoise hit stop tracing" +check "verify the --trace param" \ + "osnoise hist -s 30 -T 1 -t" 2 "Saving trace to 
osnoise_trace.txt" +check "verify the --entries/-E param" \ + "osnoise hist -P F:1 -c 0 -r 900000 -d 10s -b 10 -E 25" + +# Test setting default period by putting an absurdly high period +# and stopping on threshold. +# If default period is not set, this will time out. +check_with_osnoise_options "apply default period" \ + "osnoise hist -s 1" 2 period_us=600000000 + +# Actions tests +check "trace output through -t with custom filename" \ + "osnoise hist -S 2 -t custom_filename.txt" 2 "^ Saving trace to custom_filename.txt$" +check "trace output through --on-threshold trace" \ + "osnoise hist -S 2 --on-threshold trace" 2 "^ Saving trace to osnoise_trace.txt$" +check "trace output through --on-threshold trace with custom filename" \ + "osnoise hist -S 2 --on-threshold trace,file=custom_filename.txt" 2 "^ Saving trace to custom_filename.txt$" +check "exec command" \ + "osnoise hist -S 2 --on-threshold shell,command='echo TestOutput'" 2 "^TestOutput$" +check "multiple actions" \ + "osnoise hist -S 2 --on-threshold shell,command='echo -n 1' --on-threshold shell,command='echo 2'" 2 "^12$" +check "hist stop at failed action" \ + "osnoise hist -S 2 --on-threshold shell,command='echo -n 1; false' --on-threshold shell,command='echo -n 2'" 2 "^1# RTLA osnoise histogram$" +check "top stop at failed action" \ + "osnoise top -S 2 --on-threshold shell,command='echo -n abc; false' --on-threshold shell,command='echo -n defgh'" 2 "^abc" "defgh" +check "hist with continue" \ + "osnoise hist -S 2 -d 5s --on-threshold shell,command='echo TestOutput' --on-threshold continue" 0 "^TestOutput$" +check "top with continue" \ + "osnoise top -q -S 2 -d 5s --on-threshold shell,command='echo TestOutput' --on-threshold continue" 0 "^TestOutput$" +check "hist with trace output at end" \ + "osnoise hist -d 1s --on-end trace" 0 "^ Saving trace to osnoise_trace.txt$" +check "top with trace output at end" \ + "osnoise top -d 1s --on-end trace" 0 "^ Saving trace to osnoise_trace.txt$" + +test_end diff 
--git a/tools/tracing/rtla/tests/scripts/check-priority.sh b/tools/tracing/rtla/tests/scripts/check-priority.sh new file mode 100755 index 000000000000..79b702a34a96 --- /dev/null +++ b/tools/tracing/rtla/tests/scripts/check-priority.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +pids="$(pgrep ^$1)" || exit 1 +for pid in $pids +do + chrt -p $pid | cut -d ':' -f 2 | head -n1 | grep "^ $2\$" >/dev/null + chrt -p $pid | cut -d ':' -f 2 | tail -n1 | grep "^ $3\$" >/dev/null +done && echo "Priorities are set correctly" diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t new file mode 100644 index 000000000000..bbaa1897d8a8 --- /dev/null +++ b/tools/tracing/rtla/tests/timerlat.t @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source tests/engine.sh +test_begin + +set_timeout 2m +timerlat_sample_event='/sys/kernel/tracing/events/osnoise/timerlat_sample' + +if ldd $RTLA | grep libbpf >/dev/null && [ -d "$timerlat_sample_event" ] +then + # rtla build with BPF and system supports BPF mode + no_bpf_options='0 1' +else + no_bpf_options='1' +fi + +# Do every test with and without BPF +for option in $no_bpf_options +do +export RTLA_NO_BPF=$option + +# Basic tests +check "verify help page" \ + "timerlat --help" 0 "timerlat version" +check "verify -s/--stack" \ + "timerlat top -s 3 -T 10 -t" 2 "Blocking thread stack trace" +check "verify -P/--priority" \ + "timerlat top -P F:1 -c 0 -d 10s -q -T 1 --on-threshold shell,command=\"tests/scripts/check-priority.sh timerlatu/ SCHED_FIFO 1\"" \ + 2 "Priorities are set correctly" +check "test in nanoseconds" \ + "timerlat top -i 2 -c 0 -n -d 10s" 2 "ns" +check "set the automatic trace mode" \ + "timerlat top -a 5" 2 "analyzing it" +check "dump tasks" \ + "timerlat top -a 5 --dump-tasks" 2 "Printing CPU tasks" +check "print the auto-analysis if hits the stop tracing condition" \ + "timerlat top --aa-only 5" 2 +check "disable auto-analysis" \ + "timerlat top -s 3 -T 
10 -t --no-aa" 2 +check "verify -c/--cpus" \ + "timerlat hist -c 0 -d 10s" +check "hist test in nanoseconds" \ + "timerlat hist -i 2 -c 0 -n -d 10s" 2 "ns" + +# Actions tests +check "trace output through -t" \ + "timerlat hist -T 2 -t" 2 "^ Saving trace to timerlat_trace.txt$" +check "trace output through -t with custom filename" \ + "timerlat hist -T 2 -t custom_filename.txt" 2 "^ Saving trace to custom_filename.txt$" +check "trace output through --on-threshold trace" \ + "timerlat hist -T 2 --on-threshold trace" 2 "^ Saving trace to timerlat_trace.txt$" +check "trace output through --on-threshold trace with custom filename" \ + "timerlat hist -T 2 --on-threshold trace,file=custom_filename.txt" 2 "^ Saving trace to custom_filename.txt$" +check "exec command" \ + "timerlat hist -T 2 --on-threshold shell,command='echo TestOutput'" 2 "^TestOutput$" +check "multiple actions" \ + "timerlat hist -T 2 --on-threshold shell,command='echo -n 1' --on-threshold shell,command='echo 2'" 2 "^12$" +check "hist stop at failed action" \ + "timerlat hist -T 2 --on-threshold shell,command='echo -n 1; false' --on-threshold shell,command='echo -n 2'" 2 "^1# RTLA timerlat histogram$" +check "top stop at failed action" \ + "timerlat top -T 2 --on-threshold shell,command='echo -n abc; false' --on-threshold shell,command='echo -n defgh'" 2 "^abc" "defgh" +check "hist with continue" \ + "timerlat hist -T 2 -d 5s --on-threshold shell,command='echo TestOutput' --on-threshold continue" 0 "^TestOutput$" +check "top with continue" \ + "timerlat top -q -T 2 -d 5s --on-threshold shell,command='echo TestOutput' --on-threshold continue" 0 "^TestOutput$" +check "hist with trace output at end" \ + "timerlat hist -d 1s --on-end trace" 0 "^ Saving trace to timerlat_trace.txt$" +check "top with trace output at end" \ + "timerlat top -d 1s --on-end trace" 0 "^ Saving trace to timerlat_trace.txt$" +done + +test_end |
