diff options
Diffstat (limited to 'tools/tracing/rtla/src')
25 files changed, 5287 insertions, 1409 deletions
diff --git a/tools/tracing/rtla/src/Build b/tools/tracing/rtla/src/Build new file mode 100644 index 000000000000..329e24a40cf7 --- /dev/null +++ b/tools/tracing/rtla/src/Build @@ -0,0 +1,14 @@ +rtla-y += trace.o +rtla-y += utils.o +rtla-y += actions.o +rtla-y += common.o +rtla-y += osnoise.o +rtla-y += osnoise_top.o +rtla-y += osnoise_hist.o +rtla-y += timerlat.o +rtla-y += timerlat_top.o +rtla-y += timerlat_hist.o +rtla-y += timerlat_u.o +rtla-y += timerlat_aa.o +rtla-y += timerlat_bpf.o +rtla-y += rtla.o diff --git a/tools/tracing/rtla/src/actions.c b/tools/tracing/rtla/src/actions.c new file mode 100644 index 000000000000..8945aee58d51 --- /dev/null +++ b/tools/tracing/rtla/src/actions.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> + +#include "actions.h" +#include "trace.h" +#include "utils.h" + +/* + * actions_init - initialize struct actions + */ +void +actions_init(struct actions *self) +{ + self->size = action_default_size; + self->list = calloc(self->size, sizeof(struct action)); + self->len = 0; + self->continue_flag = false; + + memset(&self->present, 0, sizeof(self->present)); + + /* This has to be set by the user */ + self->trace_output_inst = NULL; +} + +/* + * actions_destroy - destroy struct actions + */ +void +actions_destroy(struct actions *self) +{ + /* Free any action-specific data */ + for (struct action *action = self->list; action < self->list + self->len; action++) { + if (action->type == ACTION_SHELL) + free(action->command); + if (action->type == ACTION_TRACE_OUTPUT) + free(action->trace_output); + } + + /* Free action list */ + free(self->list); +} + +/* + * actions_new - Get pointer to new action + */ +static struct action * +actions_new(struct actions *self) +{ + if (self->len >= self->size) { + self->size *= 2; + self->list = realloc(self->list, self->size * sizeof(struct action)); + } + + return &self->list[self->len++]; +} + +/* + * actions_add_trace_output - add an action to output trace + */ +int +actions_add_trace_output(struct actions *self, const char *trace_output) +{ + struct action *action = actions_new(self); + + self->present[ACTION_TRACE_OUTPUT] = true; + action->type = ACTION_TRACE_OUTPUT; + action->trace_output = calloc(strlen(trace_output) + 1, sizeof(char)); + if (!action->trace_output) + return -1; + strcpy(action->trace_output, trace_output); + + return 0; +} + +/* + * actions_add_trace_output - add an action to send signal to a process + */ +int +actions_add_signal(struct actions *self, int signal, int pid) +{ + struct action *action = actions_new(self); + + self->present[ACTION_SIGNAL] = true; + action->type = ACTION_SIGNAL; + action->signal = signal; + action->pid = pid; + + return 0; +} + +/* + * actions_add_shell - add an action to execute a shell command + */ +int +actions_add_shell(struct actions *self, const char *command) +{ + struct action *action = actions_new(self); + + self->present[ACTION_SHELL] = true; + action->type = ACTION_SHELL; + action->command = calloc(strlen(command) + 1, sizeof(char)); + if (!action->command) + return -1; + strcpy(action->command, command); + + return 0; +} + +/* + * actions_add_continue - add an action to resume measurement + */ +int +actions_add_continue(struct actions *self) +{ + struct action *action = actions_new(self); + + self->present[ACTION_CONTINUE] = true; + action->type = ACTION_CONTINUE; + + return 0; +} + +/* + * actions_parse - add an action based on text specification + */ +int +actions_parse(struct actions *self, const char *trigger, const char *tracefn) +{ + enum action_type type = ACTION_NONE; + const char *token; + char trigger_c[strlen(trigger) + 1]; + + /* For ACTION_SIGNAL */ + int signal = 0, pid = 0; + + /* For ACTION_TRACE_OUTPUT */ + const char *trace_output; + + strcpy(trigger_c, trigger); + token = strtok(trigger_c, ","); + + if (strcmp(token, "trace") == 0) + type = ACTION_TRACE_OUTPUT; + else if (strcmp(token, "signal") == 0) + type = ACTION_SIGNAL; + else if (strcmp(token, "shell") == 0) + type = ACTION_SHELL; + else if (strcmp(token, "continue") == 0) + type = ACTION_CONTINUE; + else + /* Invalid trigger type */ + return -1; + + token = strtok(NULL, ","); + + switch (type) { + case ACTION_TRACE_OUTPUT: + /* Takes no argument */ + if (token == NULL) + trace_output = tracefn; + else { + if (strlen(token) > 5 && strncmp(token, "file=", 5) == 0) { + trace_output = token + 5; + } else { + /* Invalid argument */ + return -1; + } + + token = strtok(NULL, ","); + if (token != NULL) + /* Only one argument allowed */ + return -1; + } + return actions_add_trace_output(self, trace_output); + case ACTION_SIGNAL: + /* Takes two arguments, num (signal) and pid */ + while (token != NULL) { + if (strlen(token) > 4 && strncmp(token, "num=", 4) == 0) { + signal = atoi(token + 4); + } else if (strlen(token) > 4 && strncmp(token, "pid=", 4) == 0) { + if (strncmp(token + 4, "parent", 7) == 0) + pid = -1; + else + pid = atoi(token + 4); + } else { + /* Invalid argument */ + return -1; + } + + token = strtok(NULL, ","); + } + + if (!signal || !pid) + /* Missing argument */ + return -1; + + return actions_add_signal(self, signal, pid); + case ACTION_SHELL: + if (token == NULL) + return -1; + if (strlen(token) > 8 && strncmp(token, "command=", 8) == 0) + return actions_add_shell(self, token + 8); + return -1; + case ACTION_CONTINUE: + /* Takes no argument */ + if (token != NULL) + return -1; + return actions_add_continue(self); + default: + return -1; + } +} + +/* + * actions_perform - perform all actions + */ +int +actions_perform(struct actions *self) +{ + int pid, retval; + const struct action *action; + + for (action = self->list; action < self->list + self->len; action++) { + switch (action->type) { + case ACTION_TRACE_OUTPUT: + retval = save_trace_to_file(self->trace_output_inst, action->trace_output); + if (retval) { + err_msg("Error saving trace\n"); + return retval; + } + break; + case ACTION_SIGNAL: + if (action->pid == -1) + pid = getppid(); + else + pid = action->pid; + retval = kill(pid, action->signal); + if (retval) { + err_msg("Error sending signal\n"); + return retval; + } + break; + case ACTION_SHELL: + retval = system(action->command); + if (retval) + return retval; + break; + case ACTION_CONTINUE: + self->continue_flag = true; + return 0; + default: + break; + } + } + + return 0; +} diff --git a/tools/tracing/rtla/src/actions.h b/tools/tracing/rtla/src/actions.h new file mode 100644 index 000000000000..a4f9b570775b --- /dev/null +++ b/tools/tracing/rtla/src/actions.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <tracefs.h> +#include <stdbool.h> + +enum action_type { + ACTION_NONE = 0, + ACTION_TRACE_OUTPUT, + ACTION_SIGNAL, + ACTION_SHELL, + ACTION_CONTINUE, + ACTION_FIELD_N +}; + +struct action { + enum action_type type; + union { + struct { + /* For ACTION_TRACE_OUTPUT */ + char *trace_output; + }; + struct { + /* For ACTION_SIGNAL */ + int signal; + int pid; + }; + struct { + /* For ACTION_SHELL */ + char *command; + }; + }; +}; + +static const int action_default_size = 8; + +struct actions { + struct action *list; + int len, size; + bool present[ACTION_FIELD_N]; + bool continue_flag; + + /* External dependencies */ + struct tracefs_instance *trace_output_inst; +}; + +void actions_init(struct actions *self); +void actions_destroy(struct actions *self); +int actions_add_trace_output(struct actions *self, const char *trace_output); +int actions_add_signal(struct actions *self, int signal, int pid); +int actions_add_shell(struct actions *self, const char *command); +int actions_add_continue(struct actions *self); +int actions_parse(struct actions *self, const char *trigger, const char *tracefn); +int actions_perform(struct actions *self); diff --git a/tools/tracing/rtla/src/common.c b/tools/tracing/rtla/src/common.c new file mode 100644 index 000000000000..b197037fc58b --- /dev/null +++ b/tools/tracing/rtla/src/common.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <pthread.h> +#include <signal.h> +#include <stdlib.h> +#include <unistd.h> +#include "common.h" + +struct trace_instance *trace_inst; +int stop_tracing; + +static void stop_trace(int sig) +{ + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(trace_inst->inst); + return; + } + stop_tracing = 1; + if (trace_inst) + trace_instance_stop(trace_inst); +} + +/* + * set_signals - handles the signal to stop the tool + */ +static void set_signals(struct common_params *params) +{ + signal(SIGINT, stop_trace); + if (params->duration) { + signal(SIGALRM, stop_trace); + alarm(params->duration); + } +} + +/* + * common_apply_config - apply common configs to the initialized tool + */ +int +common_apply_config(struct osnoise_tool *tool, struct common_params *params) +{ + int retval, i; + + if (!params->sleep_time) + params->sleep_time = 1; + + retval = osnoise_set_cpus(tool->context, params->cpus ? params->cpus : "all"); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + + if (!params->cpus) { + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) + CPU_SET(i, ¶ms->monitored_cpus); + } + + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + + /* + * Set workload according to type of thread if the kernel supports it. + * On kernels without support, user threads will have already failed + * on missing fd, and kernel threads do not need it. + */ + retval = osnoise_set_workload(tool->context, params->kernel_workload); + if (retval < -1) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; + } + + return 0; + +out_err: + return -1; +} + + +int run_tool(struct tool_ops *ops, int argc, char *argv[]) +{ + struct common_params *params; + enum result return_value = ERROR; + struct osnoise_tool *tool; + bool stopped; + int retval; + + params = ops->parse_args(argc, argv); + if (!params) + exit(1); + + tool = ops->init_tool(params); + if (!tool) { + err_msg("Could not init osnoise tool\n"); + goto out_exit; + } + tool->ops = ops; + tool->params = params; + + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + trace_inst = &tool->trace; + + retval = ops->apply_config(tool); + if (retval) { + err_msg("Could not apply config\n"); + goto out_free; + } + + retval = enable_tracer_by_name(trace_inst->inst, ops->tracer); + if (retval) { + err_msg("Failed to enable %s tracer\n", ops->tracer); + goto out_free; + } + + if (params->set_sched) { + retval = set_comm_sched_attr(ops->comm_prefix, ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_free; + } + } + + if (params->cgroup && !params->user_data) { + retval = set_comm_cgroup(ops->comm_prefix, params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } + + + if (params->threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->end_actions.present[ACTION_TRACE_OUTPUT]) { + tool->record = osnoise_init_trace_tool(ops->tracer); + if (!tool->record) { + err_msg("Failed to enable the trace instance\n"); + goto out_free; + } + params->threshold_actions.trace_output_inst = tool->record->trace.inst; + params->end_actions.trace_output_inst = tool->record->trace.inst; + + if (params->events) { + retval = trace_events_enable(&tool->record->trace, params->events); + if (retval) + goto out_trace; + } + + if (params->buffer_size > 0) { + retval = trace_set_buffer_size(&tool->record->trace, params->buffer_size); + if (retval) + goto out_trace; + } + } + + if (params->user_workload) { + pthread_t user_thread; + + /* rtla asked to stop */ + params->user.should_run = 1; + /* all threads left */ + params->user.stopped_running = 0; + + params->user.set = ¶ms->monitored_cpus; + if (params->set_sched) + params->user.sched_param = ¶ms->sched_param; + else + params->user.sched_param = NULL; + + params->user.cgroup_name = params->cgroup_name; + + retval = pthread_create(&user_thread, NULL, timerlat_u_dispatcher, ¶ms->user); + if (retval) + err_msg("Error creating timerlat user-space threads\n"); + } + + retval = ops->enable(tool); + if (retval) + goto out_trace; + + tool->start_time = time(NULL); + set_signals(params); + + retval = ops->main(tool); + if (retval) + goto out_trace; + + if (params->user_workload && !params->user.stopped_running) { + params->user.should_run = 0; + sleep(1); + } + + ops->print_stats(tool); + + actions_perform(¶ms->end_actions); + + return_value = PASSED; + + stopped = osnoise_trace_is_off(tool, tool->record) && !stop_tracing; + if (stopped) { + printf("%s hit stop tracing\n", ops->tracer); + return_value = FAILED; + } + + if (ops->analyze) + ops->analyze(tool, stopped); + +out_trace: + trace_events_destroy(&tool->record->trace, params->events); + params->events = NULL; +out_free: + ops->free(tool); + osnoise_destroy_tool(tool->record); + osnoise_destroy_tool(tool); + actions_destroy(¶ms->threshold_actions); + actions_destroy(¶ms->end_actions); + free(params); +out_exit: + exit(return_value); +} + +int top_main_loop(struct osnoise_tool *tool) +{ + struct common_params *params = tool->params; + struct trace_instance *trace = &tool->trace; + struct osnoise_tool *record = tool->record; + int retval; + + while (!stop_tracing) { + sleep(params->sleep_time); + + if (params->aa_only && !osnoise_trace_is_off(tool, record)) + continue; + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + return retval; + } + + if (!params->quiet) + tool->ops->print_stats(tool); + + if (osnoise_trace_is_off(tool, record)) { + if (stop_tracing) + /* stop tracing requested, do not perform actions */ + return 0; + + actions_perform(¶ms->threshold_actions); + + if (!params->threshold_actions.continue_flag) + /* continue flag not set, break */ + return 0; + + /* continue action reached, re-enable tracing */ + if (record) + trace_instance_start(&record->trace); + if (tool->aa) + trace_instance_start(&tool->aa->trace); + trace_instance_start(trace); + } + + /* is there still any user-threads ? */ + if (params->user_workload) { + if (params->user.stopped_running) { + debug_msg("timerlat user space threads stopped!\n"); + break; + } + } + } + + return 0; +} + +int hist_main_loop(struct osnoise_tool *tool) +{ + struct common_params *params = tool->params; + struct trace_instance *trace = &tool->trace; + int retval = 0; + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + break; + } + + if (osnoise_trace_is_off(tool, tool->record)) { + if (stop_tracing) + /* stop tracing requested, do not perform actions */ + break; + + actions_perform(¶ms->threshold_actions); + + if (!params->threshold_actions.continue_flag) + /* continue flag not set, break */ + break; + + /* continue action reached, re-enable tracing */ + if (tool->record) + trace_instance_start(&tool->record->trace); + if (tool->aa) + trace_instance_start(&tool->aa->trace); + trace_instance_start(&tool->trace); + } + + /* is there still any user-threads ? */ + if (params->user_workload) { + if (params->user.stopped_running) { + debug_msg("user-space threads stopped!\n"); + break; + } + } + } + + return retval; +} diff --git a/tools/tracing/rtla/src/common.h b/tools/tracing/rtla/src/common.h new file mode 100644 index 000000000000..9ec2b7632c37 --- /dev/null +++ b/tools/tracing/rtla/src/common.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#pragma once + +#include "actions.h" +#include "timerlat_u.h" +#include "trace.h" +#include "utils.h" + +/* + * osnoise_context - read, store, write, restore osnoise configs. + */ +struct osnoise_context { + int flags; + int ref; + + char *curr_cpus; + char *orig_cpus; + + /* 0 as init value */ + unsigned long long orig_runtime_us; + unsigned long long runtime_us; + + /* 0 as init value */ + unsigned long long orig_period_us; + unsigned long long period_us; + + /* 0 as init value */ + long long orig_timerlat_period_us; + long long timerlat_period_us; + + /* 0 as init value */ + long long orig_tracing_thresh; + long long tracing_thresh; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_us; + long long stop_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_total_us; + long long stop_total_us; + + /* -1 as init value because 0 is disabled */ + long long orig_print_stack; + long long print_stack; + + /* -1 as init value because 0 is off */ + int orig_opt_irq_disable; + int opt_irq_disable; + + /* -1 as init value because 0 is off */ + int orig_opt_workload; + int opt_workload; +}; + +extern struct trace_instance *trace_inst; +extern int stop_tracing; + +struct hist_params { + char no_irq; + char no_thread; + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +/* + * common_params - Parameters shared between timerlat_params and osnoise_params + */ +struct common_params { + /* trace configuration */ + char *cpus; + cpu_set_t monitored_cpus; + struct trace_events *events; + int buffer_size; + + /* Timing parameters */ + int warmup; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + + /* Scheduling parameters */ + int set_sched; + struct sched_attr sched_param; + int cgroup; + char *cgroup_name; + int hk_cpus; + cpu_set_t hk_cpu_set; + + /* Other parameters */ + struct hist_params hist; + int output_divisor; + int pretty_output; + int quiet; + int user_workload; + int kernel_workload; + int user_data; + int aa_only; + + struct actions threshold_actions; + struct actions end_actions; + struct timerlat_u_params user; +}; + +#define for_each_monitored_cpu(cpu, nr_cpus, common) \ + for (cpu = 0; cpu < nr_cpus; cpu++) \ + if (!(common)->cpus || CPU_ISSET(cpu, &(common)->monitored_cpus)) + +struct tool_ops; + +/* + * osnoise_tool - osnoise based tool definition. + * + * Only the "trace" and "context" fields are used for + * the additional trace instances (record and aa). + */ +struct osnoise_tool { + struct tool_ops *ops; + struct trace_instance trace; + struct osnoise_context *context; + void *data; + struct common_params *params; + time_t start_time; + struct osnoise_tool *record; + struct osnoise_tool *aa; +}; + +struct tool_ops { + const char *tracer; + const char *comm_prefix; + struct common_params *(*parse_args)(int argc, char *argv[]); + struct osnoise_tool *(*init_tool)(struct common_params *params); + int (*apply_config)(struct osnoise_tool *tool); + int (*enable)(struct osnoise_tool *tool); + int (*main)(struct osnoise_tool *tool); + void (*print_stats)(struct osnoise_tool *tool); + void (*analyze)(struct osnoise_tool *tool, bool stopped); + void (*free)(struct osnoise_tool *tool); +}; + +int osnoise_set_cpus(struct osnoise_context *context, char *cpus); +void osnoise_restore_cpus(struct osnoise_context *context); + +int osnoise_set_workload(struct osnoise_context *context, bool onoff); + +void osnoise_destroy_tool(struct osnoise_tool *top); +struct osnoise_tool *osnoise_init_tool(char *tool_name); +struct osnoise_tool *osnoise_init_trace_tool(const char *tracer); +bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record); + +int common_apply_config(struct osnoise_tool *tool, struct common_params *params); +int top_main_loop(struct osnoise_tool *tool); +int hist_main_loop(struct osnoise_tool *tool); diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index 4dee343909b1..312c511fa004 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <sys/types.h> #include <sys/stat.h> #include <pthread.h> @@ -12,9 +13,12 @@ #include <errno.h> #include <fcntl.h> #include <stdio.h> +#include <sched.h> #include "osnoise.h" -#include "utils.h" + +#define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ +#define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ /* * osnoise_get_cpus - return the original "osnoise/cpus" content @@ -734,20 +738,172 @@ void osnoise_put_tracing_thresh(struct osnoise_context *context) context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL; } -/* - * enable_osnoise - enable osnoise tracer in the trace_instance - */ -int enable_osnoise(struct trace_instance *trace) +static int osnoise_options_get_option(char *option) +{ + char *options = tracefs_instance_file_read(NULL, "osnoise/options", NULL); + char no_option[128]; + int retval = 0; + char *opt; + + if (!options) + return OSNOISE_OPTION_INIT_VAL; + + /* + * Check first if the option is disabled. + */ + snprintf(no_option, sizeof(no_option), "NO_%s", option); + + opt = strstr(options, no_option); + if (opt) + goto out_free; + + /* + * Now that it is not disabled, if the string is there, it is + * enabled. If the string is not there, the option does not exist. + */ + opt = strstr(options, option); + if (opt) + retval = 1; + else + retval = OSNOISE_OPTION_INIT_VAL; + +out_free: + free(options); + return retval; +} + +static int osnoise_options_set_option(char *option, bool onoff) { - return enable_tracer_by_name(trace->inst, "osnoise"); + char no_option[128]; + + if (onoff) + return tracefs_instance_file_write(NULL, "osnoise/options", option); + + snprintf(no_option, sizeof(no_option), "NO_%s", option); + + return tracefs_instance_file_write(NULL, "osnoise/options", no_option); } -/* - * enable_timerlat - enable timerlat tracer in the trace_instance - */ -int enable_timerlat(struct trace_instance *trace) +static int osnoise_get_irq_disable(struct osnoise_context *context) +{ + if (context->opt_irq_disable != OSNOISE_OPTION_INIT_VAL) + return context->opt_irq_disable; + + if (context->orig_opt_irq_disable != OSNOISE_OPTION_INIT_VAL) + return context->orig_opt_irq_disable; + + context->orig_opt_irq_disable = osnoise_options_get_option("OSNOISE_IRQ_DISABLE"); + + return context->orig_opt_irq_disable; +} + +int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff) +{ + int opt_irq_disable = osnoise_get_irq_disable(context); + int retval; + + if (opt_irq_disable == OSNOISE_OPTION_INIT_VAL) + return -1; + + if (opt_irq_disable == onoff) + return 0; + + retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", onoff); + if (retval < 0) + return -1; + + context->opt_irq_disable = onoff; + + return 0; +} + +static void osnoise_restore_irq_disable(struct osnoise_context *context) +{ + int retval; + + if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_opt_irq_disable == context->opt_irq_disable) + goto out_done; + + retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", context->orig_opt_irq_disable); + if (retval < 0) + err_msg("Could not restore original OSNOISE_IRQ_DISABLE option\n"); + +out_done: + context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; +} + +static void osnoise_put_irq_disable(struct osnoise_context *context) { - return enable_tracer_by_name(trace->inst, "timerlat"); + osnoise_restore_irq_disable(context); + + if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; +} + +static int osnoise_get_workload(struct osnoise_context *context) +{ + if (context->opt_workload != OSNOISE_OPTION_INIT_VAL) + return context->opt_workload; + + if (context->orig_opt_workload != OSNOISE_OPTION_INIT_VAL) + return context->orig_opt_workload; + + context->orig_opt_workload = osnoise_options_get_option("OSNOISE_WORKLOAD"); + + return context->orig_opt_workload; +} + +int osnoise_set_workload(struct osnoise_context *context, bool onoff) +{ + int opt_workload = osnoise_get_workload(context); + int retval; + + if (opt_workload == OSNOISE_OPTION_INIT_VAL) + return -1; + + if (opt_workload == onoff) + return 0; + + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff); + if (retval < 0) + return -2; + + context->opt_workload = onoff; + + return 0; +} + +static void osnoise_restore_workload(struct osnoise_context *context) +{ + int retval; + + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_opt_workload == context->opt_workload) + goto out_done; + + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", context->orig_opt_workload); + if (retval < 0) + err_msg("Could not restore original OSNOISE_WORKLOAD option\n"); + +out_done: + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; +} + +static void osnoise_put_workload(struct osnoise_context *context) +{ + osnoise_restore_workload(context); + + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; } enum { @@ -798,6 +954,12 @@ struct osnoise_context *osnoise_context_alloc(void) context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL; context->tracing_thresh = OSNOISE_OPTION_INIT_VAL; + context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; + context->opt_irq_disable = OSNOISE_OPTION_INIT_VAL; + + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; + context->opt_workload = OSNOISE_OPTION_INIT_VAL; + osnoise_get_context(context); return context; @@ -824,6 +986,8 @@ void osnoise_put_context(struct osnoise_context *context) osnoise_put_timerlat_period_us(context); osnoise_put_print_stack(context); osnoise_put_tracing_thresh(context); + osnoise_put_irq_disable(context); + osnoise_put_workload(context); free(context); } @@ -876,7 +1040,7 @@ out_err: /* * osnoise_init_trace_tool - init a tracer instance to trace osnoise events */ -struct osnoise_tool *osnoise_init_trace_tool(char *tracer) +struct osnoise_tool *osnoise_init_trace_tool(const char *tracer) { struct osnoise_tool *trace; int retval; @@ -903,6 +1067,129 @@ out_err: return NULL; } +bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record) +{ + /* + * The tool instance is always present, it is the one used to collect + * data. + */ + if (!tracefs_trace_is_on(tool->trace.inst)) + return true; + + /* + * The trace record instance is only enabled when -t is set. IOW, when the system + * is tracing. + */ + return record && !tracefs_trace_is_on(record->trace.inst); +} + +/* + * osnoise_report_missed_events - report number of events dropped by trace + * buffer + */ +void +osnoise_report_missed_events(struct osnoise_tool *tool) +{ + unsigned long long total_events; + + if (tool->trace.missed_events == UINT64_MAX) + printf("unknown number of events missed, results might not be accurate\n"); + else if (tool->trace.missed_events > 0) { + total_events = tool->trace.processed_events + tool->trace.missed_events; + + printf("%lld (%.2f%%) events missed, results might not be accurate\n", + tool->trace.missed_events, + (double) tool->trace.missed_events / total_events * 100.0); + } +} + +/* + * osnoise_apply_config - apply osnoise configs to the initialized tool + */ +int +osnoise_apply_config(struct osnoise_tool *tool, struct osnoise_params *params) +{ + int retval; + + params->common.kernel_workload = true; + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + } else { + retval = osnoise_set_runtime_period(tool->context, + DEFAULT_SAMPLE_PERIOD, + DEFAULT_SAMPLE_RUNTIME); + } + + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + + retval = osnoise_set_stop_us(tool->context, params->common.stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + + retval = osnoise_set_stop_total_us(tool->context, params->common.stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + + retval = osnoise_set_tracing_thresh(tool->context, params->threshold); + if (retval) { + err_msg("Failed to set tracing_thresh\n"); + goto out_err; + } + + return common_apply_config(tool, ¶ms->common); + +out_err: + return -1; +} + +int osnoise_enable(struct osnoise_tool *tool) +{ + struct osnoise_params *params = to_osnoise_params(tool->params); + int retval; + + /* + * Start the tracer here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (tool->record) + trace_instance_start(&tool->record->trace); + trace_instance_start(&tool->trace); + + if (params->common.warmup > 0) { + debug_msg("Warming up for %d seconds\n", params->common.warmup); + sleep(params->common.warmup); + if (stop_tracing) + return -1; + + /* + * Clean up the buffer. The osnoise workload do not run + * with tracing off to avoid creating a performance penalty + * when not needed. + */ + retval = tracefs_instance_file_write(tool->trace.inst, "trace", ""); + if (retval < 0) { + debug_msg("Error cleaning up the buffer"); + return retval; + } + + } + + return 0; +} + static void osnoise_usage(int err) { int i; @@ -936,7 +1223,7 @@ int osnoise_main(int argc, char *argv[]) * default cmdline. */ if (argc == 1) { - osnoise_top_main(argc, argv); + run_tool(&osnoise_top_ops, argc, argv); exit(0); } @@ -944,13 +1231,13 @@ int osnoise_main(int argc, char *argv[]) osnoise_usage(0); } else if (strncmp(argv[1], "-", 1) == 0) { /* the user skipped the tool, call the default one */ - osnoise_top_main(argc, argv); + run_tool(&osnoise_top_ops, argc, argv); exit(0); } else if (strcmp(argv[1], "top") == 0) { - osnoise_top_main(argc-1, &argv[1]); + run_tool(&osnoise_top_ops, argc-1, &argv[1]); exit(0); } else if (strcmp(argv[1], "hist") == 0) { - osnoise_hist_main(argc-1, &argv[1]); + run_tool(&osnoise_hist_ops, argc-1, &argv[1]); exit(0); } @@ -958,3 +1245,9 @@ usage: osnoise_usage(1); exit(1); } + +int hwnoise_main(int argc, char *argv[]) +{ + run_tool(&osnoise_top_ops, argc, argv); + exit(0); +} diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h index 04a4384cc544..895687030c0b 100644 --- a/tools/tracing/rtla/src/osnoise.h +++ b/tools/tracing/rtla/src/osnoise.h @@ -1,45 +1,23 @@ // SPDX-License-Identifier: GPL-2.0 -#include "trace.h" +#pragma once -/* - * osnoise_context - read, store, write, restore osnoise configs. - */ -struct osnoise_context { - int flags; - int ref; - - char *curr_cpus; - char *orig_cpus; - - /* 0 as init value */ - unsigned long long orig_runtime_us; - unsigned long long runtime_us; - - /* 0 as init value */ - unsigned long long orig_period_us; - unsigned long long period_us; +#include "common.h" - /* 0 as init value */ - long long orig_timerlat_period_us; - long long timerlat_period_us; - - /* 0 as init value */ - long long orig_tracing_thresh; - long long tracing_thresh; - - /* -1 as init value because 0 is disabled */ - long long orig_stop_us; - long long stop_us; - - /* -1 as init value because 0 is disabled */ - long long orig_stop_total_us; - long long stop_total_us; +enum osnoise_mode { + MODE_OSNOISE = 0, + MODE_HWNOISE +}; - /* -1 as init value because 0 is disabled */ - long long orig_print_stack; - long long print_stack; +struct osnoise_params { + struct common_params common; + unsigned long long runtime; + unsigned long long period; + long long threshold; + enum osnoise_mode mode; }; +#define to_osnoise_params(ptr) container_of(ptr, struct osnoise_params, common) + /* * *_INIT_VALs are also invalid values, they are used to * communicate errors. @@ -51,9 +29,6 @@ struct osnoise_context *osnoise_context_alloc(void); int osnoise_get_context(struct osnoise_context *context); void osnoise_put_context(struct osnoise_context *context); -int osnoise_set_cpus(struct osnoise_context *context, char *cpus); -void osnoise_restore_cpus(struct osnoise_context *context); - int osnoise_set_runtime_period(struct osnoise_context *context, unsigned long long runtime, unsigned long long period); @@ -79,21 +54,18 @@ void osnoise_restore_print_stack(struct osnoise_context *context); int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack); -/* - * osnoise_tool - osnoise based tool definition. - */ -struct osnoise_tool { - struct trace_instance trace; - struct osnoise_context *context; - void *data; - void *params; - time_t start_time; -}; - -void osnoise_destroy_tool(struct osnoise_tool *top); -struct osnoise_tool *osnoise_init_tool(char *tool_name); -struct osnoise_tool *osnoise_init_trace_tool(char *tracer); +int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff); +void osnoise_report_missed_events(struct osnoise_tool *tool); +int osnoise_apply_config(struct osnoise_tool *tool, struct osnoise_params *params); int osnoise_hist_main(int argc, char *argv[]); int osnoise_top_main(int argc, char **argv); +int osnoise_enable(struct osnoise_tool *tool); int osnoise_main(int argc, char **argv); +int hwnoise_main(int argc, char **argv); + +extern struct tool_ops timerlat_top_ops, timerlat_hist_ops; +extern struct tool_ops osnoise_top_ops, osnoise_hist_ops; + +int run_tool(struct tool_ops *ops, int argc, char *argv[]); +int hist_main_loop(struct osnoise_tool *tool); diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 5d7ea479ac89..ff8c231e47c4 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <getopt.h> #include <stdlib.h> #include <string.h> @@ -12,33 +13,8 @@ #include <stdio.h> #include <time.h> -#include "utils.h" #include "osnoise.h" -struct osnoise_hist_params { - char *cpus; - char *monitored_cpus; - char *trace_output; - unsigned long long runtime; - unsigned long long period; - long long threshold; - long long stop_us; - long long stop_total_us; - int sleep_time; - int duration; - int set_sched; - int output_divisor; - struct sched_attr sched_param; - struct trace_events *events; - - char no_header; - char no_summary; - char no_index; - char with_zeros; - int bucket_size; - int entries; -}; - struct osnoise_hist_cpu { int *samples; int count; @@ -78,6 +54,11 @@ osnoise_free_histogram(struct osnoise_hist_data *data) free(data); } +static void osnoise_free_hist_tool(struct osnoise_tool *tool) +{ + osnoise_free_histogram(tool->data); +} + /* * osnoise_alloc_histogram - alloc runtime data */ @@ -119,22 +100,24 @@ cleanup: static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, unsigned long long duration, int count) { - struct osnoise_hist_params *params = tool->params; + struct osnoise_params *params = to_osnoise_params(tool->params); struct osnoise_hist_data *data = tool->data; + unsigned long long total_duration; int entries = data->entries; int bucket; int *hist; - if (params->output_divisor) - duration = duration / params->output_divisor; + if (params->common.output_divisor) + duration = duration / params->common.output_divisor; - if (data->bucket_size) - bucket = duration / data->bucket_size; + bucket = duration / data->bucket_size; + + total_duration = duration * count; hist = data->hist[cpu].samples; data->hist[cpu].count += count; update_min(&data->hist[cpu].min_sample, &duration); - update_sum(&data->hist[cpu].sum_sample, &duration); + update_sum(&data->hist[cpu].sum_sample, &total_duration); update_max(&data->hist[cpu].max_sample, &duration); if (bucket < entries) @@ -159,7 +142,7 @@ static void osnoise_destroy_trace_hist(struct osnoise_tool *tool) */ static int osnoise_init_trace_hist(struct osnoise_tool *tool) { - struct osnoise_hist_params *params = tool->params; + struct osnoise_params *params = to_osnoise_params(tool->params); struct osnoise_hist_data *data = tool->data; int bucket_size; char buff[128]; @@ -168,7 +151,7 @@ static int osnoise_init_trace_hist(struct osnoise_tool *tool) /* * Set the size of the bucket. */ - bucket_size = params->output_divisor * params->bucket_size; + bucket_size = params->common.output_divisor * params->common.hist.bucket_size; snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size); data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold", @@ -244,29 +227,27 @@ static void osnoise_read_trace_hist(struct osnoise_tool *tool) */ static void osnoise_hist_header(struct osnoise_tool *tool) { - struct osnoise_hist_params *params = tool->params; + struct osnoise_params *params = to_osnoise_params(tool->params); struct osnoise_hist_data *data = tool->data; struct trace_seq *s = tool->trace.seq; char duration[26]; int cpu; - if (params->no_header) + if (params->common.hist.no_header) return; get_duration(tool->start_time, duration, sizeof(duration)); trace_seq_printf(s, "# RTLA osnoise histogram\n"); trace_seq_printf(s, "# Time unit is %s (%s)\n", - params->output_divisor == 1 ? "nanoseconds" : "microseconds", - params->output_divisor == 1 ? "ns" : "us"); + params->common.output_divisor == 1 ? "nanoseconds" : "microseconds", + params->common.output_divisor == 1 ? "ns" : "us"); trace_seq_printf(s, "# Duration: %s\n", duration); - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(s, "Index"); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].count) continue; @@ -283,21 +264,19 @@ static void osnoise_hist_header(struct osnoise_tool *tool) * osnoise_print_summary - print the summary of the hist data to the output */ static void -osnoise_print_summary(struct osnoise_hist_params *params, +osnoise_print_summary(struct osnoise_params *params, struct trace_instance *trace, struct osnoise_hist_data *data) { int cpu; - if (params->no_summary) + if (params->common.hist.no_summary) return; - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "count:"); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].count) continue; @@ -306,12 +285,10 @@ osnoise_print_summary(struct osnoise_hist_params *params, } trace_seq_printf(trace->seq, "\n"); - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "min: "); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].count) continue; @@ -321,30 +298,26 @@ osnoise_print_summary(struct osnoise_hist_params *params, } trace_seq_printf(trace->seq, "\n"); - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "avg: "); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].count) continue; if (data->hist[cpu].count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_sample / data->hist[cpu].count); + trace_seq_printf(trace->seq, "%9.2f ", + ((double) data->hist[cpu].sum_sample) / data->hist[cpu].count); else trace_seq_printf(trace->seq, " - "); } trace_seq_printf(trace->seq, "\n"); - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "max: "); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].count) continue; @@ -361,10 +334,12 @@ osnoise_print_summary(struct osnoise_hist_params *params, * osnoise_print_stats - print data for all CPUs */ static void -osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool) +osnoise_print_stats(struct osnoise_tool *tool) { + struct osnoise_params *params = to_osnoise_params(tool->params); struct osnoise_hist_data *data = tool->data; struct trace_instance *trace = &tool->trace; + int has_samples = 0; int bucket, cpu; int total; @@ -373,13 +348,11 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too for (bucket = 0; bucket < data->entries; bucket++) { total = 0; - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "%-6d", bucket * data->bucket_size); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].count) continue; @@ -388,22 +361,34 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]); } - if (total == 0 && !params->with_zeros) { + if (total == 0 && !params->common.hist.with_zeros) { trace_seq_reset(trace->seq); continue; } + /* There are samples above the threshold */ + has_samples = 1; trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); } - if (!params->no_index) + /* + * If no samples were recorded, skip calculations, print zeroed statistics + * and return. + */ + if (!has_samples) { + trace_seq_reset(trace->seq); + trace_seq_printf(trace->seq, "over: 0\ncount: 0\nmin: 0\navg: 0\nmax: 0\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + return; + } + + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "over: "); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].count) continue; @@ -416,21 +401,22 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too trace_seq_reset(trace->seq); osnoise_print_summary(params, trace, data); + osnoise_report_missed_events(tool); } /* * osnoise_hist_usage - prints osnoise hist usage message */ -static void osnoise_hist_usage(char *usage) +static void osnoise_hist_usage(void) { int i; static const char * const msg[] = { "", " usage: rtla osnoise hist [-h] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", - " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", - " [-c cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] [--no-index] \\", - " [--with-zeros]", + " [-T us] [-t [file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", + " [-c cpu-list] [-H cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros] [-C [cgroup_name]] [--warm-up]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", @@ -440,9 +426,11 @@ static void osnoise_hist_usage(char *usage) " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", " -T/--threshold us: the minimum delta to be considered a noise", " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup [cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[s|m|h|d]: duration of the session", " -D/--debug: print debug info", - " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -t/--trace [file]: save the stopped trace to [file|osnoise_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <filter>: enable a trace event filter to the previous -e event", " --trigger <trigger>: enable a trace event trigger to the previous -e event", @@ -458,39 +446,45 @@ static void osnoise_hist_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", + " --warm-up: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --on-threshold <action>: define action to be executed at stop-total threshold, multiple are allowed", + " --on-end <action>: define action to be executed at measurement end, multiple are allowed", NULL, }; - if (usage) - fprintf(stderr, "%s\n", usage); - fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n", VERSION); for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + exit(EXIT_SUCCESS); } /* * osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters */ -static struct osnoise_hist_params +static struct common_params *osnoise_hist_parse_args(int argc, char *argv[]) { - struct osnoise_hist_params *params; + struct osnoise_params *params; struct trace_events *tevent; int retval; int c; + char *trace_output = NULL; params = calloc(1, sizeof(*params)); if (!params) exit(1); + actions_init(¶ms->common.threshold_actions); + actions_init(¶ms->common.end_actions); + /* display data in microseconds */ - params->output_divisor = 1000; - params->bucket_size = 1; - params->entries = 256; + params->common.output_divisor = 1000; + params->common.hist.bucket_size = 1; + params->common.hist.entries = 256; while (1) { static struct option long_options[] = { @@ -498,8 +492,10 @@ static struct osnoise_hist_params {"bucket-size", required_argument, 0, 'b'}, {"entries", required_argument, 0, 'E'}, {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, {"debug", no_argument, 0, 'D'}, {"duration", required_argument, 0, 'd'}, + {"house-keeping", required_argument, 0, 'H'}, {"help", no_argument, 0, 'h'}, {"period", required_argument, 0, 'p'}, {"priority", required_argument, 0, 'P'}, @@ -515,14 +511,15 @@ static struct osnoise_hist_params {"with-zeros", no_argument, 0, '3'}, {"trigger", required_argument, 0, '4'}, {"filter", required_argument, 0, '5'}, + {"warm-up", required_argument, 0, '6'}, + {"trace-buffer-size", required_argument, 0, '7'}, + {"on-threshold", required_argument, 0, '8'}, + {"on-end", required_argument, 0, '9'}, {0, 0, 0, 0} }; - /* getopt_long stores the option index here. */ - int option_index = 0; - - c = getopt_long(argc, argv, "a:c:b:d:e:E:Dhp:P:r:s:S:t::T:01234:5:", - long_options, &option_index); + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:6:7:", + long_options, NULL); /* detect the end of the options. */ if (c == -1) @@ -531,201 +528,175 @@ static struct osnoise_hist_params switch (c) { case 'a': /* set sample stop to auto_thresh */ - params->stop_us = get_llong_from_str(optarg); + params->common.stop_us = get_llong_from_str(optarg); /* set sample threshold to 1 */ params->threshold = 1; /* set trace */ - params->trace_output = "osnoise_trace.txt"; + if (!trace_output) + trace_output = "osnoise_trace.txt"; break; case 'b': - params->bucket_size = get_llong_from_str(optarg); - if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) - osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + params->common.hist.bucket_size = get_llong_from_str(optarg); + if (params->common.hist.bucket_size == 0 || + params->common.hist.bucket_size >= 1000000) + fatal("Bucket size needs to be > 0 and <= 1000000"); break; case 'c': - retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + retval = parse_cpu_set(optarg, ¶ms->common.monitored_cpus); if (retval) - osnoise_hist_usage("\nInvalid -c cpu list\n"); - params->cpus = optarg; + fatal("Invalid -c cpu list"); + params->common.cpus = optarg; + break; + case 'C': + params->common.cgroup = 1; + params->common.cgroup_name = parse_optional_arg(argc, argv); break; case 'D': config_debug = 1; break; case 'd': - params->duration = parse_seconds_duration(optarg); - if (!params->duration) - osnoise_hist_usage("Invalid -D duration\n"); + params->common.duration = parse_seconds_duration(optarg); + if (!params->common.duration) + fatal("Invalid -D duration"); break; case 'e': tevent = trace_event_alloc(optarg); - if (!tevent) { - err_msg("Error alloc trace event"); - exit(EXIT_FAILURE); - } + if (!tevent) + fatal("Error alloc trace event"); - if (params->events) - tevent->next = params->events; + if (params->common.events) + tevent->next = params->common.events; - params->events = tevent; + params->common.events = tevent; break; case 'E': - params->entries = get_llong_from_str(optarg); - if ((params->entries < 10) || (params->entries > 9999999)) - osnoise_hist_usage("Entries must be > 10 and < 9999999\n"); + params->common.hist.entries = get_llong_from_str(optarg); + if (params->common.hist.entries < 10 || + params->common.hist.entries > 9999999) + fatal("Entries must be > 10 and < 9999999"); break; case 'h': case '?': - osnoise_hist_usage(NULL); + osnoise_hist_usage(); + break; + case 'H': + params->common.hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->common.hk_cpu_set); + if (retval) + fatal("Error parsing house keeping CPUs"); break; case 'p': params->period = get_llong_from_str(optarg); if (params->period > 10000000) - osnoise_hist_usage("Period longer than 10 s\n"); + fatal("Period longer than 10 s"); break; case 'P': - retval = parse_prio(optarg, ¶ms->sched_param); + retval = parse_prio(optarg, ¶ms->common.sched_param); if (retval == -1) - osnoise_hist_usage("Invalid -P priority"); - params->set_sched = 1; + fatal("Invalid -P priority"); + params->common.set_sched = 1; break; case 'r': params->runtime = get_llong_from_str(optarg); if (params->runtime < 100) - osnoise_hist_usage("Runtime shorter than 100 us\n"); + fatal("Runtime shorter than 100 us"); break; case 's': - params->stop_us = get_llong_from_str(optarg); + params->common.stop_us = get_llong_from_str(optarg); break; case 'S': - params->stop_total_us = get_llong_from_str(optarg); + params->common.stop_total_us = get_llong_from_str(optarg); break; case 'T': params->threshold = get_llong_from_str(optarg); break; case 't': - if (optarg) - /* skip = */ - params->trace_output = &optarg[1]; - else - params->trace_output = "osnoise_trace.txt"; + trace_output = parse_optional_arg(argc, argv); + if (!trace_output) + trace_output = "osnoise_trace.txt"; break; case '0': /* no header */ - params->no_header = 1; + params->common.hist.no_header = 1; break; case '1': /* no summary */ - params->no_summary = 1; + params->common.hist.no_summary = 1; break; case '2': /* no index */ - params->no_index = 1; + params->common.hist.no_index = 1; break; case '3': /* with zeros */ - params->with_zeros = 1; + params->common.hist.with_zeros = 1; break; case '4': /* trigger */ - if (params->events) { - retval = trace_event_add_trigger(params->events, optarg); - if (retval) { - err_msg("Error adding trigger %s\n", optarg); - exit(EXIT_FAILURE); - } + if (params->common.events) { + retval = trace_event_add_trigger(params->common.events, optarg); + if (retval) + fatal("Error adding trigger %s", optarg); } else { - osnoise_hist_usage("--trigger requires a previous -e\n"); + fatal("--trigger requires a previous -e"); } break; case '5': /* filter */ - if (params->events) { - retval = trace_event_add_filter(params->events, optarg); - if (retval) { - err_msg("Error adding filter %s\n", optarg); - exit(EXIT_FAILURE); - } + if (params->common.events) { + retval = trace_event_add_filter(params->common.events, optarg); + if (retval) + fatal("Error adding filter %s", optarg); } else { - osnoise_hist_usage("--filter requires a previous -e\n"); + fatal("--filter requires a previous -e"); } break; + case '6': + params->common.warmup = get_llong_from_str(optarg); + break; + case '7': + params->common.buffer_size = get_llong_from_str(optarg); + break; + case '8': + retval = actions_parse(¶ms->common.threshold_actions, optarg, + "osnoise_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + case '9': + retval = actions_parse(¶ms->common.end_actions, optarg, + "osnoise_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; default: - osnoise_hist_usage("Invalid option"); + fatal("Invalid option"); } } - if (geteuid()) { - err_msg("rtla needs root permission\n"); - exit(EXIT_FAILURE); - } + if (trace_output) + actions_add_trace_output(¶ms->common.threshold_actions, trace_output); - if (params->no_index && !params->with_zeros) - osnoise_hist_usage("no-index set and with-zeros not set - it does not make sense"); + if (geteuid()) + fatal("rtla needs root permission"); - return params; + if (params->common.hist.no_index && !params->common.hist.with_zeros) + fatal("no-index set and with-zeros not set - it does not make sense"); + + return ¶ms->common; } /* * osnoise_hist_apply_config - apply the hist configs to the initialized tool */ static int -osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params) +osnoise_hist_apply_config(struct osnoise_tool *tool) { - int retval; - - if (!params->sleep_time) - params->sleep_time = 1; - - if (params->cpus) { - retval = osnoise_set_cpus(tool->context, params->cpus); - if (retval) { - err_msg("Failed to apply CPUs config\n"); - goto out_err; - } - } - - if (params->runtime || params->period) { - retval = osnoise_set_runtime_period(tool->context, - params->runtime, - params->period); - if (retval) { - err_msg("Failed to set runtime and/or period\n"); - goto out_err; - } - } - - if (params->stop_us) { - retval = osnoise_set_stop_us(tool->context, params->stop_us); - if (retval) { - err_msg("Failed to set stop us\n"); - goto out_err; - } - } - - if (params->stop_total_us) { - retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); - if (retval) { - err_msg("Failed to set stop total us\n"); - goto out_err; - } - } - - if (params->threshold) { - retval = osnoise_set_tracing_thresh(tool->context, params->threshold); - if (retval) { - err_msg("Failed to set tracing_thresh\n"); - goto out_err; - } - } - - return 0; - -out_err: - return -1; + return osnoise_apply_config(tool, to_osnoise_params(tool->params)); } /* * osnoise_init_hist - initialize a osnoise hist tool with parameters */ static struct osnoise_tool -*osnoise_init_hist(struct osnoise_hist_params *params) +*osnoise_init_hist(struct common_params *params) { struct osnoise_tool *tool; int nr_cpus; @@ -736,12 +707,11 @@ static struct osnoise_tool if (!tool) return NULL; - tool->data = osnoise_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + tool->data = osnoise_alloc_histogram(nr_cpus, params->hist.entries, + params->hist.bucket_size); if (!tool->data) goto out_err; - tool->params = params; - return tool; out_err: @@ -749,132 +719,35 @@ out_err: return NULL; } -static int stop_tracing; -static void stop_hist(int sig) +static int osnoise_hist_enable(struct osnoise_tool *tool) { - stop_tracing = 1; -} - -/* - * osnoise_hist_set_signals - handles the signal to stop the tool - */ -static void -osnoise_hist_set_signals(struct osnoise_hist_params *params) -{ - signal(SIGINT, stop_hist); - if (params->duration) { - signal(SIGALRM, stop_hist); - alarm(params->duration); - } -} - -int osnoise_hist_main(int argc, char *argv[]) -{ - struct osnoise_hist_params *params; - struct osnoise_tool *record = NULL; - struct osnoise_tool *tool = NULL; - struct trace_instance *trace; - int return_value = 1; int retval; - params = osnoise_hist_parse_args(argc, argv); - if (!params) - exit(1); - - tool = osnoise_init_hist(params); - if (!tool) { - err_msg("Could not init osnoise hist\n"); - goto out_exit; - } - - retval = osnoise_hist_apply_config(tool, params); - if (retval) { - err_msg("Could not apply config\n"); - goto out_destroy; - } - - trace = &tool->trace; - - retval = enable_osnoise(trace); - if (retval) { - err_msg("Failed to enable osnoise tracer\n"); - goto out_destroy; - } - retval = osnoise_init_trace_hist(tool); if (retval) - goto out_destroy; - - if (params->set_sched) { - retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); - if (retval) { - err_msg("Failed to set sched parameters\n"); - goto out_free; - } - } - - trace_instance_start(trace); - - if (params->trace_output) { - record = osnoise_init_trace_tool("osnoise"); - if (!record) { - err_msg("Failed to enable the trace instance\n"); - goto out_free; - } - - if (params->events) { - retval = trace_events_enable(&record->trace, params->events); - if (retval) - goto out_hist; - } + return retval; - trace_instance_start(&record->trace); - } - - tool->start_time = time(NULL); - osnoise_hist_set_signals(params); - - while (!stop_tracing) { - sleep(params->sleep_time); - - retval = tracefs_iterate_raw_events(trace->tep, - trace->inst, - NULL, - 0, - collect_registered_events, - trace); - if (retval < 0) { - err_msg("Error iterating on events\n"); - goto out_hist; - } + return osnoise_enable(tool); +} - if (trace_is_off(&tool->trace, &record->trace)) - break; - } +static int osnoise_hist_main_loop(struct osnoise_tool *tool) +{ + int retval; + retval = hist_main_loop(tool); osnoise_read_trace_hist(tool); - osnoise_print_stats(params, tool); - - return_value = 0; - - if (trace_is_off(&tool->trace, &record->trace)) { - printf("rtla osnoise hit stop tracing\n"); - if (params->trace_output) { - printf(" Saving trace to %s\n", params->trace_output); - save_trace_to_file(record->trace.inst, params->trace_output); - } - } - -out_hist: - trace_events_destroy(&record->trace, params->events); - params->events = NULL; -out_free: - osnoise_free_histogram(tool->data); -out_destroy: - osnoise_destroy_tool(record); - osnoise_destroy_tool(tool); - free(params); -out_exit: - exit(return_value); + return retval; } + +struct tool_ops osnoise_hist_ops = { + .tracer = "osnoise", + .comm_prefix = "osnoise/", + .parse_args = osnoise_hist_parse_args, + .init_tool = osnoise_init_hist, + .apply_config = osnoise_hist_apply_config, + .enable = osnoise_hist_enable, + .main = osnoise_hist_main_loop, + .print_stats = osnoise_print_stats, + .free = osnoise_free_hist_tool, +}; diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 76479bfb2922..04c699bdd736 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <getopt.h> #include <stdlib.h> #include <string.h> @@ -12,27 +13,6 @@ #include <time.h> #include "osnoise.h" -#include "utils.h" - -/* - * osnoise top parameters - */ -struct osnoise_top_params { - char *cpus; - char *monitored_cpus; - char *trace_output; - unsigned long long runtime; - unsigned long long period; - long long threshold; - long long stop_us; - long long stop_total_us; - int sleep_time; - int duration; - int quiet; - int set_sched; - struct sched_attr sched_param; - struct trace_events *events; -}; struct osnoise_top_cpu { unsigned long long sum_runtime; @@ -57,13 +37,17 @@ struct osnoise_top_data { /* * osnoise_free_top - free runtime data */ -static void -osnoise_free_top(struct osnoise_top_data *data) +static void osnoise_free_top(struct osnoise_top_data *data) { free(data->cpu_data); free(data); } +static void osnoise_free_top_tool(struct osnoise_tool *tool) +{ + osnoise_free_top(tool->data); +} + /* * osnoise_alloc_histogram - alloc runtime data */ @@ -143,27 +127,50 @@ osnoise_top_handler(struct trace_seq *s, struct tep_record *record, */ static void osnoise_top_header(struct osnoise_tool *top) { + struct osnoise_params *params = to_osnoise_params(top->params); struct trace_seq *s = top->trace.seq; + bool pretty = params->common.pretty_output; char duration[26]; get_duration(top->start_time, duration, sizeof(duration)); - trace_seq_printf(s, "\033[2;37;40m"); - trace_seq_printf(s, " Operating System Noise"); - trace_seq_printf(s, " "); - trace_seq_printf(s, " "); - trace_seq_printf(s, "\033[0;0;0m"); + if (pretty) + trace_seq_printf(s, "\033[2;37;40m"); + + trace_seq_printf(s, " "); + + if (params->mode == MODE_OSNOISE) { + trace_seq_printf(s, "Operating System Noise"); + trace_seq_printf(s, " "); + } else if (params->mode == MODE_HWNOISE) { + trace_seq_printf(s, "Hardware-related Noise"); + } + + trace_seq_printf(s, " "); + + if (pretty) + trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); trace_seq_printf(s, "duration: %9s | time is in us\n", duration); - trace_seq_printf(s, "\033[2;30;47m"); + if (pretty) + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU Period Runtime "); trace_seq_printf(s, " Noise "); trace_seq_printf(s, " %% CPU Aval "); trace_seq_printf(s, " Max Noise Max Single "); - trace_seq_printf(s, " HW NMI IRQ Softirq Thread"); - trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, " HW NMI"); + + if (params->mode == MODE_HWNOISE) + goto eol; + + trace_seq_printf(s, " IRQ Softirq Thread"); + +eol: + if (pretty) + trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); } @@ -181,6 +188,7 @@ static void clear_terminal(struct trace_seq *seq) */ static void osnoise_top_print(struct osnoise_tool *tool, int cpu) { + struct osnoise_params *params = to_osnoise_params(tool->params); struct trace_seq *s = tool->trace.seq; struct osnoise_top_cpu *cpu_data; struct osnoise_top_data *data; @@ -205,6 +213,12 @@ static void osnoise_top_print(struct osnoise_tool *tool, int cpu) trace_seq_printf(s, "%12llu ", cpu_data->hw_count); trace_seq_printf(s, "%12llu ", cpu_data->nmi_count); + + if (params->mode == MODE_HWNOISE) { + trace_seq_printf(s, "\n"); + return; + } + trace_seq_printf(s, "%12llu ", cpu_data->irq_count); trace_seq_printf(s, "%12llu ", cpu_data->softirq_count); trace_seq_printf(s, "%12llu\n", cpu_data->thread_count); @@ -214,8 +228,9 @@ static void osnoise_top_print(struct osnoise_tool *tool, int cpu) * osnoise_print_stats - print data for all cpus */ static void -osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) +osnoise_print_stats(struct osnoise_tool *top) { + struct osnoise_params *params = to_osnoise_params(top->params); struct trace_instance *trace = &top->trace; static int nr_cpus = -1; int i; @@ -223,32 +238,31 @@ osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) if (nr_cpus == -1) nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - if (!params->quiet) + if (!params->common.quiet) clear_terminal(trace->seq); osnoise_top_header(top); - for (i = 0; i < nr_cpus; i++) { - if (params->cpus && !params->monitored_cpus[i]) - continue; + for_each_monitored_cpu(i, nr_cpus, ¶ms->common) { osnoise_top_print(top, i); } trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); + osnoise_report_missed_events(top); } /* * osnoise_top_usage - prints osnoise top usage message */ -void osnoise_top_usage(char *usage) +static void osnoise_top_usage(struct osnoise_params *params) { int i; static const char * const msg[] = { - " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", - " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", - " [-c cpu-list] [-P priority]", + " [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", + " [-T us] [-t [file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", + " [-c cpu-list] [-H cpu-list] [-P priority] [-C [cgroup_name]] [--warm-up s]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", @@ -258,9 +272,11 @@ void osnoise_top_usage(char *usage) " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", " -T/--threshold us: the minimum delta to be considered a noise", " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup [cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[s|m|h|d]: duration of the session", " -D/--debug: print debug info", - " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -t/--trace [file]: save the stopped trace to [file|osnoise_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <filter>: enable a trace event filter to the previous -e event", " --trigger <trigger>: enable a trace event trigger to the previous -e event", @@ -271,41 +287,71 @@ void osnoise_top_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", + " --warm-up s: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --on-threshold <action>: define action to be executed at stop-total threshold, multiple are allowed", + " --on-end: define action to be executed at measurement end, multiple are allowed", NULL, }; - if (usage) - fprintf(stderr, "%s\n", usage); + if (params->mode == MODE_OSNOISE) { + fprintf(stderr, + "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n", + VERSION); - fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n", + fprintf(stderr, " usage: rtla osnoise [top]"); + } + + if (params->mode == MODE_HWNOISE) { + fprintf(stderr, + "rtla hwnoise: a summary of hardware-related noise (version %s)\n", VERSION); + fprintf(stderr, " usage: rtla hwnoise"); + } + for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + exit(EXIT_SUCCESS); } /* * osnoise_top_parse_args - allocs, parse and fill the cmd line parameters */ -struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) +struct common_params *osnoise_top_parse_args(int argc, char **argv) { - struct osnoise_top_params *params; + struct osnoise_params *params; struct trace_events *tevent; int retval; int c; + char *trace_output = NULL; params = calloc(1, sizeof(*params)); if (!params) exit(1); + actions_init(¶ms->common.threshold_actions); + actions_init(¶ms->common.end_actions); + + if (strcmp(argv[0], "hwnoise") == 0) { + params->mode = MODE_HWNOISE; + /* + * Reduce CPU usage for 75% to avoid killing the system. + */ + params->runtime = 750000; + params->period = 1000000; + } + while (1) { static struct option long_options[] = { {"auto", required_argument, 0, 'a'}, {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, {"debug", no_argument, 0, 'D'}, {"duration", required_argument, 0, 'd'}, {"event", required_argument, 0, 'e'}, + {"house-keeping", required_argument, 0, 'H'}, {"help", no_argument, 0, 'h'}, {"period", required_argument, 0, 'p'}, {"priority", required_argument, 0, 'P'}, @@ -317,14 +363,15 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) {"trace", optional_argument, 0, 't'}, {"trigger", required_argument, 0, '0'}, {"filter", required_argument, 0, '1'}, + {"warm-up", required_argument, 0, '2'}, + {"trace-buffer-size", required_argument, 0, '3'}, + {"on-threshold", required_argument, 0, '4'}, + {"on-end", required_argument, 0, '5'}, {0, 0, 0, 0} }; - /* getopt_long stores the option index here. */ - int option_index = 0; - - c = getopt_long(argc, argv, "a:c:d:De:hp:P:qr:s:S:t::T:0:1:", - long_options, &option_index); + c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:2:3:", + long_options, NULL); /* Detect the end of the options. */ if (c == -1) @@ -333,167 +380,160 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) switch (c) { case 'a': /* set sample stop to auto_thresh */ - params->stop_us = get_llong_from_str(optarg); + params->common.stop_us = get_llong_from_str(optarg); /* set sample threshold to 1 */ params->threshold = 1; /* set trace */ - params->trace_output = "osnoise_trace.txt"; + if (!trace_output) + trace_output = "osnoise_trace.txt"; break; case 'c': - retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + retval = parse_cpu_set(optarg, ¶ms->common.monitored_cpus); if (retval) - osnoise_top_usage("\nInvalid -c cpu list\n"); - params->cpus = optarg; + fatal("Invalid -c cpu list"); + params->common.cpus = optarg; + break; + case 'C': + params->common.cgroup = 1; + params->common.cgroup_name = parse_optional_arg(argc, argv); break; case 'D': config_debug = 1; break; case 'd': - params->duration = parse_seconds_duration(optarg); - if (!params->duration) - osnoise_top_usage("Invalid -D duration\n"); + params->common.duration = parse_seconds_duration(optarg); + if (!params->common.duration) + fatal("Invalid -d duration"); break; case 'e': tevent = trace_event_alloc(optarg); - if (!tevent) { - err_msg("Error alloc trace event"); - exit(EXIT_FAILURE); - } + if (!tevent) + fatal("Error alloc trace event"); - if (params->events) - tevent->next = params->events; - params->events = tevent; + if (params->common.events) + tevent->next = params->common.events; + params->common.events = tevent; break; case 'h': case '?': - osnoise_top_usage(NULL); + osnoise_top_usage(params); + break; + case 'H': + params->common.hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->common.hk_cpu_set); + if (retval) + fatal("Error parsing house keeping CPUs"); break; case 'p': params->period = get_llong_from_str(optarg); if (params->period > 10000000) - osnoise_top_usage("Period longer than 10 s\n"); + fatal("Period longer than 10 s"); break; case 'P': - retval = parse_prio(optarg, ¶ms->sched_param); + retval = parse_prio(optarg, ¶ms->common.sched_param); if (retval == -1) - osnoise_top_usage("Invalid -P priority"); - params->set_sched = 1; + fatal("Invalid -P priority"); + params->common.set_sched = 1; break; case 'q': - params->quiet = 1; + params->common.quiet = 1; break; case 'r': params->runtime = get_llong_from_str(optarg); if (params->runtime < 100) - osnoise_top_usage("Runtime shorter than 100 us\n"); + fatal("Runtime shorter than 100 us"); break; case 's': - params->stop_us = get_llong_from_str(optarg); + params->common.stop_us = get_llong_from_str(optarg); break; case 'S': - params->stop_total_us = get_llong_from_str(optarg); + params->common.stop_total_us = get_llong_from_str(optarg); break; case 't': - if (optarg) - /* skip = */ - params->trace_output = &optarg[1]; - else - params->trace_output = "osnoise_trace.txt"; + trace_output = parse_optional_arg(argc, argv); + if (!trace_output) + trace_output = "osnoise_trace.txt"; break; case 'T': params->threshold = get_llong_from_str(optarg); break; case '0': /* trigger */ - if (params->events) { - retval = trace_event_add_trigger(params->events, optarg); - if (retval) { - err_msg("Error adding trigger %s\n", optarg); - exit(EXIT_FAILURE); - } + if (params->common.events) { + retval = trace_event_add_trigger(params->common.events, optarg); + if (retval) + fatal("Error adding trigger %s", optarg); } else { - osnoise_top_usage("--trigger requires a previous -e\n"); + fatal("--trigger requires a previous -e"); } break; case '1': /* filter */ - if (params->events) { - retval = trace_event_add_filter(params->events, optarg); - if (retval) { - err_msg("Error adding filter %s\n", optarg); - exit(EXIT_FAILURE); - } + if (params->common.events) { + retval = trace_event_add_filter(params->common.events, optarg); + if (retval) + fatal("Error adding filter %s", optarg); } else { - osnoise_top_usage("--filter requires a previous -e\n"); + fatal("--filter requires a previous -e"); } break; + case '2': + params->common.warmup = get_llong_from_str(optarg); + break; + case '3': + params->common.buffer_size = get_llong_from_str(optarg); + break; + case '4': + retval = actions_parse(¶ms->common.threshold_actions, optarg, + "osnoise_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + case '5': + retval = actions_parse(¶ms->common.end_actions, optarg, + "osnoise_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; default: - osnoise_top_usage("Invalid option"); + fatal("Invalid option"); } } - if (geteuid()) { - err_msg("osnoise needs root permission\n"); - exit(EXIT_FAILURE); - } + if (trace_output) + actions_add_trace_output(¶ms->common.threshold_actions, trace_output); - return params; + if (geteuid()) + fatal("osnoise needs root permission"); + + return ¶ms->common; } /* * osnoise_top_apply_config - apply the top configs to the initialized tool */ static int -osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params) +osnoise_top_apply_config(struct osnoise_tool *tool) { + struct osnoise_params *params = to_osnoise_params(tool->params); int retval; - if (!params->sleep_time) - params->sleep_time = 1; - - if (params->cpus) { - retval = osnoise_set_cpus(tool->context, params->cpus); - if (retval) { - err_msg("Failed to apply CPUs config\n"); - goto out_err; - } - } - - if (params->runtime || params->period) { - retval = osnoise_set_runtime_period(tool->context, - params->runtime, - params->period); - if (retval) { - err_msg("Failed to set runtime and/or period\n"); - goto out_err; - } - } + retval = osnoise_apply_config(tool, params); + if (retval) + goto out_err; - if (params->stop_us) { - retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (params->mode == MODE_HWNOISE) { + retval = osnoise_set_irq_disable(tool->context, 1); if (retval) { - err_msg("Failed to set stop us\n"); + err_msg("Failed to set OSNOISE_IRQ_DISABLE option\n"); goto out_err; } } - if (params->stop_total_us) { - retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); - if (retval) { - err_msg("Failed to set stop total us\n"); - goto out_err; - } - } - - if (params->threshold) { - retval = osnoise_set_tracing_thresh(tool->context, params->threshold); - if (retval) { - err_msg("Failed to set tracing_thresh\n"); - goto out_err; - } - } + if (isatty(STDOUT_FILENO) && !params->common.quiet) + params->common.pretty_output = 1; return 0; @@ -504,7 +544,7 @@ out_err: /* * osnoise_init_top - initialize a osnoise top tool with parameters */ -struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) +struct osnoise_tool *osnoise_init_top(struct common_params *params) { struct osnoise_tool *tool; int nr_cpus; @@ -516,144 +556,25 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) return NULL; tool->data = osnoise_alloc_top(nr_cpus); - if (!tool->data) - goto out_err; - - tool->params = params; + if (!tool->data) { + osnoise_destroy_tool(tool); + return NULL; + } tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise", osnoise_top_handler, NULL); return tool; - -out_err: - osnoise_free_top(tool->data); - osnoise_destroy_tool(tool); - return NULL; -} - -static int stop_tracing; -static void stop_top(int sig) -{ - stop_tracing = 1; -} - -/* - * osnoise_top_set_signals - handles the signal to stop the tool - */ -static void osnoise_top_set_signals(struct osnoise_top_params *params) -{ - signal(SIGINT, stop_top); - if (params->duration) { - signal(SIGALRM, stop_top); - alarm(params->duration); - } } -int osnoise_top_main(int argc, char **argv) -{ - struct osnoise_top_params *params; - struct osnoise_tool *record = NULL; - struct osnoise_tool *tool = NULL; - struct trace_instance *trace; - int return_value = 1; - int retval; - - params = osnoise_top_parse_args(argc, argv); - if (!params) - exit(1); - - tool = osnoise_init_top(params); - if (!tool) { - err_msg("Could not init osnoise top\n"); - goto out_exit; - } - - retval = osnoise_top_apply_config(tool, params); - if (retval) { - err_msg("Could not apply config\n"); - goto out_free; - } - - trace = &tool->trace; - - retval = enable_osnoise(trace); - if (retval) { - err_msg("Failed to enable osnoise tracer\n"); - goto out_free; - } - - if (params->set_sched) { - retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); - if (retval) { - err_msg("Failed to set sched parameters\n"); - goto out_free; - } - } - - trace_instance_start(trace); - - if (params->trace_output) { - record = osnoise_init_trace_tool("osnoise"); - if (!record) { - err_msg("Failed to enable the trace instance\n"); - goto out_free; - } - - if (params->events) { - retval = trace_events_enable(&record->trace, params->events); - if (retval) - goto out_top; - } - - trace_instance_start(&record->trace); - } - - tool->start_time = time(NULL); - osnoise_top_set_signals(params); - - while (!stop_tracing) { - sleep(params->sleep_time); - - retval = tracefs_iterate_raw_events(trace->tep, - trace->inst, - NULL, - 0, - collect_registered_events, - trace); - if (retval < 0) { - err_msg("Error iterating on events\n"); - goto out_top; - } - - if (!params->quiet) - osnoise_print_stats(params, tool); - - if (trace_is_off(&tool->trace, &record->trace)) - break; - - } - - osnoise_print_stats(params, tool); - - return_value = 0; - - if (trace_is_off(&tool->trace, &record->trace)) { - printf("osnoise hit stop tracing\n"); - if (params->trace_output) { - printf(" Saving trace to %s\n", params->trace_output); - save_trace_to_file(record->trace.inst, params->trace_output); - } - } - -out_top: - trace_events_destroy(&record->trace, params->events); - params->events = NULL; -out_free: - osnoise_free_top(tool->data); - osnoise_destroy_tool(record); - osnoise_destroy_tool(tool); - free(params); -out_exit: - exit(return_value); -} +struct tool_ops osnoise_top_ops = { + .tracer = "osnoise", + .comm_prefix = "osnoise/", + .parse_args = osnoise_top_parse_args, + .init_tool = osnoise_init_top, + .apply_config = osnoise_top_apply_config, + .enable = osnoise_enable, + .main = top_main_loop, + .print_stats = osnoise_print_stats, + .free = osnoise_free_top_tool, +}; diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c index 52e8f1825281..7635c70123ab 100644 --- a/tools/tracing/rtla/src/rtla.c +++ b/tools/tracing/rtla/src/rtla.c @@ -26,6 +26,7 @@ static void rtla_usage(int err) "", " commands:", " osnoise - gives information about the operating system noise (osnoise)", + " hwnoise - gives information about hardware-related noise", " timerlat - measures the timer irq and thread latency", "", NULL, @@ -47,6 +48,9 @@ int run_command(int argc, char **argv, int start_position) if (strcmp(argv[start_position], "osnoise") == 0) { osnoise_main(argc-start_position, &argv[start_position]); goto ran; + } else if (strcmp(argv[start_position], "hwnoise") == 0) { + hwnoise_main(argc-start_position, &argv[start_position]); + goto ran; } else if (strcmp(argv[start_position], "timerlat") == 0) { timerlat_main(argc-start_position, &argv[start_position]); goto ran; diff --git a/tools/tracing/rtla/src/timerlat.bpf.c b/tools/tracing/rtla/src/timerlat.bpf.c new file mode 100644 index 000000000000..e2265b5d6491 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.bpf.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "timerlat_bpf.h" + +#define nosubprog __always_inline +#define MAX_ENTRIES_DEFAULT 4096 + +char LICENSE[] SEC("license") = "GPL"; + +struct trace_event_raw_timerlat_sample { + unsigned long long timer_latency; + int context; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_ENTRIES_DEFAULT); + __type(key, unsigned int); + __type(value, unsigned long long); +} hist_irq SEC(".maps"), hist_thread SEC(".maps"), hist_user SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, SUMMARY_FIELD_N); + __type(key, unsigned int); + __type(value, unsigned long long); +} summary_irq SEC(".maps"), summary_thread SEC(".maps"), summary_user SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, unsigned int); + __type(value, unsigned long long); +} stop_tracing SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1); +} signal_stop_tracing SEC(".maps"); + +/* Params to be set by rtla */ +const volatile int bucket_size = 1; +const volatile int output_divisor = 1000; +const volatile int entries = 256; +const volatile int irq_threshold; +const volatile int thread_threshold; +const volatile bool aa_only; + +nosubprog unsigned long long map_get(void *map, + unsigned int key) +{ + unsigned long long *value_ptr; + + value_ptr = bpf_map_lookup_elem(map, &key); + + return !value_ptr ? 0 : *value_ptr; +} + +nosubprog void map_set(void *map, + unsigned int key, + unsigned long long value) +{ + bpf_map_update_elem(map, &key, &value, BPF_ANY); +} + +nosubprog void map_increment(void *map, + unsigned int key) +{ + map_set(map, key, map_get(map, key) + 1); +} + +nosubprog void update_main_hist(void *map, + int bucket) +{ + if (entries == 0) + /* No histogram */ + return; + + if (bucket >= entries) + /* Overflow */ + return; + + map_increment(map, bucket); +} + +nosubprog void update_summary(void *map, + unsigned long long latency, + int bucket) +{ + if (aa_only) + /* Auto-analysis only, nothing to be done here */ + return; + + map_set(map, SUMMARY_CURRENT, latency); + + if (bucket >= entries) + /* Overflow */ + map_increment(map, SUMMARY_OVERFLOW); + + if (latency > map_get(map, SUMMARY_MAX)) + map_set(map, SUMMARY_MAX, latency); + + if (latency < map_get(map, SUMMARY_MIN) || map_get(map, SUMMARY_COUNT) == 0) + map_set(map, SUMMARY_MIN, latency); + + map_increment(map, SUMMARY_COUNT); + map_set(map, SUMMARY_SUM, map_get(map, SUMMARY_SUM) + latency); +} + +nosubprog void set_stop_tracing(void) +{ + int value = 0; + + /* Suppress further sample processing */ + map_set(&stop_tracing, 0, 1); + + /* Signal to userspace */ + bpf_ringbuf_output(&signal_stop_tracing, &value, sizeof(value), 0); +} + +SEC("tp/osnoise/timerlat_sample") +int handle_timerlat_sample(struct trace_event_raw_timerlat_sample *tp_args) +{ + unsigned long long latency, latency_us; + int bucket; + + if (map_get(&stop_tracing, 0)) + return 0; + + latency = tp_args->timer_latency / output_divisor; + latency_us = tp_args->timer_latency / 1000; + bucket = latency / bucket_size; + + if (tp_args->context == 0) { + update_main_hist(&hist_irq, bucket); + update_summary(&summary_irq, latency, bucket); + + if (irq_threshold != 0 && latency_us >= irq_threshold) + set_stop_tracing(); + } else if (tp_args->context == 1) { + update_main_hist(&hist_thread, bucket); + update_summary(&summary_thread, latency, bucket); + + if (thread_threshold != 0 && latency_us >= thread_threshold) + set_stop_tracing(); + } else { + update_main_hist(&hist_user, bucket); + update_summary(&summary_user, latency, bucket); + + if (thread_threshold != 0 && latency_us >= thread_threshold) + set_stop_tracing(); + } + + return 0; +} diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c index 21cdcc5c4a29..df4f9bfe3433 100644 --- a/tools/tracing/rtla/src/timerlat.c +++ b/tools/tracing/rtla/src/timerlat.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <sys/types.h> #include <sys/stat.h> #include <pthread.h> @@ -11,8 +12,225 @@ #include <errno.h> #include <fcntl.h> #include <stdio.h> +#include <sched.h> #include "timerlat.h" +#include "timerlat_aa.h" +#include "timerlat_bpf.h" + +#define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ + +static int dma_latency_fd = -1; + +/* + * timerlat_apply_config - apply common configs to the initialized tool + */ +int +timerlat_apply_config(struct osnoise_tool *tool, struct timerlat_params *params) +{ + int retval; + + /* + * Try to enable BPF, unless disabled explicitly. + * If BPF enablement fails, fall back to tracefs mode. + */ + if (getenv("RTLA_NO_BPF") && strncmp(getenv("RTLA_NO_BPF"), "1", 2) == 0) { + debug_msg("RTLA_NO_BPF set, disabling BPF\n"); + params->mode = TRACING_MODE_TRACEFS; + } else if (!tep_find_event_by_name(tool->trace.tep, "osnoise", "timerlat_sample")) { + debug_msg("osnoise:timerlat_sample missing, disabling BPF\n"); + params->mode = TRACING_MODE_TRACEFS; + } else { + retval = timerlat_bpf_init(params); + if (retval) { + debug_msg("Could not enable BPF\n"); + params->mode = TRACING_MODE_TRACEFS; + } + } + + if (params->mode != TRACING_MODE_BPF) { + /* + * In tracefs and mixed mode, timerlat tracer handles stopping + * on threshold + */ + retval = osnoise_set_stop_us(tool->context, params->common.stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + + retval = osnoise_set_stop_total_us(tool->context, params->common.stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + + retval = osnoise_set_timerlat_period_us(tool->context, + params->timerlat_period_us ? + params->timerlat_period_us : + DEFAULT_TIMERLAT_PERIOD); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + + + retval = osnoise_set_print_stack(tool->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + + /* + * If the user did not specify a type of thread, try user-threads first. + * Fall back to kernel threads otherwise. + */ + if (!params->common.kernel_workload && !params->common.user_data) { + retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); + if (retval) { + debug_msg("User-space interface detected, setting user-threads\n"); + params->common.user_workload = 1; + params->common.user_data = 1; + } else { + debug_msg("User-space interface not detected, setting kernel-threads\n"); + params->common.kernel_workload = 1; + } + } + + return common_apply_config(tool, ¶ms->common); + +out_err: + return -1; +} + +int timerlat_enable(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval, nr_cpus, i; + + if (params->dma_latency >= 0) { + dma_latency_fd = set_cpu_dma_latency(params->dma_latency); + if (dma_latency_fd < 0) { + err_msg("Could not set /dev/cpu_dma_latency.\n"); + return -1; + } + } + + if (params->deepest_idle_state >= -1) { + if (!have_libcpupower_support()) { + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); + return -1; + } + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for_each_monitored_cpu(i, nr_cpus, ¶ms->common) { + if (save_cpu_idle_disable_state(i) < 0) { + err_msg("Could not save cpu idle state.\n"); + return -1; + } + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { + err_msg("Could not set deepest cpu idle state.\n"); + return -1; + } + } + } + + if (!params->no_aa) { + tool->aa = osnoise_init_tool("timerlat_aa"); + if (!tool->aa) + return -1; + + retval = timerlat_aa_init(tool->aa, params->dump_tasks); + if (retval) { + err_msg("Failed to enable the auto analysis instance\n"); + return retval; + } + + retval = enable_tracer_by_name(tool->aa->trace.inst, "timerlat"); + if (retval) { + err_msg("Failed to enable aa tracer\n"); + return retval; + } + } + + if (params->common.warmup > 0) { + debug_msg("Warming up for %d seconds\n", params->common.warmup); + sleep(params->common.warmup); + if (stop_tracing) + return -1; + } + + /* + * Start the tracers here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (tool->record) + trace_instance_start(&tool->record->trace); + if (!params->no_aa) + trace_instance_start(&tool->aa->trace); + if (params->mode == TRACING_MODE_TRACEFS) { + trace_instance_start(&tool->trace); + } else { + retval = timerlat_bpf_attach(); + if (retval) { + err_msg("Error attaching BPF program\n"); + return retval; + } + } + + return 0; +} + +void timerlat_analyze(struct osnoise_tool *tool, bool stopped) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + + if (stopped) { + if (!params->no_aa) + timerlat_auto_analysis(params->common.stop_us, + params->common.stop_total_us); + } else if (params->common.aa_only) { + char *max_lat; + + /* + * If the trace did not stop with --aa-only, at least print + * the max known latency. + */ + max_lat = tracefs_instance_file_read(trace_inst->inst, "tracing_max_latency", NULL); + if (max_lat) { + printf(" Max latency was %s\n", max_lat); + free(max_lat); + } + } +} + +void timerlat_free(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + int i; + + timerlat_aa_destroy(); + if (dma_latency_fd >= 0) + close(dma_latency_fd); + if (params->deepest_idle_state >= -1) { + for_each_monitored_cpu(i, nr_cpus, ¶ms->common) { + restore_cpu_idle_disable_state(i); + } + } + + osnoise_destroy_tool(tool->aa); + + if (params->mode != TRACING_MODE_TRACEFS) + timerlat_bpf_destroy(); + free_cpu_idle_disable_states(); +} static void timerlat_usage(int err) { @@ -47,7 +265,7 @@ int timerlat_main(int argc, char *argv[]) * default cmdline. */ if (argc == 1) { - timerlat_top_main(argc, argv); + run_tool(&timerlat_top_ops, argc, argv); exit(0); } @@ -55,13 +273,13 @@ int timerlat_main(int argc, char *argv[]) timerlat_usage(0); } else if (strncmp(argv[1], "-", 1) == 0) { /* the user skipped the tool, call the default one */ - timerlat_top_main(argc, argv); + run_tool(&timerlat_top_ops, argc, argv); exit(0); } else if (strcmp(argv[1], "top") == 0) { - timerlat_top_main(argc-1, &argv[1]); + run_tool(&timerlat_top_ops, argc-1, &argv[1]); exit(0); } else if (strcmp(argv[1], "hist") == 0) { - timerlat_hist_main(argc-1, &argv[1]); + run_tool(&timerlat_hist_ops, argc-1, &argv[1]); exit(0); } diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h index 88561bfd14f3..fd6065f48bb7 100644 --- a/tools/tracing/rtla/src/timerlat.h +++ b/tools/tracing/rtla/src/timerlat.h @@ -1,4 +1,39 @@ // SPDX-License-Identifier: GPL-2.0 -int timerlat_hist_main(int argc, char *argv[]); -int timerlat_top_main(int argc, char *argv[]); +#include "osnoise.h" + +/* + * Define timerlat tracing mode. + * + * There are three tracing modes: + * - tracefs-only, used when BPF is unavailable. + * - BPF-only, used when BPF is available and neither trace saving nor + * auto-analysis are enabled. + * - mixed mode, used when BPF is available and either trace saving or + * auto-analysis is enabled (which rely on sample collection through + * tracefs). + */ +enum timerlat_tracing_mode { + TRACING_MODE_BPF, + TRACING_MODE_TRACEFS, + TRACING_MODE_MIXED, +}; + +struct timerlat_params { + struct common_params common; + long long timerlat_period_us; + long long print_stack; + int dma_latency; + int no_aa; + int dump_tasks; + int deepest_idle_state; + enum timerlat_tracing_mode mode; +}; + +#define to_timerlat_params(ptr) container_of(ptr, struct timerlat_params, common) + +int timerlat_apply_config(struct osnoise_tool *tool, struct timerlat_params *params); int timerlat_main(int argc, char *argv[]); +int timerlat_enable(struct osnoise_tool *tool); +void timerlat_analyze(struct osnoise_tool *tool, bool stopped); +void timerlat_free(struct osnoise_tool *tool); + diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c new file mode 100644 index 000000000000..31e66ea2b144 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -0,0 +1,1056 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <stdlib.h> +#include <errno.h> +#include "timerlat.h" +#include <unistd.h> + +enum timelat_state { + TIMERLAT_INIT = 0, + TIMERLAT_WAITING_IRQ, + TIMERLAT_WAITING_THREAD, +}; + +/* Used to fill spaces in the output */ +static const char *spaces = " "; + +#define MAX_COMM 24 + +/* + * Per-cpu data statistics and data. + */ +struct timerlat_aa_data { + /* Current CPU state */ + int curr_state; + + /* timerlat IRQ latency */ + unsigned long long tlat_irq_seqnum; + unsigned long long tlat_irq_latency; + unsigned long long tlat_irq_timstamp; + + /* timerlat Thread latency */ + unsigned long long tlat_thread_seqnum; + unsigned long long tlat_thread_latency; + unsigned long long tlat_thread_timstamp; + + /* + * Information about the thread running when the IRQ + * arrived. + * + * This can be blocking or interference, depending on the + * priority of the thread. Assuming timerlat is the highest + * prio, it is blocking. If timerlat has a lower prio, it is + * interference. + * note: "unsigned long long" because they are fetch using tep_get_field_val(); + */ + unsigned long long run_thread_pid; + char run_thread_comm[MAX_COMM]; + unsigned long long thread_blocking_duration; + unsigned long long max_exit_idle_latency; + + /* Information about the timerlat timer irq */ + unsigned long long timer_irq_start_time; + unsigned long long timer_irq_start_delay; + unsigned long long timer_irq_duration; + unsigned long long timer_exit_from_idle; + + /* + * Information about the last IRQ before the timerlat irq + * arrived. + * + * If now - timestamp is <= latency, it might have influenced + * in the timerlat irq latency. Otherwise, ignore it. + */ + unsigned long long prev_irq_duration; + unsigned long long prev_irq_timstamp; + + /* + * Interference sum. + */ + unsigned long long thread_nmi_sum; + unsigned long long thread_irq_sum; + unsigned long long thread_softirq_sum; + unsigned long long thread_thread_sum; + + /* + * Interference task information. + */ + struct trace_seq *prev_irqs_seq; + struct trace_seq *nmi_seq; + struct trace_seq *irqs_seq; + struct trace_seq *softirqs_seq; + struct trace_seq *threads_seq; + struct trace_seq *stack_seq; + + /* + * Current thread. + */ + char current_comm[MAX_COMM]; + unsigned long long current_pid; + + /* + * Is the system running a kworker? + */ + unsigned long long kworker; + unsigned long long kworker_func; +}; + +/* + * The analysis context and system wide view + */ +struct timerlat_aa_context { + int nr_cpus; + int dump_tasks; + + /* per CPU data */ + struct timerlat_aa_data *taa_data; + + /* + * required to translate function names and register + * events. + */ + struct osnoise_tool *tool; +}; + +/* + * The data is stored as a local variable, but accessed via a helper function. + * + * It could be stored inside the trace context. But every access would + * require container_of() + a series of pointers. Do we need it? Not sure. + * + * For now keep it simple. If needed, store it in the tool, add the *context + * as a parameter in timerlat_aa_get_ctx() and do the magic there. + */ +static struct timerlat_aa_context *__timerlat_aa_ctx; + +static struct timerlat_aa_context *timerlat_aa_get_ctx(void) +{ + return __timerlat_aa_ctx; +} + +/* + * timerlat_aa_get_data - Get the per-cpu data from the timerlat context + */ +static struct timerlat_aa_data +*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu) +{ + return &taa_ctx->taa_data[cpu]; +} + +/* + * timerlat_aa_irq_latency - Handles timerlat IRQ event + */ +static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data, + struct trace_seq *s, struct tep_record *record, + struct tep_event *event) +{ + /* + * For interference, we start now looking for things that can delay + * the thread. + */ + taa_data->curr_state = TIMERLAT_WAITING_THREAD; + taa_data->tlat_irq_timstamp = record->ts; + + /* + * Zero values. + */ + taa_data->thread_nmi_sum = 0; + taa_data->thread_irq_sum = 0; + taa_data->thread_softirq_sum = 0; + taa_data->thread_thread_sum = 0; + taa_data->thread_blocking_duration = 0; + taa_data->timer_irq_start_time = 0; + taa_data->timer_irq_duration = 0; + taa_data->timer_exit_from_idle = 0; + + /* + * Zero interference tasks. + */ + trace_seq_reset(taa_data->nmi_seq); + trace_seq_reset(taa_data->irqs_seq); + trace_seq_reset(taa_data->softirqs_seq); + trace_seq_reset(taa_data->threads_seq); + + /* IRQ latency values */ + tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1); + tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1); + + /* The thread that can cause blocking */ + tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1); + + /* + * Get exit from idle case. + * + * If it is not idle thread: + */ + if (taa_data->run_thread_pid) + return 0; + + /* + * if the latency is shorter than the known exit from idle: + */ + if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency) + return 0; + + /* + * To be safe, ignore the cases in which an IRQ/NMI could have + * interfered with the timerlat IRQ. + */ + if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency + < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) + return 0; + + taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency; + + return 0; +} + +/* + * timerlat_aa_thread_latency - Handles timerlat thread event + */ +static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data, + struct trace_seq *s, struct tep_record *record, + struct tep_event *event) +{ + /* + * For interference, we start now looking for things that can delay + * the IRQ of the next cycle. + */ + taa_data->curr_state = TIMERLAT_WAITING_IRQ; + taa_data->tlat_thread_timstamp = record->ts; + + /* Thread latency values */ + tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1); + tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1); + + return 0; +} + +/* + * timerlat_aa_handler - Handle timerlat events + * + * This function is called to handle timerlat events recording statistics. + * + * Returns 0 on success, -1 otherwise. + */ +static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long thread; + + if (!taa_data) + return -1; + + tep_get_field_val(s, event, "context", record, &thread, 1); + if (!thread) + return timerlat_aa_irq_latency(taa_data, s, record, event); + else + return timerlat_aa_thread_latency(taa_data, s, record, event); +} + +/* + * timerlat_aa_nmi_handler - Handles NMI noise + * + * It is used to collect information about interferences from NMI. It is + * hooked to the osnoise:nmi_noise event. + */ +static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long duration; + unsigned long long start; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) { + taa_data->prev_irq_duration = duration; + taa_data->prev_irq_timstamp = start; + + trace_seq_reset(taa_data->prev_irqs_seq); + trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n", + "nmi", + 24, spaces, + ns_to_usf(duration)); + return 0; + } + + taa_data->thread_nmi_sum += duration; + trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n", + "nmi", + 24, spaces, ns_to_usf(duration)); + + return 0; +} + +/* + * timerlat_aa_irq_handler - Handles IRQ noise + * + * It is used to collect information about interferences from IRQ. It is + * hooked to the osnoise:irq_noise event. + * + * It is a little bit more complex than the other because it measures: + * - The IRQs that can delay the timer IRQ before it happened. + * - The Timerlat IRQ handler + * - The IRQs that happened between the timerlat IRQ and the timerlat thread + * (IRQ interference). + */ +static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long expected_start; + unsigned long long duration; + unsigned long long vector; + unsigned long long start; + char *desc; + int val; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + tep_get_field_val(s, event, "vector", record, &vector, 1); + desc = tep_get_field_raw(s, event, "desc", record, &val, 1); + + /* + * Before the timerlat IRQ. + */ + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) { + taa_data->prev_irq_duration = duration; + taa_data->prev_irq_timstamp = start; + + trace_seq_reset(taa_data->prev_irqs_seq); + trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + desc, vector, + 15, spaces, + ns_to_usf(duration)); + return 0; + } + + /* + * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at + * the timerlat irq handler. + */ + if (!taa_data->timer_irq_start_time) { + expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency; + + taa_data->timer_irq_start_time = start; + taa_data->timer_irq_duration = duration; + + /* + * We are dealing with two different clock sources: the + * external clock source that timerlat uses as a reference + * and the clock used by the tracer. There are also two + * moments: the time reading the clock and the timer in + * which the event is placed in the buffer (the trace + * event timestamp). If the processor is slow or there + * is some hardware noise, the difference between the + * timestamp and the external clock read can be longer + * than the IRQ handler delay, resulting in a negative + * time. If so, set IRQ start delay as 0. In the end, + * it is less relevant than the noise. + */ + if (expected_start < taa_data->timer_irq_start_time) + taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start; + else + taa_data->timer_irq_start_delay = 0; + + /* + * not exit from idle. + */ + if (taa_data->run_thread_pid) + return 0; + + if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) + taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay; + + return 0; + } + + /* + * IRQ interference. + */ + taa_data->thread_irq_sum += duration; + trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + desc, vector, + 24, spaces, + ns_to_usf(duration)); + + return 0; +} + +static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", + "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" }; + + +/* + * timerlat_aa_softirq_handler - Handles Softirq noise + * + * It is used to collect information about interferences from Softirq. It is + * hooked to the osnoise:softirq_noise event. + * + * It is only printed in the non-rt kernel, as softirqs become thread on RT. + */ +static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long duration; + unsigned long long vector; + unsigned long long start; + + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) + return 0; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + tep_get_field_val(s, event, "vector", record, &vector, 1); + + taa_data->thread_softirq_sum += duration; + + trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + softirq_name[vector], vector, + 24, spaces, + ns_to_usf(duration)); + return 0; +} + +/* + * timerlat_aa_softirq_handler - Handles thread noise + * + * It is used to collect information about interferences from threads. It is + * hooked to the osnoise:thread_noise event. + * + * Note: if you see thread noise, your timerlat thread was not the highest prio one. + */ +static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long long duration; + unsigned long long start; + unsigned long long pid; + const char *comm; + int val; + + if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) + return 0; + + tep_get_field_val(s, event, "duration", record, &duration, 1); + tep_get_field_val(s, event, "start", record, &start, 1); + + tep_get_common_field_val(s, event, "common_pid", record, &pid, 1); + comm = tep_get_field_raw(s, event, "comm", record, &val, 1); + + if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) { + taa_data->thread_blocking_duration = duration; + + if (comm) + strncpy(taa_data->run_thread_comm, comm, MAX_COMM); + else + sprintf(taa_data->run_thread_comm, "<...>"); + + } else { + taa_data->thread_thread_sum += duration; + + trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n", + comm, pid, + 15, spaces, + ns_to_usf(duration)); + } + + return 0; +} + +/* + * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace + * + * Saves and parse the stack trace generated by the timerlat IRQ. + */ +static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + unsigned long *caller; + const char *function; + int val, i; + + trace_seq_reset(taa_data->stack_seq); + + trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n"); + caller = tep_get_field_raw(s, event, "caller", record, &val, 1); + if (caller) { + for (i = 0; ; i++) { + function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]); + if (!function) + break; + trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n", + 14, spaces, function); + } + } + return 0; +} + +/* + * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU + * + * Handles the sched:sched_switch event to trace the current thread running on the + * CPU. It is used to display the threads running on the other CPUs when the trace + * stops. + */ +static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + const char *comm; + int val; + + tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1); + comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1); + + strncpy(taa_data->current_comm, comm, MAX_COMM); + + /* + * If this was a kworker, clean the last kworkers that ran. + */ + taa_data->kworker = 0; + taa_data->kworker_func = 0; + + return 0; +} + +/* + * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU + * + * Handles workqueue:workqueue_execute_start event, keeping track of + * the job that a kworker could be doing in the CPU. + * + * We already catch problems of hardware related latencies caused by work queues + * running driver code that causes hardware stall. For example, with DRM drivers. + */ +static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); + + tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1); + tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1); + return 0; +} + +/* + * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing + * + * This is the core of the analysis. + */ +static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, + int irq_thresh, int thread_thresh) +{ + long long exp_irq_ts; + int total; + int irq; + + /* + * IRQ latency or Thread latency? + */ + if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) { + irq = 1; + total = taa_data->tlat_irq_latency; + } else { + irq = 0; + total = taa_data->tlat_thread_latency; + } + + /* + * Expected IRQ arrival time using the trace clock as the base. + * + * TODO: Add a list of previous IRQ, and then run the list backwards. + */ + exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; + if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { + if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) + printf(" Previous IRQ interference: %.*s up to %9.2f us\n", + 16, spaces, + ns_to_usf(taa_data->prev_irq_duration)); + } + + /* + * The delay that the IRQ suffered before starting. + */ + printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces, + (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", + ns_to_usf(taa_data->timer_irq_start_delay), + ns_to_per(total, taa_data->timer_irq_start_delay)); + + /* + * Timerlat IRQ. + */ + printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces, + ns_to_usf(taa_data->tlat_irq_latency)); + + if (irq) { + /* + * If the trace stopped due to IRQ, the other events will not happen + * because... the trace stopped :-). + * + * That is all folks, the stack trace was printed before the stop, + * so it will be displayed, it is the key. + */ + printf(" Blocking thread:\n"); + printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm, + taa_data->run_thread_pid); + } else { + /* + * The duration of the IRQ handler that handled the timerlat IRQ. + */ + printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n", + 30, spaces, + ns_to_usf(taa_data->timer_irq_duration), + ns_to_per(total, taa_data->timer_irq_duration)); + + /* + * The amount of time that the current thread postponed the scheduler. + * + * Recalling that it is net from NMI/IRQ/Softirq interference, so there + * is no need to compute values here. + */ + printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_blocking_duration), + ns_to_per(total, taa_data->thread_blocking_duration)); + + printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces, + taa_data->run_thread_comm, taa_data->run_thread_pid, + 12, spaces, ns_to_usf(taa_data->thread_blocking_duration)); + } + + /* + * Print the stack trace! + */ + trace_seq_do_printf(taa_data->stack_seq); + + /* + * NMIs can happen during the IRQ, so they are always possible. + */ + if (taa_data->thread_nmi_sum) + printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_nmi_sum), + ns_to_per(total, taa_data->thread_nmi_sum)); + + /* + * If it is an IRQ latency, the other factors can be skipped. + */ + if (irq) + goto print_total; + + /* + * Prints the interference caused by IRQs to the thread latency. + */ + if (taa_data->thread_irq_sum) { + printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_irq_sum), + ns_to_per(total, taa_data->thread_irq_sum)); + + trace_seq_do_printf(taa_data->irqs_seq); + } + + /* + * Prints the interference caused by Softirqs to the thread latency. + */ + if (taa_data->thread_softirq_sum) { + printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces, + ns_to_usf(taa_data->thread_softirq_sum), + ns_to_per(total, taa_data->thread_softirq_sum)); + + trace_seq_do_printf(taa_data->softirqs_seq); + } + + /* + * Prints the interference caused by other threads to the thread latency. + * + * If this happens, your timerlat is not the highest prio. OK, migration + * thread can happen. But otherwise, you are not measuring the "scheduling + * latency" only, and here is the difference from scheduling latency and + * timer handling latency. + */ + if (taa_data->thread_thread_sum) { + printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces, + ns_to_usf(taa_data->thread_thread_sum), + ns_to_per(total, taa_data->thread_thread_sum)); + + trace_seq_do_printf(taa_data->threads_seq); + } + + /* + * Done. + */ +print_total: + printf("------------------------------------------------------------------------\n"); + printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread", + 37, spaces, ns_to_usf(total)); +} + +static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) +{ + struct trace_instance *trace = &taa_ctx->tool->trace; + int retval; + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + return 0; + } + + return 1; +} + +/** + * timerlat_auto_analysis - Analyze the collected data + */ +void timerlat_auto_analysis(int irq_thresh, int thread_thresh) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + unsigned long long max_exit_from_idle = 0; + struct timerlat_aa_data *taa_data; + int max_exit_from_idle_cpu; + struct tep_handle *tep; + int cpu; + + timerlat_auto_analysis_collect_trace(taa_ctx); + + /* bring stop tracing to the ns scale */ + irq_thresh = irq_thresh * 1000; + thread_thresh = thread_thresh * 1000; + + for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { + taa_data = timerlat_aa_get_data(taa_ctx, cpu); + + if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) { + printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); + timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); + } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) { + printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); + timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); + } + + if (taa_data->max_exit_idle_latency > max_exit_from_idle) { + max_exit_from_idle = taa_data->max_exit_idle_latency; + max_exit_from_idle_cpu = cpu; + } + + } + + if (max_exit_from_idle) { + printf("\n"); + printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n", + ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu); + } + if (!taa_ctx->dump_tasks) + return; + + printf("\n"); + printf("Printing CPU tasks:\n"); + for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { + taa_data = timerlat_aa_get_data(taa_ctx, cpu); + tep = taa_ctx->tool->trace.tep; + + printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid); + + if (taa_data->kworker_func) + printf(" kworker:%s:%s", + tep_find_function(tep, taa_data->kworker) ? : "<...>", + tep_find_function(tep, taa_data->kworker_func)); + printf("\n"); + } + +} + +/* + * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data + */ +static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx) +{ + struct timerlat_aa_data *taa_data; + int i; + + if (!taa_ctx->taa_data) + return; + + for (i = 0; i < taa_ctx->nr_cpus; i++) { + taa_data = timerlat_aa_get_data(taa_ctx, i); + + if (taa_data->prev_irqs_seq) { + trace_seq_destroy(taa_data->prev_irqs_seq); + free(taa_data->prev_irqs_seq); + } + + if (taa_data->nmi_seq) { + trace_seq_destroy(taa_data->nmi_seq); + free(taa_data->nmi_seq); + } + + if (taa_data->irqs_seq) { + trace_seq_destroy(taa_data->irqs_seq); + free(taa_data->irqs_seq); + } + + if (taa_data->softirqs_seq) { + trace_seq_destroy(taa_data->softirqs_seq); + free(taa_data->softirqs_seq); + } + + if (taa_data->threads_seq) { + trace_seq_destroy(taa_data->threads_seq); + free(taa_data->threads_seq); + } + + if (taa_data->stack_seq) { + trace_seq_destroy(taa_data->stack_seq); + free(taa_data->stack_seq); + } + } +} + +/* + * timerlat_aa_init_seqs - Init seq files used to store parsed information + * + * Instead of keeping data structures to store raw data, use seq files to + * store parsed data. + * + * Allocates and initialize seq files. + * + * Returns 0 on success, -1 otherwise. + */ +static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx) +{ + struct timerlat_aa_data *taa_data; + int i; + + for (i = 0; i < taa_ctx->nr_cpus; i++) { + + taa_data = timerlat_aa_get_data(taa_ctx, i); + + taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq)); + if (!taa_data->prev_irqs_seq) + goto out_err; + + trace_seq_init(taa_data->prev_irqs_seq); + + taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq)); + if (!taa_data->nmi_seq) + goto out_err; + + trace_seq_init(taa_data->nmi_seq); + + taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq)); + if (!taa_data->irqs_seq) + goto out_err; + + trace_seq_init(taa_data->irqs_seq); + + taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq)); + if (!taa_data->softirqs_seq) + goto out_err; + + trace_seq_init(taa_data->softirqs_seq); + + taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq)); + if (!taa_data->threads_seq) + goto out_err; + + trace_seq_init(taa_data->threads_seq); + + taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq)); + if (!taa_data->stack_seq) + goto out_err; + + trace_seq_init(taa_data->stack_seq); + } + + return 0; + +out_err: + timerlat_aa_destroy_seqs(taa_ctx); + return -1; +} + +/* + * timerlat_aa_unregister_events - Unregister events used in the auto-analysis + */ +static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks) +{ + + tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_aa_handler, tool); + + tracefs_event_disable(tool->trace.inst, "osnoise", NULL); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", + timerlat_aa_nmi_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", + timerlat_aa_irq_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", + timerlat_aa_softirq_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise", + timerlat_aa_thread_handler, tool); + + tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", + timerlat_aa_stack_handler, tool); + if (!dump_tasks) + return; + + tracefs_event_disable(tool->trace.inst, "sched", "sched_switch"); + tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch", + timerlat_aa_sched_switch_handler, tool); + + tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start"); + tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", + timerlat_aa_kworker_start_handler, tool); +} + +/* + * timerlat_aa_register_events - Register events used in the auto-analysis + * + * Returns 0 on success, -1 otherwise. + */ +static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks) +{ + int retval; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_aa_handler, tool); + + + /* + * register auto-analysis handlers. + */ + retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL); + if (retval < 0 && !errno) { + err_msg("Could not find osnoise events\n"); + goto out_err; + } + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", + timerlat_aa_nmi_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise", + timerlat_aa_irq_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise", + timerlat_aa_softirq_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise", + timerlat_aa_thread_handler, tool); + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack", + timerlat_aa_stack_handler, tool); + + if (!dump_tasks) + return 0; + + /* + * Dump task events. + */ + retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch"); + if (retval < 0 && !errno) { + err_msg("Could not find sched_switch\n"); + goto out_err; + } + + tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch", + timerlat_aa_sched_switch_handler, tool); + + retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start"); + if (retval < 0 && !errno) { + err_msg("Could not find workqueue_execute_start\n"); + goto out_err; + } + + tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start", + timerlat_aa_kworker_start_handler, tool); + + return 0; + +out_err: + timerlat_aa_unregister_events(tool, dump_tasks); + return -1; +} + +/** + * timerlat_aa_destroy - Destroy timerlat auto-analysis + */ +void timerlat_aa_destroy(void) +{ + struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); + + if (!taa_ctx) + return; + + if (!taa_ctx->taa_data) + goto out_ctx; + + timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks); + timerlat_aa_destroy_seqs(taa_ctx); + free(taa_ctx->taa_data); +out_ctx: + free(taa_ctx); +} + +/** + * timerlat_aa_init - Initialize timerlat auto-analysis + * + * Returns 0 on success, -1 otherwise. + */ +int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + struct timerlat_aa_context *taa_ctx; + int retval; + + taa_ctx = calloc(1, sizeof(*taa_ctx)); + if (!taa_ctx) + return -1; + + __timerlat_aa_ctx = taa_ctx; + + taa_ctx->nr_cpus = nr_cpus; + taa_ctx->tool = tool; + taa_ctx->dump_tasks = dump_tasks; + + taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data)); + if (!taa_ctx->taa_data) + goto out_err; + + retval = timerlat_aa_init_seqs(taa_ctx); + if (retval) + goto out_err; + + retval = timerlat_aa_register_events(tool, dump_tasks); + if (retval) + goto out_err; + + return 0; + +out_err: + timerlat_aa_destroy(); + return -1; +} diff --git a/tools/tracing/rtla/src/timerlat_aa.h b/tools/tracing/rtla/src/timerlat_aa.h new file mode 100644 index 000000000000..cea4bb1531a8 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_aa.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +int timerlat_aa_init(struct osnoise_tool *tool, int dump_task); +void timerlat_aa_destroy(void); + +void timerlat_auto_analysis(int irq_thresh, int thread_thresh); diff --git a/tools/tracing/rtla/src/timerlat_bpf.c b/tools/tracing/rtla/src/timerlat_bpf.c new file mode 100644 index 000000000000..e97d16646bcd --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_bpf.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifdef HAVE_BPF_SKEL +#define _GNU_SOURCE +#include "timerlat.h" +#include "timerlat_bpf.h" +#include "timerlat.skel.h" + +static struct timerlat_bpf *bpf; + +/* + * timerlat_bpf_init - load and initialize BPF program to collect timerlat data + */ +int timerlat_bpf_init(struct timerlat_params *params) +{ + int err; + + debug_msg("Loading BPF program\n"); + + bpf = timerlat_bpf__open(); + if (!bpf) + return 1; + + /* Pass common options */ + bpf->rodata->output_divisor = params->common.output_divisor; + bpf->rodata->entries = params->common.hist.entries; + bpf->rodata->irq_threshold = params->common.stop_us; + bpf->rodata->thread_threshold = params->common.stop_total_us; + bpf->rodata->aa_only = params->common.aa_only; + + if (params->common.hist.entries != 0) { + /* Pass histogram options */ + bpf->rodata->bucket_size = params->common.hist.bucket_size; + + /* Set histogram array sizes */ + bpf_map__set_max_entries(bpf->maps.hist_irq, params->common.hist.entries); + bpf_map__set_max_entries(bpf->maps.hist_thread, params->common.hist.entries); + bpf_map__set_max_entries(bpf->maps.hist_user, params->common.hist.entries); + } else { + /* No entries, disable histogram */ + bpf_map__set_autocreate(bpf->maps.hist_irq, false); + bpf_map__set_autocreate(bpf->maps.hist_thread, false); + bpf_map__set_autocreate(bpf->maps.hist_user, false); + } + + if (params->common.aa_only) { + /* Auto-analysis only, disable summary */ + bpf_map__set_autocreate(bpf->maps.summary_irq, false); + bpf_map__set_autocreate(bpf->maps.summary_thread, false); + bpf_map__set_autocreate(bpf->maps.summary_user, false); + } + + /* Load and verify BPF program */ + err = timerlat_bpf__load(bpf); + if (err) { + timerlat_bpf__destroy(bpf); + return err; + } + + return 0; +} + +/* + * timerlat_bpf_attach - attach BPF program to collect timerlat data + */ +int timerlat_bpf_attach(void) +{ + debug_msg("Attaching BPF program\n"); + + return timerlat_bpf__attach(bpf); +} + +/* + * timerlat_bpf_detach - detach BPF program to collect timerlat data + */ +void timerlat_bpf_detach(void) +{ + timerlat_bpf__detach(bpf); +} + +/* + * timerlat_bpf_detach - destroy BPF program to collect timerlat data + */ +void timerlat_bpf_destroy(void) +{ + timerlat_bpf__destroy(bpf); +} + +static int handle_rb_event(void *ctx, void *data, size_t data_sz) +{ + return 0; +} + +/* + * timerlat_bpf_wait - wait until tracing is stopped or signal + */ +int timerlat_bpf_wait(int timeout) +{ + struct ring_buffer *rb; + int retval; + + rb = ring_buffer__new(bpf_map__fd(bpf->maps.signal_stop_tracing), + handle_rb_event, NULL, NULL); + retval = ring_buffer__poll(rb, timeout * 1000); + ring_buffer__free(rb); + + return retval; +} + +/* + * timerlat_bpf_restart_tracing - restart stopped tracing + */ +int timerlat_bpf_restart_tracing(void) +{ + unsigned int key = 0; + unsigned long long value = 0; + + return bpf_map__update_elem(bpf->maps.stop_tracing, + &key, sizeof(key), + &value, sizeof(value), BPF_ANY); +} + +static int get_value(struct bpf_map *map_irq, + struct bpf_map *map_thread, + struct bpf_map *map_user, + int key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + int err; + + err = bpf_map__lookup_elem(map_irq, &key, + sizeof(unsigned int), value_irq, + sizeof(long long) * cpus, 0); + if (err) + return err; + err = bpf_map__lookup_elem(map_thread, &key, + sizeof(unsigned int), value_thread, + sizeof(long long) * cpus, 0); + if (err) + return err; + err = bpf_map__lookup_elem(map_user, &key, + sizeof(unsigned int), value_user, + sizeof(long long) * cpus, 0); + if (err) + return err; + return 0; +} + +/* + * timerlat_bpf_get_hist_value - get value from BPF hist map + */ +int timerlat_bpf_get_hist_value(int key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + return get_value(bpf->maps.hist_irq, + bpf->maps.hist_thread, + bpf->maps.hist_user, + key, value_irq, value_thread, value_user, cpus); +} + +/* + * timerlat_bpf_get_summary_value - get value from BPF summary map + */ +int timerlat_bpf_get_summary_value(enum summary_field key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + return get_value(bpf->maps.summary_irq, + bpf->maps.summary_thread, + bpf->maps.summary_user, + key, value_irq, value_thread, value_user, cpus); +} +#endif /* HAVE_BPF_SKEL */ diff --git a/tools/tracing/rtla/src/timerlat_bpf.h b/tools/tracing/rtla/src/timerlat_bpf.h new file mode 100644 index 000000000000..118487436d30 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_bpf.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#pragma once + +enum summary_field { + SUMMARY_CURRENT, + SUMMARY_MIN, + SUMMARY_MAX, + SUMMARY_COUNT, + SUMMARY_SUM, + SUMMARY_OVERFLOW, + SUMMARY_FIELD_N +}; + +#ifndef __bpf__ +#ifdef HAVE_BPF_SKEL +int timerlat_bpf_init(struct timerlat_params *params); +int timerlat_bpf_attach(void); +void timerlat_bpf_detach(void); +void timerlat_bpf_destroy(void); +int timerlat_bpf_wait(int timeout); +int timerlat_bpf_restart_tracing(void); +int timerlat_bpf_get_hist_value(int key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus); +int timerlat_bpf_get_summary_value(enum summary_field key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus); + +static inline int have_libbpf_support(void) { return 1; } +#else +static inline int timerlat_bpf_init(struct timerlat_params *params) +{ + return -1; +} +static inline int timerlat_bpf_attach(void) { return -1; } +static inline void timerlat_bpf_detach(void) { }; +static inline void timerlat_bpf_destroy(void) { }; +static inline int timerlat_bpf_wait(int timeout) { return -1; } +static inline int timerlat_bpf_restart_tracing(void) { return -1; }; +static inline int timerlat_bpf_get_hist_value(int key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + return -1; +} +static inline int timerlat_bpf_get_summary_value(enum summary_field key, + long long *value_irq, + long long *value_thread, + long long *value_user, + int cpus) +{ + return -1; +} +static inline int have_libbpf_support(void) { return 0; } +#endif /* HAVE_BPF_SKEL */ +#endif /* __bpf__ */ diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 4b48af8a8309..1fb471a787b7 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <getopt.h> #include <stdlib.h> #include <string.h> @@ -10,44 +11,21 @@ #include <unistd.h> #include <stdio.h> #include <time.h> +#include <sched.h> +#include <pthread.h> -#include "utils.h" -#include "osnoise.h" #include "timerlat.h" - -struct timerlat_hist_params { - char *cpus; - char *monitored_cpus; - char *trace_output; - unsigned long long runtime; - long long stop_us; - long long stop_total_us; - long long timerlat_period_us; - long long print_stack; - int sleep_time; - int output_divisor; - int duration; - int set_sched; - int dma_latency; - struct sched_attr sched_param; - struct trace_events *events; - - char no_irq; - char no_thread; - char no_header; - char no_summary; - char no_index; - char with_zeros; - int bucket_size; - int entries; -}; +#include "timerlat_aa.h" +#include "timerlat_bpf.h" struct timerlat_hist_cpu { int *irq; int *thread; + int *user; - int irq_count; - int thread_count; + unsigned long long irq_count; + unsigned long long thread_count; + unsigned long long user_count; unsigned long long min_irq; unsigned long long sum_irq; @@ -56,6 +34,10 @@ struct timerlat_hist_cpu { unsigned long long min_thread; unsigned long long sum_thread; unsigned long long max_thread; + + unsigned long long min_user; + unsigned long long sum_user; + unsigned long long max_user; }; struct timerlat_hist_data { @@ -80,13 +62,21 @@ timerlat_free_histogram(struct timerlat_hist_data *data) if (data->hist[cpu].thread) free(data->hist[cpu].thread); + + if (data->hist[cpu].user) + free(data->hist[cpu].user); + } /* one set of histograms per CPU */ if (data->hist) free(data->hist); +} - free(data); +static void timerlat_free_histogram_tool(struct osnoise_tool *tool) +{ + timerlat_free_histogram(tool->data); + timerlat_free(tool); } /* @@ -116,15 +106,21 @@ static struct timerlat_hist_data data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1)); if (!data->hist[cpu].irq) goto cleanup; + data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1)); if (!data->hist[cpu].thread) goto cleanup; + + data->hist[cpu].user = calloc(1, sizeof(*data->hist->user) * (entries + 1)); + if (!data->hist[cpu].user) + goto cleanup; } /* set the min to max */ for (cpu = 0; cpu < nr_cpus; cpu++) { data->hist[cpu].min_irq = ~0; data->hist[cpu].min_thread = ~0; + data->hist[cpu].min_user = ~0; } return data; @@ -139,33 +135,38 @@ cleanup: */ static void timerlat_hist_update(struct osnoise_tool *tool, int cpu, - unsigned long long thread, + unsigned long long context, unsigned long long latency) { - struct timerlat_hist_params *params = tool->params; + struct timerlat_params *params = to_timerlat_params(tool->params); struct timerlat_hist_data *data = tool->data; int entries = data->entries; int bucket; int *hist; - if (params->output_divisor) - latency = latency / params->output_divisor; + if (params->common.output_divisor) + latency = latency / params->common.output_divisor; - if (data->bucket_size) - bucket = latency / data->bucket_size; + bucket = latency / data->bucket_size; - if (!thread) { + if (!context) { hist = data->hist[cpu].irq; data->hist[cpu].irq_count++; update_min(&data->hist[cpu].min_irq, &latency); update_sum(&data->hist[cpu].sum_irq, &latency); update_max(&data->hist[cpu].max_irq, &latency); - } else { + } else if (context == 1) { hist = data->hist[cpu].thread; data->hist[cpu].thread_count++; update_min(&data->hist[cpu].min_thread, &latency); update_sum(&data->hist[cpu].sum_thread, &latency); update_max(&data->hist[cpu].max_thread, &latency); + } else { /* user */ + hist = data->hist[cpu].user; + data->hist[cpu].user_count++; + update_min(&data->hist[cpu].min_user, &latency); + update_sum(&data->hist[cpu].sum_user, &latency); + update_max(&data->hist[cpu].max_user, &latency); } if (bucket < entries) @@ -182,16 +183,99 @@ timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *data) { struct trace_instance *trace = data; - unsigned long long thread, latency; + unsigned long long context, latency; struct osnoise_tool *tool; int cpu = record->cpu; tool = container_of(trace, struct osnoise_tool, trace); - tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "context", record, &context, 1); tep_get_field_val(s, event, "timer_latency", record, &latency, 1); - timerlat_hist_update(tool, cpu, thread, latency); + timerlat_hist_update(tool, cpu, context, latency); + + return 0; +} + +/* + * timerlat_hist_bpf_pull_data - copy data from BPF maps into userspace + */ +static int timerlat_hist_bpf_pull_data(struct osnoise_tool *tool) +{ + struct timerlat_hist_data *data = tool->data; + int i, j, err; + long long value_irq[data->nr_cpus], + value_thread[data->nr_cpus], + value_user[data->nr_cpus]; + + /* Pull histogram */ + for (i = 0; i < data->entries; i++) { + err = timerlat_bpf_get_hist_value(i, value_irq, value_thread, + value_user, data->nr_cpus); + if (err) + return err; + for (j = 0; j < data->nr_cpus; j++) { + data->hist[j].irq[i] = value_irq[j]; + data->hist[j].thread[i] = value_thread[j]; + data->hist[j].user[i] = value_user[j]; + } + } + + /* Pull summary */ + err = timerlat_bpf_get_summary_value(SUMMARY_COUNT, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->hist[i].irq_count = value_irq[i]; + data->hist[i].thread_count = value_thread[i]; + data->hist[i].user_count = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_MIN, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->hist[i].min_irq = value_irq[i]; + data->hist[i].min_thread = value_thread[i]; + data->hist[i].min_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_MAX, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->hist[i].max_irq = value_irq[i]; + data->hist[i].max_thread = value_thread[i]; + data->hist[i].max_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_SUM, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->hist[i].sum_irq = value_irq[i]; + data->hist[i].sum_thread = value_thread[i]; + data->hist[i].sum_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_OVERFLOW, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->hist[i].irq[data->entries] = value_irq[i]; + data->hist[i].thread[data->entries] = value_thread[i]; + data->hist[i].user[data->entries] = value_user[i]; + } return 0; } @@ -201,38 +285,39 @@ timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, */ static void timerlat_hist_header(struct osnoise_tool *tool) { - struct timerlat_hist_params *params = tool->params; + struct timerlat_params *params = to_timerlat_params(tool->params); struct timerlat_hist_data *data = tool->data; struct trace_seq *s = tool->trace.seq; char duration[26]; int cpu; - if (params->no_header) + if (params->common.hist.no_header) return; get_duration(tool->start_time, duration, sizeof(duration)); trace_seq_printf(s, "# RTLA timerlat histogram\n"); trace_seq_printf(s, "# Time unit is %s (%s)\n", - params->output_divisor == 1 ? "nanoseconds" : "microseconds", - params->output_divisor == 1 ? "ns" : "us"); + params->common.output_divisor == 1 ? "nanoseconds" : "microseconds", + params->common.output_divisor == 1 ? "ns" : "us"); trace_seq_printf(s, "# Duration: %s\n", duration); - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(s, "Index"); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) + if (!params->common.hist.no_irq) trace_seq_printf(s, " IRQ-%03d", cpu); - if (!params->no_thread) + if (!params->common.hist.no_thread) trace_seq_printf(s, " Thr-%03d", cpu); + + if (params->common.user_data) + trace_seq_printf(s, " Usr-%03d", cpu); } trace_seq_printf(s, "\n"); @@ -242,115 +327,292 @@ static void timerlat_hist_header(struct osnoise_tool *tool) } /* + * format_summary_value - format a line of summary value (min, max or avg) + * of hist data + */ +static void format_summary_value(struct trace_seq *seq, + int count, + unsigned long long val, + bool avg) +{ + if (count) + trace_seq_printf(seq, "%9llu ", avg ? val / count : val); + else + trace_seq_printf(seq, "%9c ", '-'); +} + +/* * timerlat_print_summary - print the summary of the hist data to the output */ static void -timerlat_print_summary(struct timerlat_hist_params *params, +timerlat_print_summary(struct timerlat_params *params, struct trace_instance *trace, struct timerlat_hist_data *data) { int cpu; - if (params->no_summary) + if (params->common.hist.no_summary) return; - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "count:"); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) - trace_seq_printf(trace->seq, "%9d ", + if (!params->common.hist.no_irq) + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].irq_count); - if (!params->no_thread) - trace_seq_printf(trace->seq, "%9d ", + if (!params->common.hist.no_thread) + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].thread_count); + + if (params->common.user_data) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].user_count); } trace_seq_printf(trace->seq, "\n"); - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "min: "); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_irq); - - if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_thread); + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].min_irq, + false); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].min_thread, + false); + + if (params->common.user_data) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].min_user, + false); } trace_seq_printf(trace->seq, "\n"); - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "avg: "); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_irq / data->hist[cpu].irq_count); - else - trace_seq_printf(trace->seq, " - "); - } - - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_thread / data->hist[cpu].thread_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].sum_irq, + true); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].sum_thread, + true); + + if (params->common.user_data) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].sum_user, + true); } trace_seq_printf(trace->seq, "\n"); - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "max: "); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].max_irq, + false); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].max_thread, + false); + + if (params->common.user_data) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].max_user, + false); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +static void +timerlat_print_stats_all(struct timerlat_params *params, + struct trace_instance *trace, + struct timerlat_hist_data *data) +{ + struct timerlat_hist_cpu *cpu_data; + struct timerlat_hist_cpu sum; + int cpu; + + if (params->common.hist.no_summary) + return; + + memset(&sum, 0, sizeof(sum)); + sum.min_irq = ~0; + sum.min_thread = ~0; + sum.min_user = ~0; + + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_irq); + cpu_data = &data->hist[cpu]; - if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_thread); + sum.irq_count += cpu_data->irq_count; + update_min(&sum.min_irq, &cpu_data->min_irq); + update_sum(&sum.sum_irq, &cpu_data->sum_irq); + update_max(&sum.max_irq, &cpu_data->max_irq); + + sum.thread_count += cpu_data->thread_count; + update_min(&sum.min_thread, &cpu_data->min_thread); + update_sum(&sum.sum_thread, &cpu_data->sum_thread); + update_max(&sum.max_thread, &cpu_data->max_thread); + + sum.user_count += cpu_data->user_count; + update_min(&sum.min_user, &cpu_data->min_user); + update_sum(&sum.sum_user, &cpu_data->sum_user); + update_max(&sum.max_user, &cpu_data->max_user); } + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "ALL: "); + + if (!params->common.hist.no_irq) + trace_seq_printf(trace->seq, " IRQ"); + + if (!params->common.hist.no_thread) + trace_seq_printf(trace->seq, " Thr"); + + if (params->common.user_data) + trace_seq_printf(trace->seq, " Usr"); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "count:"); + + if (!params->common.hist.no_irq) + trace_seq_printf(trace->seq, "%9llu ", + sum.irq_count); + + if (!params->common.hist.no_thread) + trace_seq_printf(trace->seq, "%9llu ", + sum.thread_count); + + if (params->common.user_data) + trace_seq_printf(trace->seq, "%9llu ", + sum.user_count); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "min: "); + + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + sum.irq_count, + sum.min_irq, + false); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + sum.thread_count, + sum.min_thread, + false); + + if (params->common.user_data) + format_summary_value(trace->seq, + sum.user_count, + sum.min_user, + false); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "avg: "); + + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + sum.irq_count, + sum.sum_irq, + true); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + sum.thread_count, + sum.sum_thread, + true); + + if (params->common.user_data) + format_summary_value(trace->seq, + sum.user_count, + sum.sum_user, + true); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->common.hist.no_index) + trace_seq_printf(trace->seq, "max: "); + + if (!params->common.hist.no_irq) + format_summary_value(trace->seq, + sum.irq_count, + sum.max_irq, + false); + + if (!params->common.hist.no_thread) + format_summary_value(trace->seq, + sum.thread_count, + sum.max_thread, + false); + + if (params->common.user_data) + format_summary_value(trace->seq, + sum.user_count, + sum.max_user, + false); + trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); } /* - * timerlat_print_stats - print data for all CPUs + * timerlat_print_stats - print data for each CPUs */ static void -timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool) +timerlat_print_stats(struct osnoise_tool *tool) { + struct timerlat_params *params = to_timerlat_params(tool->params); struct timerlat_hist_data *data = tool->data; struct trace_instance *trace = &tool->trace; int bucket, cpu; @@ -361,32 +623,36 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t for (bucket = 0; bucket < data->entries; bucket++) { total = 0; - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "%-6d", bucket * data->bucket_size); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { + if (!params->common.hist.no_irq) { total += data->hist[cpu].irq[bucket]; trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].irq[bucket]); } - if (!params->no_thread) { + if (!params->common.hist.no_thread) { total += data->hist[cpu].thread[bucket]; trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].thread[bucket]); } + if (params->common.user_data) { + total += data->hist[cpu].user[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user[bucket]); + } + } - if (total == 0 && !params->with_zeros) { + if (total == 0 && !params->common.hist.with_zeros) { trace_seq_reset(trace->seq); continue; } @@ -396,44 +662,49 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t trace_seq_reset(trace->seq); } - if (!params->no_index) + if (!params->common.hist.no_index) trace_seq_printf(trace->seq, "over: "); - for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) - continue; + for_each_monitored_cpu(cpu, data->nr_cpus, ¶ms->common) { if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) + if (!params->common.hist.no_irq) trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].irq[data->entries]); - if (!params->no_thread) + if (!params->common.hist.no_thread) trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].thread[data->entries]); + + if (params->common.user_data) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user[data->entries]); } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); timerlat_print_summary(params, trace, data); + timerlat_print_stats_all(params, trace, data); + osnoise_report_missed_events(tool); } /* * timerlat_hist_usage - prints timerlat top usage message */ -static void timerlat_hist_usage(char *usage) +static void timerlat_hist_usage(void) { int i; char *msg[] = { "", " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\", - " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\", + " [-t [file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", - " [--no-index] [--with-zeros] [--dma-latency us]", + " [--no-index] [--with-zeros] [--dma-latency us] [-C [cgroup_name]] [--no-aa] [--dump-task] [-u|-k]", + " [--warm-up s] [--deepest-idle-state n]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -442,13 +713,17 @@ static void timerlat_hist_usage(char *usage) " -T/--thread us: stop trace if the thread latency is higher than the argument in us", " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", " -c/--cpus cpus: run the tracer only on the given cpus", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup [cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[m|h|d]: duration of the session in seconds", + " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", " -D/--debug: print debug info", - " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -t/--trace [file]: save the stopped trace to [file|timerlat_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <filter>: enable a trace event filter to the previous -e event", " --trigger <trigger>: enable a trace event trigger to the previous -e event", " -n/--nano: display data in nanoseconds", + " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage", " -b/--bucket-size N: set the histogram bucket size (default 1)", " -E/--entries N: set the number of entries of the histogram (default 256)", " --no-irq: ignore IRQ latencies", @@ -464,52 +739,70 @@ static void timerlat_hist_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", + " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads", + " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads", + " -U/--user-load: enable timerlat for user-defined user-space workload", + " --warm-up s: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", + " --on-threshold <action>: define action to be executed at latency threshold, multiple are allowed", + " --on-end <action>: define action to be executed at measurement end, multiple are allowed", NULL, }; - if (usage) - fprintf(stderr, "%s\n", usage); - fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n", VERSION); for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + exit(EXIT_SUCCESS); } /* * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters */ -static struct timerlat_hist_params +static struct common_params *timerlat_hist_parse_args(int argc, char *argv[]) { - struct timerlat_hist_params *params; + struct timerlat_params *params; struct trace_events *tevent; int auto_thresh; int retval; int c; + char *trace_output = NULL; params = calloc(1, sizeof(*params)); if (!params) exit(1); + actions_init(¶ms->common.threshold_actions); + actions_init(¶ms->common.end_actions); + /* disabled by default */ params->dma_latency = -1; + /* disabled by default */ + params->deepest_idle_state = -2; + /* display data in microseconds */ - params->output_divisor = 1000; - params->bucket_size = 1; - params->entries = 256; + params->common.output_divisor = 1000; + params->common.hist.bucket_size = 1; + params->common.hist.entries = 256; + + /* default to BPF mode */ + params->mode = TRACING_MODE_BPF; while (1) { static struct option long_options[] = { {"auto", required_argument, 0, 'a'}, {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, {"bucket-size", required_argument, 0, 'b'}, {"debug", no_argument, 0, 'D'}, {"entries", required_argument, 0, 'E'}, {"duration", required_argument, 0, 'd'}, + {"house-keeping", required_argument, 0, 'H'}, {"help", no_argument, 0, 'h'}, {"irq", required_argument, 0, 'i'}, {"nano", no_argument, 0, 'n'}, @@ -518,6 +811,9 @@ static struct timerlat_hist_params {"stack", required_argument, 0, 's'}, {"thread", required_argument, 0, 'T'}, {"trace", optional_argument, 0, 't'}, + {"user-threads", no_argument, 0, 'u'}, + {"kernel-threads", no_argument, 0, 'k'}, + {"user-load", no_argument, 0, 'U'}, {"event", required_argument, 0, 'e'}, {"no-irq", no_argument, 0, '0'}, {"no-thread", no_argument, 0, '1'}, @@ -528,14 +824,18 @@ static struct timerlat_hist_params {"trigger", required_argument, 0, '6'}, {"filter", required_argument, 0, '7'}, {"dma-latency", required_argument, 0, '8'}, + {"no-aa", no_argument, 0, '9'}, + {"dump-task", no_argument, 0, '\1'}, + {"warm-up", required_argument, 0, '\2'}, + {"trace-buffer-size", required_argument, 0, '\3'}, + {"deepest-idle-state", required_argument, 0, '\4'}, + {"on-threshold", required_argument, 0, '\5'}, + {"on-end", required_argument, 0, '\6'}, {0, 0, 0, 0} }; - /* getopt_long stores the option index here. */ - int option_index = 0; - - c = getopt_long(argc, argv, "a:c:b:d:e:E:Dhi:np:P:s:t::T:0123456:7:8:", - long_options, &option_index); + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3:", + long_options, NULL); /* detect the end of the options. */ if (c == -1) @@ -546,201 +846,223 @@ static struct timerlat_hist_params auto_thresh = get_llong_from_str(optarg); /* set thread stop to auto_thresh */ - params->stop_total_us = auto_thresh; + params->common.stop_total_us = auto_thresh; + params->common.stop_us = auto_thresh; /* get stack trace */ params->print_stack = auto_thresh; /* set trace */ - params->trace_output = "timerlat_trace.txt"; + if (!trace_output) + trace_output = "timerlat_trace.txt"; break; case 'c': - retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + retval = parse_cpu_set(optarg, ¶ms->common.monitored_cpus); if (retval) - timerlat_hist_usage("\nInvalid -c cpu list\n"); - params->cpus = optarg; + fatal("Invalid -c cpu list"); + params->common.cpus = optarg; + break; + case 'C': + params->common.cgroup = 1; + params->common.cgroup_name = parse_optional_arg(argc, argv); break; case 'b': - params->bucket_size = get_llong_from_str(optarg); - if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) - timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + params->common.hist.bucket_size = get_llong_from_str(optarg); + if (params->common.hist.bucket_size == 0 || + params->common.hist.bucket_size >= 1000000) + fatal("Bucket size needs to be > 0 and <= 1000000"); break; case 'D': config_debug = 1; break; case 'd': - params->duration = parse_seconds_duration(optarg); - if (!params->duration) - timerlat_hist_usage("Invalid -D duration\n"); + params->common.duration = parse_seconds_duration(optarg); + if (!params->common.duration) + fatal("Invalid -D duration"); break; case 'e': tevent = trace_event_alloc(optarg); - if (!tevent) { - err_msg("Error alloc trace event"); - exit(EXIT_FAILURE); - } + if (!tevent) + fatal("Error alloc trace event"); - if (params->events) - tevent->next = params->events; + if (params->common.events) + tevent->next = params->common.events; - params->events = tevent; + params->common.events = tevent; break; case 'E': - params->entries = get_llong_from_str(optarg); - if ((params->entries < 10) || (params->entries > 9999999)) - timerlat_hist_usage("Entries must be > 10 and < 9999999\n"); + params->common.hist.entries = get_llong_from_str(optarg); + if (params->common.hist.entries < 10 || + params->common.hist.entries > 9999999) + fatal("Entries must be > 10 and < 9999999"); break; case 'h': case '?': - timerlat_hist_usage(NULL); + timerlat_hist_usage(); + break; + case 'H': + params->common.hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->common.hk_cpu_set); + if (retval) + fatal("Error parsing house keeping CPUs"); break; case 'i': - params->stop_us = get_llong_from_str(optarg); + params->common.stop_us = get_llong_from_str(optarg); + break; + case 'k': + params->common.kernel_workload = 1; break; case 'n': - params->output_divisor = 1; + params->common.output_divisor = 1; break; case 'p': params->timerlat_period_us = get_llong_from_str(optarg); if (params->timerlat_period_us > 1000000) - timerlat_hist_usage("Period longer than 1 s\n"); + fatal("Period longer than 1 s"); break; case 'P': - retval = parse_prio(optarg, ¶ms->sched_param); + retval = parse_prio(optarg, ¶ms->common.sched_param); if (retval == -1) - timerlat_hist_usage("Invalid -P priority"); - params->set_sched = 1; + fatal("Invalid -P priority"); + params->common.set_sched = 1; break; case 's': params->print_stack = get_llong_from_str(optarg); break; case 'T': - params->stop_total_us = get_llong_from_str(optarg); + params->common.stop_total_us = get_llong_from_str(optarg); break; case 't': - if (optarg) - /* skip = */ - params->trace_output = &optarg[1]; - else - params->trace_output = "timerlat_trace.txt"; + trace_output = parse_optional_arg(argc, argv); + if (!trace_output) + trace_output = "timerlat_trace.txt"; + break; + case 'u': + params->common.user_workload = 1; + /* fallback: -u implies in -U */ + case 'U': + params->common.user_data = 1; break; case '0': /* no irq */ - params->no_irq = 1; + params->common.hist.no_irq = 1; break; case '1': /* no thread */ - params->no_thread = 1; + params->common.hist.no_thread = 1; break; case '2': /* no header */ - params->no_header = 1; + params->common.hist.no_header = 1; break; case '3': /* no summary */ - params->no_summary = 1; + params->common.hist.no_summary = 1; break; case '4': /* no index */ - params->no_index = 1; + params->common.hist.no_index = 1; break; case '5': /* with zeros */ - params->with_zeros = 1; + params->common.hist.with_zeros = 1; break; case '6': /* trigger */ - if (params->events) { - retval = trace_event_add_trigger(params->events, optarg); - if (retval) { - err_msg("Error adding trigger %s\n", optarg); - exit(EXIT_FAILURE); - } + if (params->common.events) { + retval = trace_event_add_trigger(params->common.events, optarg); + if (retval) + fatal("Error adding trigger %s", optarg); } else { - timerlat_hist_usage("--trigger requires a previous -e\n"); + fatal("--trigger requires a previous -e"); } break; case '7': /* filter */ - if (params->events) { - retval = trace_event_add_filter(params->events, optarg); - if (retval) { - err_msg("Error adding filter %s\n", optarg); - exit(EXIT_FAILURE); - } + if (params->common.events) { + retval = trace_event_add_filter(params->common.events, optarg); + if (retval) + fatal("Error adding filter %s", optarg); } else { - timerlat_hist_usage("--filter requires a previous -e\n"); + fatal("--filter requires a previous -e"); } break; case '8': params->dma_latency = get_llong_from_str(optarg); - if (params->dma_latency < 0 || params->dma_latency > 10000) { - err_msg("--dma-latency needs to be >= 0 and < 10000"); - exit(EXIT_FAILURE); - } + if (params->dma_latency < 0 || params->dma_latency > 10000) + fatal("--dma-latency needs to be >= 0 and < 10000"); + break; + case '9': + params->no_aa = 1; + break; + case '\1': + params->dump_tasks = 1; + break; + case '\2': + params->common.warmup = get_llong_from_str(optarg); + break; + case '\3': + params->common.buffer_size = get_llong_from_str(optarg); + break; + case '\4': + params->deepest_idle_state = get_llong_from_str(optarg); + break; + case '\5': + retval = actions_parse(¶ms->common.threshold_actions, optarg, + "timerlat_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + case '\6': + retval = actions_parse(¶ms->common.end_actions, optarg, + "timerlat_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); break; default: - timerlat_hist_usage("Invalid option"); + fatal("Invalid option"); } } - if (geteuid()) { - err_msg("rtla needs root permission\n"); - exit(EXIT_FAILURE); - } + if (trace_output) + actions_add_trace_output(¶ms->common.threshold_actions, trace_output); + + if (geteuid()) + fatal("rtla needs root permission"); + + if (params->common.hist.no_irq && params->common.hist.no_thread) + fatal("no-irq and no-thread set, there is nothing to do here"); + + if (params->common.hist.no_index && !params->common.hist.with_zeros) + fatal("no-index set with with-zeros is not set - it does not make sense"); - if (params->no_irq && params->no_thread) - timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here"); + /* + * Auto analysis only happens if stop tracing, thus: + */ + if (!params->common.stop_us && !params->common.stop_total_us) + params->no_aa = 1; - if (params->no_index && !params->with_zeros) - timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense"); + if (params->common.kernel_workload && params->common.user_workload) + fatal("--kernel-threads and --user-threads are mutually exclusive!"); - return params; + /* + * If auto-analysis or trace output is enabled, switch from BPF mode to + * mixed mode + */ + if (params->mode == TRACING_MODE_BPF && + (params->common.threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->common.end_actions.present[ACTION_TRACE_OUTPUT] || + !params->no_aa)) + params->mode = TRACING_MODE_MIXED; + + return ¶ms->common; } /* * timerlat_hist_apply_config - apply the hist configs to the initialized tool */ static int -timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params) +timerlat_hist_apply_config(struct osnoise_tool *tool) { + struct timerlat_params *params = to_timerlat_params(tool->params); int retval; - if (!params->sleep_time) - params->sleep_time = 1; - - if (params->cpus) { - retval = osnoise_set_cpus(tool->context, params->cpus); - if (retval) { - err_msg("Failed to apply CPUs config\n"); - goto out_err; - } - } - - if (params->stop_us) { - retval = osnoise_set_stop_us(tool->context, params->stop_us); - if (retval) { - err_msg("Failed to set stop us\n"); - goto out_err; - } - } - - if (params->stop_total_us) { - retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); - if (retval) { - err_msg("Failed to set stop total us\n"); - goto out_err; - } - } - - if (params->timerlat_period_us) { - retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us); - if (retval) { - err_msg("Failed to set timerlat period\n"); - goto out_err; - } - } - - if (params->print_stack) { - retval = osnoise_set_print_stack(tool->context, params->print_stack); - if (retval) { - err_msg("Failed to set print stack\n"); - goto out_err; - } - } + retval = timerlat_apply_config(tool, params); + if (retval) + goto out_err; return 0; @@ -752,7 +1074,7 @@ out_err: * timerlat_init_hist - initialize a timerlat hist tool with parameters */ static struct osnoise_tool -*timerlat_init_hist(struct timerlat_hist_params *params) +*timerlat_init_hist(struct common_params *params) { struct osnoise_tool *tool; int nr_cpus; @@ -763,12 +1085,11 @@ static struct osnoise_tool if (!tool) return NULL; - tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + tool->data = timerlat_alloc_histogram(nr_cpus, params->hist.entries, + params->hist.bucket_size); if (!tool->data) goto out_err; - tool->params = params; - tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", timerlat_hist_handler, tool); @@ -779,136 +1100,61 @@ out_err: return NULL; } -static int stop_tracing; -static void stop_hist(int sig) -{ - stop_tracing = 1; -} - -/* - * timerlat_hist_set_signals - handles the signal to stop the tool - */ -static void -timerlat_hist_set_signals(struct timerlat_hist_params *params) +static int timerlat_hist_bpf_main_loop(struct osnoise_tool *tool) { - signal(SIGINT, stop_hist); - if (params->duration) { - signal(SIGALRM, stop_hist); - alarm(params->duration); - } -} - -int timerlat_hist_main(int argc, char *argv[]) -{ - struct timerlat_hist_params *params; - struct osnoise_tool *record = NULL; - struct osnoise_tool *tool = NULL; - struct trace_instance *trace; - int dma_latency_fd = -1; - int return_value = 1; + struct timerlat_params *params = to_timerlat_params(tool->params); int retval; - params = timerlat_hist_parse_args(argc, argv); - if (!params) - exit(1); - - tool = timerlat_init_hist(params); - if (!tool) { - err_msg("Could not init osnoise hist\n"); - goto out_exit; - } - - retval = timerlat_hist_apply_config(tool, params); - if (retval) { - err_msg("Could not apply config\n"); - goto out_free; - } - - trace = &tool->trace; - - retval = enable_timerlat(trace); - if (retval) { - err_msg("Failed to enable timerlat tracer\n"); - goto out_free; - } - - if (params->set_sched) { - retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); - if (retval) { - err_msg("Failed to set sched parameters\n"); - goto out_free; - } - } - - if (params->dma_latency >= 0) { - dma_latency_fd = set_cpu_dma_latency(params->dma_latency); - if (dma_latency_fd < 0) { - err_msg("Could not set /dev/cpu_dma_latency.\n"); - goto out_free; - } - } - - trace_instance_start(trace); - - if (params->trace_output) { - record = osnoise_init_trace_tool("timerlat"); - if (!record) { - err_msg("Failed to enable the trace instance\n"); - goto out_free; - } - - if (params->events) { - retval = trace_events_enable(&record->trace, params->events); - if (retval) - goto out_hist; - } - - trace_instance_start(&record->trace); - } - - tool->start_time = time(NULL); - timerlat_hist_set_signals(params); - while (!stop_tracing) { - sleep(params->sleep_time); - - retval = tracefs_iterate_raw_events(trace->tep, - trace->inst, - NULL, - 0, - collect_registered_events, - trace); - if (retval < 0) { - err_msg("Error iterating on events\n"); - goto out_hist; + timerlat_bpf_wait(-1); + + if (!stop_tracing) { + /* Threshold overflow, perform actions on threshold */ + actions_perform(¶ms->common.threshold_actions); + + if (!params->common.threshold_actions.continue_flag) + /* continue flag not set, break */ + break; + + /* continue action reached, re-enable tracing */ + if (tool->record) + trace_instance_start(&tool->record->trace); + if (tool->aa) + trace_instance_start(&tool->aa->trace); + timerlat_bpf_restart_tracing(); } - - if (trace_is_off(&tool->trace, &record->trace)) - break; } + timerlat_bpf_detach(); - timerlat_print_stats(params, tool); + retval = timerlat_hist_bpf_pull_data(tool); + if (retval) + err_msg("Error pulling BPF data\n"); - return_value = 0; + return retval; +} - if (trace_is_off(&tool->trace, &record->trace)) { - printf("rtla timerlat hit stop tracing\n"); - if (params->trace_output) { - printf(" Saving trace to %s\n", params->trace_output); - save_trace_to_file(record->trace.inst, params->trace_output); - } - } +static int timerlat_hist_main(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval; -out_hist: - if (dma_latency_fd >= 0) - close(dma_latency_fd); - trace_events_destroy(&record->trace, params->events); - params->events = NULL; -out_free: - timerlat_free_histogram(tool->data); - osnoise_destroy_tool(record); - osnoise_destroy_tool(tool); - free(params); -out_exit: - exit(return_value); + if (params->mode == TRACING_MODE_TRACEFS) + retval = hist_main_loop(tool); + else + retval = timerlat_hist_bpf_main_loop(tool); + + return retval; } + +struct tool_ops timerlat_hist_ops = { + .tracer = "timerlat", + .comm_prefix = "timerlat/", + .parse_args = timerlat_hist_parse_args, + .init_tool = timerlat_init_hist, + .apply_config = timerlat_hist_apply_config, + .enable = timerlat_enable, + .main = timerlat_hist_main, + .print_stats = timerlat_print_stats, + .analyze = timerlat_analyze, + .free = timerlat_free_histogram_tool, +}; diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 334271935222..29c2c1f717ed 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <getopt.h> #include <stdlib.h> #include <string.h> @@ -10,33 +11,18 @@ #include <unistd.h> #include <stdio.h> #include <time.h> +#include <errno.h> +#include <sched.h> +#include <pthread.h> -#include "utils.h" -#include "osnoise.h" #include "timerlat.h" - -struct timerlat_top_params { - char *cpus; - char *monitored_cpus; - char *trace_output; - unsigned long long runtime; - long long stop_us; - long long stop_total_us; - long long timerlat_period_us; - long long print_stack; - int sleep_time; - int output_divisor; - int duration; - int quiet; - int set_sched; - int dma_latency; - struct sched_attr sched_param; - struct trace_events *events; -}; +#include "timerlat_aa.h" +#include "timerlat_bpf.h" struct timerlat_top_cpu { - int irq_count; - int thread_count; + unsigned long long irq_count; + unsigned long long thread_count; + unsigned long long user_count; unsigned long long cur_irq; unsigned long long min_irq; @@ -47,6 +33,11 @@ struct timerlat_top_cpu { unsigned long long min_thread; unsigned long long sum_thread; unsigned long long max_thread; + + unsigned long long cur_user; + unsigned long long min_user; + unsigned long long sum_user; + unsigned long long max_user; }; struct timerlat_top_data { @@ -57,13 +48,18 @@ struct timerlat_top_data { /* * timerlat_free_top - free runtime data */ -static void -timerlat_free_top(struct timerlat_top_data *data) +static void timerlat_free_top(struct timerlat_top_data *data) { free(data->cpu_data); free(data); } +static void timerlat_free_top_tool(struct osnoise_tool *tool) +{ + timerlat_free_top(tool->data); + timerlat_free(tool); +} + /* * timerlat_alloc_histogram - alloc runtime data */ @@ -87,6 +83,7 @@ static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus) for (cpu = 0; cpu < nr_cpus; cpu++) { data->cpu_data[cpu].min_irq = ~0; data->cpu_data[cpu].min_thread = ~0; + data->cpu_data[cpu].min_user = ~0; } return data; @@ -96,6 +93,37 @@ cleanup: return NULL; } +static void +timerlat_top_reset_sum(struct timerlat_top_cpu *summary) +{ + memset(summary, 0, sizeof(*summary)); + summary->min_irq = ~0; + summary->min_thread = ~0; + summary->min_user = ~0; +} + +static void +timerlat_top_update_sum(struct osnoise_tool *tool, int cpu, struct timerlat_top_cpu *sum) +{ + struct timerlat_top_data *data = tool->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + + sum->irq_count += cpu_data->irq_count; + update_min(&sum->min_irq, &cpu_data->min_irq); + update_sum(&sum->sum_irq, &cpu_data->sum_irq); + update_max(&sum->max_irq, &cpu_data->max_irq); + + sum->thread_count += cpu_data->thread_count; + update_min(&sum->min_thread, &cpu_data->min_thread); + update_sum(&sum->sum_thread, &cpu_data->sum_thread); + update_max(&sum->max_thread, &cpu_data->max_thread); + + sum->user_count += cpu_data->user_count; + update_min(&sum->min_user, &cpu_data->min_user); + update_sum(&sum->sum_user, &cpu_data->sum_user); + update_max(&sum->max_user, &cpu_data->max_user); +} + /* * timerlat_hist_update - record a new timerlat occurent on cpu, updating data */ @@ -104,21 +132,31 @@ timerlat_top_update(struct osnoise_tool *tool, int cpu, unsigned long long thread, unsigned long long latency) { + struct timerlat_params *params = to_timerlat_params(tool->params); struct timerlat_top_data *data = tool->data; struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + if (params->common.output_divisor) + latency = latency / params->common.output_divisor; + if (!thread) { cpu_data->irq_count++; cpu_data->cur_irq = latency; update_min(&cpu_data->min_irq, &latency); update_sum(&cpu_data->sum_irq, &latency); update_max(&cpu_data->max_irq, &latency); - } else { + } else if (thread == 1) { cpu_data->thread_count++; cpu_data->cur_thread = latency; update_min(&cpu_data->min_thread, &latency); update_sum(&cpu_data->sum_thread, &latency); update_max(&cpu_data->max_thread, &latency); + } else { + cpu_data->user_count++; + cpu_data->cur_user = latency; + update_min(&cpu_data->min_user, &latency); + update_sum(&cpu_data->sum_user, &latency); + update_max(&cpu_data->max_user, &latency); } } @@ -136,10 +174,82 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record, top = container_of(trace, struct osnoise_tool, trace); - tep_get_field_val(s, event, "context", record, &thread, 1); - tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + if (!top->params->aa_only) { + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); - timerlat_top_update(top, cpu, thread, latency); + timerlat_top_update(top, cpu, thread, latency); + } + + return 0; +} + +/* + * timerlat_top_bpf_pull_data - copy data from BPF maps into userspace + */ +static int timerlat_top_bpf_pull_data(struct osnoise_tool *tool) +{ + struct timerlat_top_data *data = tool->data; + int i, err; + long long value_irq[data->nr_cpus], + value_thread[data->nr_cpus], + value_user[data->nr_cpus]; + + /* Pull summary */ + err = timerlat_bpf_get_summary_value(SUMMARY_CURRENT, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->cpu_data[i].cur_irq = value_irq[i]; + data->cpu_data[i].cur_thread = value_thread[i]; + data->cpu_data[i].cur_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_COUNT, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->cpu_data[i].irq_count = value_irq[i]; + data->cpu_data[i].thread_count = value_thread[i]; + data->cpu_data[i].user_count = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_MIN, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->cpu_data[i].min_irq = value_irq[i]; + data->cpu_data[i].min_thread = value_thread[i]; + data->cpu_data[i].min_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_MAX, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->cpu_data[i].max_irq = value_irq[i]; + data->cpu_data[i].max_thread = value_thread[i]; + data->cpu_data[i].max_user = value_user[i]; + } + + err = timerlat_bpf_get_summary_value(SUMMARY_SUM, + value_irq, value_thread, value_user, + data->nr_cpus); + if (err) + return err; + for (i = 0; i < data->nr_cpus; i++) { + data->cpu_data[i].sum_irq = value_irq[i]; + data->cpu_data[i].sum_thread = value_thread[i]; + data->cpu_data[i].sum_user = value_user[i]; + } return 0; } @@ -147,44 +257,59 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record, /* * timerlat_top_header - print the header of the tool output */ -static void timerlat_top_header(struct osnoise_tool *top) +static void timerlat_top_header(struct timerlat_params *params, struct osnoise_tool *top) { - struct timerlat_top_params *params = top->params; struct trace_seq *s = top->trace.seq; + bool pretty = params->common.pretty_output; char duration[26]; get_duration(top->start_time, duration, sizeof(duration)); - trace_seq_printf(s, "\033[2;37;40m"); + if (pretty) + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Timer Latency "); - trace_seq_printf(s, "\033[0;0;0m"); + if (params->common.user_data) + trace_seq_printf(s, " "); + + if (pretty) + trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); - trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration, - params->output_divisor == 1 ? "ns" : "us", - params->output_divisor == 1 ? "ns" : "us"); + trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)", duration, + params->common.output_divisor == 1 ? "ns" : "us", + params->common.output_divisor == 1 ? "ns" : "us"); + + if (params->common.user_data) { + trace_seq_printf(s, " | Ret user Timer Latency (%s)", + params->common.output_divisor == 1 ? "ns" : "us"); + } + + trace_seq_printf(s, "\n"); + if (pretty) + trace_seq_printf(s, "\033[2;30;47m"); - trace_seq_printf(s, "\033[2;30;47m"); trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); - trace_seq_printf(s, "\033[0;0;0m"); + if (params->common.user_data) + trace_seq_printf(s, " | cur min avg max"); + + if (pretty) + trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); } +static const char *no_value = " -"; + /* * timerlat_top_print - prints the output of a given CPU */ static void timerlat_top_print(struct osnoise_tool *top, int cpu) { - - struct timerlat_top_params *params = top->params; + struct timerlat_params *params = to_timerlat_params(top->params); struct timerlat_top_data *data = top->data; struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; - int divisor = params->output_divisor; struct trace_seq *s = top->trace.seq; - if (divisor == 0) - return; - /* * Skip if no data is available: is this cpu offline? */ @@ -194,31 +319,109 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) /* * Unless trace is being lost, IRQ counter is always the max. */ - trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + trace_seq_printf(s, "%3d #%-9llu |", cpu, cpu_data->irq_count); if (!cpu_data->irq_count) { - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - |"); + trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value); } else { - trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); - trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); - trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor); - trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->cur_irq); + trace_seq_printf(s, "%9llu ", cpu_data->min_irq); + trace_seq_printf(s, "%9llu ", cpu_data->sum_irq / cpu_data->irq_count); + trace_seq_printf(s, "%9llu |", cpu_data->max_irq); } if (!cpu_data->thread_count) { - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " -\n"); + trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_thread); + trace_seq_printf(s, "%9llu ", cpu_data->min_thread); + trace_seq_printf(s, "%9llu ", + cpu_data->sum_thread / cpu_data->thread_count); + trace_seq_printf(s, "%9llu", cpu_data->max_thread); + } + + if (!params->common.user_data) { + trace_seq_printf(s, "\n"); + return; + } + + trace_seq_printf(s, " |"); + + if (!cpu_data->user_count) { + trace_seq_printf(s, "%s %s %s %s\n", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_user); + trace_seq_printf(s, "%9llu ", cpu_data->min_user); + trace_seq_printf(s, "%9llu ", + cpu_data->sum_user / cpu_data->user_count); + trace_seq_printf(s, "%9llu\n", cpu_data->max_user); + } +} + +/* + * timerlat_top_print_sum - prints the summary output + */ +static void +timerlat_top_print_sum(struct osnoise_tool *top, struct timerlat_top_cpu *summary) +{ + const char *split = "----------------------------------------"; + struct timerlat_params *params = to_timerlat_params(top->params); + unsigned long long count = summary->irq_count; + struct trace_seq *s = top->trace.seq; + int e = 0; + + /* + * Skip if no data is available: is this cpu offline? + */ + if (!summary->irq_count && !summary->thread_count) + return; + + while (count > 999999) { + e++; + count /= 10; + } + + trace_seq_printf(s, "%.*s|%.*s|%.*s", 15, split, 40, split, 39, split); + if (params->common.user_data) + trace_seq_printf(s, "-|%.*s", 39, split); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "ALL #%-6llu e%d |", count, e); + + if (!summary->irq_count) { + trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value); } else { - trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); - trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); + trace_seq_printf(s, " "); + trace_seq_printf(s, "%9llu ", summary->min_irq); + trace_seq_printf(s, "%9llu ", summary->sum_irq / summary->irq_count); + trace_seq_printf(s, "%9llu |", summary->max_irq); + } + + if (!summary->thread_count) { + trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, " "); + trace_seq_printf(s, "%9llu ", summary->min_thread); trace_seq_printf(s, "%9llu ", - (cpu_data->sum_thread / cpu_data->thread_count) / divisor); - trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor); + summary->sum_thread / summary->thread_count); + trace_seq_printf(s, "%9llu", summary->max_thread); + } + + if (!params->common.user_data) { + trace_seq_printf(s, "\n"); + return; + } + + trace_seq_printf(s, " |"); + + if (!summary->user_count) { + trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value); + } else { + trace_seq_printf(s, " "); + trace_seq_printf(s, "%9llu ", summary->min_user); + trace_seq_printf(s, "%9llu ", + summary->sum_user / summary->user_count); + trace_seq_printf(s, "%9llu\n", summary->max_user); } } @@ -235,57 +438,71 @@ static void clear_terminal(struct trace_seq *seq) * timerlat_print_stats - print data for all cpus */ static void -timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top) +timerlat_print_stats(struct osnoise_tool *top) { + struct timerlat_params *params = to_timerlat_params(top->params); struct trace_instance *trace = &top->trace; + struct timerlat_top_cpu summary; static int nr_cpus = -1; int i; + if (params->common.aa_only) + return; + if (nr_cpus == -1) nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - if (!params->quiet) + if (!params->common.quiet) clear_terminal(trace->seq); - timerlat_top_header(top); + timerlat_top_reset_sum(&summary); + + timerlat_top_header(params, top); - for (i = 0; i < nr_cpus; i++) { - if (params->cpus && !params->monitored_cpus[i]) - continue; + for_each_monitored_cpu(i, nr_cpus, ¶ms->common) { timerlat_top_print(top, i); + timerlat_top_update_sum(top, i, &summary); } + timerlat_top_print_sum(top, &summary); + trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); + osnoise_report_missed_events(top); } /* * timerlat_top_usage - prints timerlat top usage message */ -static void timerlat_top_usage(char *usage) +static void timerlat_top_usage(void) { int i; static const char *const msg[] = { "", " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", - " [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\", - " [-P priority] [--dma-latency us]", + " [[-t [file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", + " [-P priority] [--dma-latency us] [--aa-only us] [-C [cgroup_name]] [-u|-k] [--warm-up s] [--deepest-idle-state n]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", + " --aa-only us: stop if <us> latency is hit, only printing the auto analysis (reduces CPU usage)", " -p/--period us: timerlat period in us", " -i/--irq us: stop trace if the irq latency is higher than the argument in us", " -T/--thread us: stop trace if the thread latency is higher than the argument in us", " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", " -c/--cpus cpus: run the tracer only on the given cpus", - " -d/--duration time[m|h|d]: duration of the session in seconds", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup [cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", + " -d/--duration time[s|m|h|d]: duration of the session", " -D/--debug: print debug info", - " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", + " -t/--trace [file]: save the stopped trace to [file|timerlat_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <command>: enable a trace event filter to the previous -e event", " --trigger <command>: enable a trace event trigger to the previous -e event", " -n/--nano: display data in nanoseconds", + " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage", " -q/--quiet print only a summary at the end", " --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency", " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", @@ -294,50 +511,68 @@ static void timerlat_top_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", + " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads", + " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads", + " -U/--user-load: enable timerlat for user-defined user-space workload", + " --warm-up s: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", + " --on-threshold <action>: define action to be executed at latency threshold, multiple are allowed", + " --on-end: define action to be executed at measurement end, multiple are allowed", NULL, }; - if (usage) - fprintf(stderr, "%s\n", usage); - fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n", VERSION); for (i = 0; msg[i]; i++) fprintf(stderr, "%s\n", msg[i]); - exit(1); + + exit(EXIT_SUCCESS); } /* * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters */ -static struct timerlat_top_params +static struct common_params *timerlat_top_parse_args(int argc, char **argv) { - struct timerlat_top_params *params; + struct timerlat_params *params; struct trace_events *tevent; long long auto_thresh; int retval; int c; + char *trace_output = NULL; params = calloc(1, sizeof(*params)); if (!params) exit(1); + actions_init(¶ms->common.threshold_actions); + actions_init(¶ms->common.end_actions); + /* disabled by default */ params->dma_latency = -1; + /* disabled by default */ + params->deepest_idle_state = -2; + /* display data in microseconds */ - params->output_divisor = 1000; + params->common.output_divisor = 1000; + + /* default to BPF mode */ + params->mode = TRACING_MODE_BPF; while (1) { static struct option long_options[] = { {"auto", required_argument, 0, 'a'}, {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, {"debug", no_argument, 0, 'D'}, {"duration", required_argument, 0, 'd'}, {"event", required_argument, 0, 'e'}, {"help", no_argument, 0, 'h'}, + {"house-keeping", required_argument, 0, 'H'}, {"irq", required_argument, 0, 'i'}, {"nano", no_argument, 0, 'n'}, {"period", required_argument, 0, 'p'}, @@ -346,17 +581,25 @@ static struct timerlat_top_params {"stack", required_argument, 0, 's'}, {"thread", required_argument, 0, 'T'}, {"trace", optional_argument, 0, 't'}, + {"user-threads", no_argument, 0, 'u'}, + {"kernel-threads", no_argument, 0, 'k'}, + {"user-load", no_argument, 0, 'U'}, {"trigger", required_argument, 0, '0'}, {"filter", required_argument, 0, '1'}, {"dma-latency", required_argument, 0, '2'}, + {"no-aa", no_argument, 0, '3'}, + {"dump-tasks", no_argument, 0, '4'}, + {"aa-only", required_argument, 0, '5'}, + {"warm-up", required_argument, 0, '6'}, + {"trace-buffer-size", required_argument, 0, '7'}, + {"deepest-idle-state", required_argument, 0, '8'}, + {"on-threshold", required_argument, 0, '9'}, + {"on-end", required_argument, 0, '\1'}, {0, 0, 0, 0} }; - /* getopt_long stores the option index here. */ - int option_index = 0; - - c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:", - long_options, &option_index); + c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:knp:P:qs:t::T:uU0:1:2:345:6:7:", + long_options, NULL); /* detect the end of the options. */ if (c == -1) @@ -367,171 +610,209 @@ static struct timerlat_top_params auto_thresh = get_llong_from_str(optarg); /* set thread stop to auto_thresh */ - params->stop_total_us = auto_thresh; + params->common.stop_total_us = auto_thresh; + params->common.stop_us = auto_thresh; /* get stack trace */ params->print_stack = auto_thresh; /* set trace */ - params->trace_output = "timerlat_trace.txt"; + if (!trace_output) + trace_output = "timerlat_trace.txt"; break; + case '5': + /* it is here because it is similar to -a */ + auto_thresh = get_llong_from_str(optarg); + + /* set thread stop to auto_thresh */ + params->common.stop_total_us = auto_thresh; + params->common.stop_us = auto_thresh; + + /* get stack trace */ + params->print_stack = auto_thresh; + + /* set aa_only to avoid parsing the trace */ + params->common.aa_only = 1; + break; case 'c': - retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + retval = parse_cpu_set(optarg, ¶ms->common.monitored_cpus); if (retval) - timerlat_top_usage("\nInvalid -c cpu list\n"); - params->cpus = optarg; + fatal("Invalid -c cpu list"); + params->common.cpus = optarg; + break; + case 'C': + params->common.cgroup = 1; + params->common.cgroup_name = optarg; break; case 'D': config_debug = 1; break; case 'd': - params->duration = parse_seconds_duration(optarg); - if (!params->duration) - timerlat_top_usage("Invalid -D duration\n"); + params->common.duration = parse_seconds_duration(optarg); + if (!params->common.duration) + fatal("Invalid -d duration"); break; case 'e': tevent = trace_event_alloc(optarg); - if (!tevent) { - err_msg("Error alloc trace event"); - exit(EXIT_FAILURE); - } + if (!tevent) + fatal("Error alloc trace event"); - if (params->events) - tevent->next = params->events; - params->events = tevent; + if (params->common.events) + tevent->next = params->common.events; + params->common.events = tevent; break; case 'h': case '?': - timerlat_top_usage(NULL); + timerlat_top_usage(); + break; + case 'H': + params->common.hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->common.hk_cpu_set); + if (retval) + fatal("Error parsing house keeping CPUs"); break; case 'i': - params->stop_us = get_llong_from_str(optarg); + params->common.stop_us = get_llong_from_str(optarg); + break; + case 'k': + params->common.kernel_workload = true; break; case 'n': - params->output_divisor = 1; + params->common.output_divisor = 1; break; case 'p': params->timerlat_period_us = get_llong_from_str(optarg); if (params->timerlat_period_us > 1000000) - timerlat_top_usage("Period longer than 1 s\n"); + fatal("Period longer than 1 s"); break; case 'P': - retval = parse_prio(optarg, ¶ms->sched_param); + retval = parse_prio(optarg, ¶ms->common.sched_param); if (retval == -1) - timerlat_top_usage("Invalid -P priority"); - params->set_sched = 1; + fatal("Invalid -P priority"); + params->common.set_sched = 1; break; case 'q': - params->quiet = 1; + params->common.quiet = 1; break; case 's': params->print_stack = get_llong_from_str(optarg); break; case 'T': - params->stop_total_us = get_llong_from_str(optarg); + params->common.stop_total_us = get_llong_from_str(optarg); break; case 't': - if (optarg) - /* skip = */ - params->trace_output = &optarg[1]; - else - params->trace_output = "timerlat_trace.txt"; + trace_output = parse_optional_arg(argc, argv); + if (!trace_output) + trace_output = "timerlat_trace.txt"; + break; + case 'u': + params->common.user_workload = true; + /* fallback: -u implies -U */ + case 'U': + params->common.user_data = true; break; case '0': /* trigger */ - if (params->events) { - retval = trace_event_add_trigger(params->events, optarg); - if (retval) { - err_msg("Error adding trigger %s\n", optarg); - exit(EXIT_FAILURE); - } + if (params->common.events) { + retval = trace_event_add_trigger(params->common.events, optarg); + if (retval) + fatal("Error adding trigger %s", optarg); } else { - timerlat_top_usage("--trigger requires a previous -e\n"); + fatal("--trigger requires a previous -e"); } break; case '1': /* filter */ - if (params->events) { - retval = trace_event_add_filter(params->events, optarg); - if (retval) { - err_msg("Error adding filter %s\n", optarg); - exit(EXIT_FAILURE); - } + if (params->common.events) { + retval = trace_event_add_filter(params->common.events, optarg); + if (retval) + fatal("Error adding filter %s", optarg); } else { - timerlat_top_usage("--filter requires a previous -e\n"); + fatal("--filter requires a previous -e"); } break; case '2': /* dma-latency */ params->dma_latency = get_llong_from_str(optarg); - if (params->dma_latency < 0 || params->dma_latency > 10000) { - err_msg("--dma-latency needs to be >= 0 and < 10000"); - exit(EXIT_FAILURE); - } + if (params->dma_latency < 0 || params->dma_latency > 10000) + fatal("--dma-latency needs to be >= 0 and < 10000"); + break; + case '3': /* no-aa */ + params->no_aa = 1; + break; + case '4': + params->dump_tasks = 1; + break; + case '6': + params->common.warmup = get_llong_from_str(optarg); + break; + case '7': + params->common.buffer_size = get_llong_from_str(optarg); + break; + case '8': + params->deepest_idle_state = get_llong_from_str(optarg); + break; + case '9': + retval = actions_parse(¶ms->common.threshold_actions, optarg, + "timerlat_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); + break; + case '\1': + retval = actions_parse(¶ms->common.end_actions, optarg, + "timerlat_trace.txt"); + if (retval) + fatal("Invalid action %s", optarg); break; default: - timerlat_top_usage("Invalid option"); + fatal("Invalid option"); } } - if (geteuid()) { - err_msg("rtla needs root permission\n"); - exit(EXIT_FAILURE); - } + if (trace_output) + actions_add_trace_output(¶ms->common.threshold_actions, trace_output); + + if (geteuid()) + fatal("rtla needs root permission"); + + /* + * Auto analysis only happens if stop tracing, thus: + */ + if (!params->common.stop_us && !params->common.stop_total_us) + params->no_aa = 1; + + if (params->no_aa && params->common.aa_only) + fatal("--no-aa and --aa-only are mutually exclusive!"); - return params; + if (params->common.kernel_workload && params->common.user_workload) + fatal("--kernel-threads and --user-threads are mutually exclusive!"); + + /* + * If auto-analysis or trace output is enabled, switch from BPF mode to + * mixed mode + */ + if (params->mode == TRACING_MODE_BPF && + (params->common.threshold_actions.present[ACTION_TRACE_OUTPUT] || + params->common.end_actions.present[ACTION_TRACE_OUTPUT] || + !params->no_aa)) + params->mode = TRACING_MODE_MIXED; + + return ¶ms->common; } /* * timerlat_top_apply_config - apply the top configs to the initialized tool */ static int -timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params) +timerlat_top_apply_config(struct osnoise_tool *top) { + struct timerlat_params *params = to_timerlat_params(top->params); int retval; - if (!params->sleep_time) - params->sleep_time = 1; - - if (params->cpus) { - retval = osnoise_set_cpus(top->context, params->cpus); - if (retval) { - err_msg("Failed to apply CPUs config\n"); - goto out_err; - } - } - - if (params->stop_us) { - retval = osnoise_set_stop_us(top->context, params->stop_us); - if (retval) { - err_msg("Failed to set stop us\n"); - goto out_err; - } - } - - if (params->stop_total_us) { - retval = osnoise_set_stop_total_us(top->context, params->stop_total_us); - if (retval) { - err_msg("Failed to set stop total us\n"); - goto out_err; - } - } - - - if (params->timerlat_period_us) { - retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us); - if (retval) { - err_msg("Failed to set timerlat period\n"); - goto out_err; - } - } + retval = timerlat_apply_config(top, params); + if (retval) + goto out_err; - - if (params->print_stack) { - retval = osnoise_set_print_stack(top->context, params->print_stack); - if (retval) { - err_msg("Failed to set print stack\n"); - goto out_err; - } - } + if (isatty(STDOUT_FILENO) && !params->common.quiet) + params->common.pretty_output = 1; return 0; @@ -543,7 +824,7 @@ out_err: * timerlat_init_top - initialize a timerlat top tool with parameters */ static struct osnoise_tool -*timerlat_init_top(struct timerlat_top_params *params) +*timerlat_init_top(struct common_params *params) { struct osnoise_tool *top; int nr_cpus; @@ -558,8 +839,6 @@ static struct osnoise_tool if (!top->data) goto out_err; - top->params = params; - tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat", timerlat_top_handler, top); @@ -570,140 +849,87 @@ out_err: return NULL; } -static int stop_tracing; -static void stop_top(int sig) -{ - stop_tracing = 1; -} - /* - * timerlat_top_set_signals - handles the signal to stop the tool + * timerlat_top_bpf_main_loop - main loop to process events (BPF variant) */ -static void -timerlat_top_set_signals(struct timerlat_top_params *params) -{ - signal(SIGINT, stop_top); - if (params->duration) { - signal(SIGALRM, stop_top); - alarm(params->duration); - } -} - -int timerlat_top_main(int argc, char *argv[]) +static int +timerlat_top_bpf_main_loop(struct osnoise_tool *tool) { - struct timerlat_top_params *params; - struct osnoise_tool *record = NULL; - struct osnoise_tool *top = NULL; - struct trace_instance *trace; - int dma_latency_fd = -1; - int return_value = 1; - int retval; + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval, wait_retval; - params = timerlat_top_parse_args(argc, argv); - if (!params) - exit(1); - - top = timerlat_init_top(params); - if (!top) { - err_msg("Could not init osnoise top\n"); - goto out_exit; + if (params->common.aa_only) { + /* Auto-analysis only, just wait for stop tracing */ + timerlat_bpf_wait(-1); + return 0; } - retval = timerlat_top_apply_config(top, params); - if (retval) { - err_msg("Could not apply config\n"); - goto out_free; - } - - trace = &top->trace; - - retval = enable_timerlat(trace); - if (retval) { - err_msg("Failed to enable timerlat tracer\n"); - goto out_free; - } + /* Pull and display data in a loop */ + while (!stop_tracing) { + wait_retval = timerlat_bpf_wait(params->common.quiet ? -1 : + params->common.sleep_time); - if (params->set_sched) { - retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + retval = timerlat_top_bpf_pull_data(tool); if (retval) { - err_msg("Failed to set sched parameters\n"); - goto out_free; + err_msg("Error pulling BPF data\n"); + return retval; } - } - if (params->dma_latency >= 0) { - dma_latency_fd = set_cpu_dma_latency(params->dma_latency); - if (dma_latency_fd < 0) { - err_msg("Could not set /dev/cpu_dma_latency.\n"); - goto out_free; - } - } + if (!params->common.quiet) + timerlat_print_stats(tool); - trace_instance_start(trace); + if (wait_retval != 0) { + /* Stopping requested by tracer */ + actions_perform(¶ms->common.threshold_actions); - if (params->trace_output) { - record = osnoise_init_trace_tool("timerlat"); - if (!record) { - err_msg("Failed to enable the trace instance\n"); - goto out_free; - } + if (!params->common.threshold_actions.continue_flag) + /* continue flag not set, break */ + break; - if (params->events) { - retval = trace_events_enable(&record->trace, params->events); - if (retval) - goto out_top; + /* continue action reached, re-enable tracing */ + if (tool->record) + trace_instance_start(&tool->record->trace); + if (tool->aa) + trace_instance_start(&tool->aa->trace); + timerlat_bpf_restart_tracing(); } - trace_instance_start(&record->trace); - } - - top->start_time = time(NULL); - timerlat_top_set_signals(params); - - while (!stop_tracing) { - sleep(params->sleep_time); - - retval = tracefs_iterate_raw_events(trace->tep, - trace->inst, - NULL, - 0, - collect_registered_events, - trace); - if (retval < 0) { - err_msg("Error iterating on events\n"); - goto out_top; + /* is there still any user-threads ? */ + if (params->common.user_workload) { + if (params->common.user.stopped_running) { + debug_msg("timerlat user space threads stopped!\n"); + break; + } } - - if (!params->quiet) - timerlat_print_stats(params, top); - - if (trace_is_off(&top->trace, &record->trace)) - break; - } - timerlat_print_stats(params, top); + return 0; +} - return_value = 0; +static int timerlat_top_main_loop(struct osnoise_tool *tool) +{ + struct timerlat_params *params = to_timerlat_params(tool->params); + int retval; - if (trace_is_off(&top->trace, &record->trace)) { - printf("rtla timerlat hit stop tracing\n"); - if (params->trace_output) { - printf(" Saving trace to %s\n", params->trace_output); - save_trace_to_file(record->trace.inst, params->trace_output); - } + if (params->mode == TRACING_MODE_TRACEFS) { + retval = top_main_loop(tool); + } else { + retval = timerlat_top_bpf_main_loop(tool); + timerlat_bpf_detach(); } -out_top: - if (dma_latency_fd >= 0) - close(dma_latency_fd); - trace_events_destroy(&record->trace, params->events); - params->events = NULL; -out_free: - timerlat_free_top(top->data); - osnoise_destroy_tool(record); - osnoise_destroy_tool(top); - free(params); -out_exit: - exit(return_value); + return retval; } + +struct tool_ops timerlat_top_ops = { + .tracer = "timerlat", + .comm_prefix = "timerlat/", + .parse_args = timerlat_top_parse_args, + .init_tool = timerlat_init_top, + .apply_config = timerlat_top_apply_config, + .enable = timerlat_enable, + .main = timerlat_top_main_loop, + .print_stats = timerlat_print_stats, + .analyze = timerlat_analyze, + .free = timerlat_free_top_tool, +}; diff --git a/tools/tracing/rtla/src/timerlat_u.c b/tools/tracing/rtla/src/timerlat_u.c new file mode 100644 index 000000000000..ce68e39d25fd --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_u.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <sched.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <tracefs.h> +#include <pthread.h> +#include <sys/wait.h> +#include <sys/prctl.h> + +#include "utils.h" +#include "timerlat_u.h" + +/* + * This is the user-space main for the tool timerlatu/ threads. + * + * It is as simple as this: + * - set affinity + * - set priority + * - open tracer fd + * - spin + * - close + */ +static int timerlat_u_main(int cpu, struct timerlat_u_params *params) +{ + struct sched_param sp = { .sched_priority = 95 }; + char buffer[1024]; + int timerlat_fd; + cpu_set_t set; + int retval; + + /* + * This all is only setting up the tool. + */ + CPU_ZERO(&set); + CPU_SET(cpu, &set); + + retval = sched_setaffinity(gettid(), sizeof(set), &set); + if (retval == -1) { + debug_msg("Error setting user thread affinity %d, is the CPU online?\n", cpu); + exit(1); + } + + if (!params->sched_param) { + retval = sched_setscheduler(0, SCHED_FIFO, &sp); + if (retval < 0) + fatal("Error setting timerlat u default priority: %s", strerror(errno)); + } else { + retval = __set_sched_attr(getpid(), params->sched_param); + if (retval) { + /* __set_sched_attr prints an error message, so */ + exit(0); + } + } + + if (params->cgroup_name) { + retval = set_pid_cgroup(gettid(), params->cgroup_name); + if (!retval) { + err_msg("Error setting timerlat u cgroup pid\n"); + pthread_exit(&retval); + } + } + + /* + * This is the tool's loop. If you want to use as base for your own tool... + * go ahead. + */ + snprintf(buffer, sizeof(buffer), "osnoise/per_cpu/cpu%d/timerlat_fd", cpu); + + timerlat_fd = tracefs_instance_file_open(NULL, buffer, O_RDONLY); + if (timerlat_fd < 0) + fatal("Error opening %s:%s", buffer, strerror(errno)); + + debug_msg("User-space timerlat pid %d on cpu %d\n", gettid(), cpu); + + /* add should continue with a signal handler */ + while (true) { + retval = read(timerlat_fd, buffer, 1024); + if (retval < 0) + break; + } + + close(timerlat_fd); + + debug_msg("Leaving timerlat pid %d on cpu %d\n", gettid(), cpu); + exit(0); +} + +/* + * timerlat_u_send_kill - send a kill signal for all processes + * + * Return the number of processes that received the kill. + */ +static int timerlat_u_send_kill(pid_t *procs, int nr_cpus) +{ + int killed = 0; + int i, retval; + + for (i = 0; i < nr_cpus; i++) { + if (!procs[i]) + continue; + retval = kill(procs[i], SIGKILL); + if (!retval) + killed++; + else + err_msg("Error killing child process %d\n", procs[i]); + } + + return killed; +} + +/** + * timerlat_u_dispatcher - dispatch one timerlatu/ process per monitored CPU + * + * This is a thread main that will fork one new process for each monitored + * CPU. It will wait for: + * + * - rtla to tell to kill the child processes + * - some child process to die, and the cleanup all the processes + * + * whichever comes first. + * + */ +void *timerlat_u_dispatcher(void *data) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + struct timerlat_u_params *params = data; + char proc_name[128]; + int procs_count = 0; + int retval = 1; + pid_t *procs; + int wstatus; + pid_t pid; + int i; + + debug_msg("Dispatching timerlat u procs\n"); + + procs = calloc(nr_cpus, sizeof(pid_t)); + if (!procs) + pthread_exit(&retval); + + for (i = 0; i < nr_cpus; i++) { + if (params->set && !CPU_ISSET(i, params->set)) + continue; + + pid = fork(); + + /* child */ + if (!pid) { + + /* + * rename the process + */ + snprintf(proc_name, sizeof(proc_name), "timerlatu/%d", i); + pthread_setname_np(pthread_self(), proc_name); + prctl(PR_SET_NAME, (unsigned long)proc_name, 0, 0, 0); + + timerlat_u_main(i, params); + /* timerlat_u_main should exit()! Anyways... */ + pthread_exit(&retval); + } + + /* parent */ + if (pid == -1) { + timerlat_u_send_kill(procs, nr_cpus); + debug_msg("Failed to create child processes"); + pthread_exit(&retval); + } + + procs_count++; + procs[i] = pid; + } + + while (params->should_run) { + /* check if processes died */ + pid = waitpid(-1, &wstatus, WNOHANG); + if (pid != 0) { + for (i = 0; i < nr_cpus; i++) { + if (procs[i] == pid) { + procs[i] = 0; + procs_count--; + } + } + + if (!procs_count) + break; + } + + sleep(1); + } + + timerlat_u_send_kill(procs, nr_cpus); + + while (procs_count) { + pid = waitpid(-1, &wstatus, 0); + if (pid == -1) { + err_msg("Failed to monitor child processes"); + pthread_exit(&retval); + } + for (i = 0; i < nr_cpus; i++) { + if (procs[i] == pid) { + procs[i] = 0; + procs_count--; + } + } + } + + params->stopped_running = 1; + + free(procs); + retval = 0; + pthread_exit(&retval); + +} diff --git a/tools/tracing/rtla/src/timerlat_u.h b/tools/tracing/rtla/src/timerlat_u.h new file mode 100644 index 000000000000..661511908957 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_u.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +struct timerlat_u_params { + /* timerlat -> timerlat_u: user-space threads can keep running */ + int should_run; + /* timerlat_u -> timerlat: all timerlat_u threads left, no reason to continue */ + int stopped_running; + + /* threads config */ + cpu_set_t *set; + char *cgroup_name; + struct sched_attr *sched_param; +}; + +void *timerlat_u_dispatcher(void *data); diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index e1ba6d9f4265..69cbc48d53d3 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -75,12 +75,16 @@ int save_trace_to_file(struct tracefs_instance *inst, const char *filename) int out_fd, in_fd; int retval = -1; + if (!inst || !filename) + return 0; + in_fd = tracefs_instance_file_open(inst, file, O_RDONLY); if (in_fd < 0) { err_msg("Failed to open trace file\n"); return -1; } + printf(" Saving trace to %s\n", filename); out_fd = creat(filename, mode); if (out_fd < 0) { err_msg("Failed to create output file %s\n", filename); @@ -118,6 +122,8 @@ collect_registered_events(struct tep_event *event, struct tep_record *record, struct trace_instance *trace = context; struct trace_seq *s = trace->seq; + trace->processed_events++; + if (!event->handler) return 0; @@ -127,6 +133,31 @@ collect_registered_events(struct tep_event *event, struct tep_record *record, } /* + * collect_missed_events - record number of missed events + * + * If rtla cannot keep up with events generated by tracer, events are going + * to fall out of the ring buffer. + * Collect how many events were missed so it can be reported to the user. + */ +static int +collect_missed_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + + if (trace->missed_events == UINT64_MAX) + return 0; + + if (record->missed_events > 0) + trace->missed_events += record->missed_events; + else + /* Events missed but no data on how many */ + trace->missed_events = UINT64_MAX; + + return 0; +} + +/* * trace_instance_destroy - destroy and free a rtla trace instance */ void trace_instance_destroy(struct trace_instance *trace) @@ -181,6 +212,17 @@ int trace_instance_init(struct trace_instance *trace, char *tool_name) */ tracefs_trace_off(trace->inst); + /* + * Collect the number of events missed due to tracefs buffer + * overflow. + */ + trace->missed_events = 0; + tracefs_follow_missed_events(trace->inst, + collect_missed_events, + trace); + + trace->processed_events = 0; + return 0; out_err: @@ -197,6 +239,14 @@ int trace_instance_start(struct trace_instance *trace) } /* + * trace_instance_stop - stop tracing a given rtla instance + */ +int trace_instance_stop(struct trace_instance *trace) +{ + return tracefs_trace_off(trace->inst); +} + +/* * trace_events_free - free a list of trace events */ static void trace_events_free(struct trace_events *events) @@ -522,21 +572,17 @@ void trace_events_destroy(struct trace_instance *instance, trace_events_free(events); } -int trace_is_off(struct trace_instance *tool, struct trace_instance *trace) +/* + * trace_set_buffer_size - set the per-cpu tracing buffer size. + */ +int trace_set_buffer_size(struct trace_instance *trace, int size) { - /* - * The tool instance is always present, it is the one used to collect - * data. - */ - if (!tracefs_trace_is_on(tool->inst)) - return 1; + int retval; - /* - * The trace instance is only enabled when -t is set. IOW, when the system - * is tracing. - */ - if (trace && !tracefs_trace_is_on(trace->inst)) - return 1; + debug_msg("Setting trace buffer size to %d Kb\n", size); + retval = tracefs_instance_set_buffer_size(trace->inst, size, -1); + if (retval) + err_msg("Error setting trace buffer size\n"); - return 0; + return retval; } diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h index 2e9a89a25615..1e5aee4b828d 100644 --- a/tools/tracing/rtla/src/trace.h +++ b/tools/tracing/rtla/src/trace.h @@ -17,19 +17,19 @@ struct trace_instance { struct tracefs_instance *inst; struct tep_handle *tep; struct trace_seq *seq; + unsigned long long missed_events; + unsigned long long processed_events; }; int trace_instance_init(struct trace_instance *trace, char *tool_name); int trace_instance_start(struct trace_instance *trace); +int trace_instance_stop(struct trace_instance *trace); void trace_instance_destroy(struct trace_instance *trace); struct trace_seq *get_trace_seq(void); int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name); void disable_tracer(struct tracefs_instance *inst); -int enable_osnoise(struct trace_instance *trace); -int enable_timerlat(struct trace_instance *trace); - struct tracefs_instance *create_instance(char *instance_name); void destroy_instance(struct tracefs_instance *inst); @@ -47,4 +47,4 @@ int trace_events_enable(struct trace_instance *instance, int trace_event_add_filter(struct trace_events *event, char *filter); int trace_event_add_trigger(struct trace_events *event, char *trigger); -int trace_is_off(struct trace_instance *tool, struct trace_instance *trace); +int trace_set_buffer_size(struct trace_instance *trace, int size); diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 663a047f794d..9cf5a0098e9a 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -3,6 +3,10 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE +#ifdef HAVE_LIBCPUPOWER_SUPPORT +#include <cpuidle.h> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ #include <dirent.h> #include <stdarg.h> #include <stdlib.h> @@ -53,6 +57,21 @@ void debug_msg(const char *fmt, ...) } /* + * fatal - print an error message and EOL to stderr and exit with ERROR + */ +void fatal(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "\n"); + + exit(ERROR); +} + +/* * get_llong_from_str - get a long long int from a string */ long long get_llong_from_str(char *start) @@ -88,27 +107,24 @@ void get_duration(time_t start_time, char *output, int output_size) } /* - * parse_cpu_list - parse a cpu_list filling a char vector with cpus set + * parse_cpu_set - parse a cpu_list filling cpu_set_t argument * - * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set the char - * in the monitored_cpus. + * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set + * filling cpu_set_t argument. * - * XXX: convert to a bitmask. + * Returns 1 on success, 0 otherwise. */ -int parse_cpu_list(char *cpu_list, char **monitored_cpus) +int parse_cpu_set(char *cpu_list, cpu_set_t *set) { - char *mon_cpus; const char *p; int end_cpu; int nr_cpus; int cpu; int i; - nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + CPU_ZERO(set); - mon_cpus = calloc(nr_cpus, sizeof(char)); - if (!mon_cpus) - goto err; + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); for (p = cpu_list; *p; ) { cpu = atoi(p); @@ -128,12 +144,12 @@ int parse_cpu_list(char *cpu_list, char **monitored_cpus) end_cpu = cpu; if (cpu == end_cpu) { - debug_msg("cpu_list: adding cpu %d\n", cpu); - mon_cpus[cpu] = 1; + debug_msg("cpu_set: adding cpu %d\n", cpu); + CPU_SET(cpu, set); } else { for (i = cpu; i <= end_cpu; i++) { - debug_msg("cpu_list: adding cpu %d\n", i); - mon_cpus[i] = 1; + debug_msg("cpu_set: adding cpu %d\n", i); + CPU_SET(i, set); } } @@ -141,12 +157,9 @@ int parse_cpu_list(char *cpu_list, char **monitored_cpus) p++; } - *monitored_cpus = mon_cpus; - return 0; - err: - debug_msg("Error parsing the cpu list %s", cpu_list); + debug_msg("Error parsing the cpu set %s\n", cpu_list); return 1; } @@ -216,45 +229,37 @@ long parse_ns_duration(char *val) /* * This is a set of helper functions to use SCHED_DEADLINE. */ -#ifdef __x86_64__ -# define __NR_sched_setattr 314 -# define __NR_sched_getattr 315 -#elif __i386__ -# define __NR_sched_setattr 351 -# define __NR_sched_getattr 352 -#elif __arm__ -# define __NR_sched_setattr 380 -# define __NR_sched_getattr 381 -#elif __aarch64__ || __riscv -# define __NR_sched_setattr 274 -# define __NR_sched_getattr 275 -#elif __powerpc__ -# define __NR_sched_setattr 355 -# define __NR_sched_getattr 356 -#elif __s390x__ -# define __NR_sched_setattr 345 -# define __NR_sched_getattr 346 +#ifndef __NR_sched_setattr +# ifdef __x86_64__ +# define __NR_sched_setattr 314 +# elif __i386__ +# define __NR_sched_setattr 351 +# elif __arm__ +# define __NR_sched_setattr 380 +# elif __aarch64__ || __riscv +# define __NR_sched_setattr 274 +# elif __powerpc__ +# define __NR_sched_setattr 355 +# elif __s390x__ +# define __NR_sched_setattr 345 +# elif __loongarch__ +# define __NR_sched_setattr 274 +# endif #endif #define SCHED_DEADLINE 6 -static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, +static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags) { return syscall(__NR_sched_setattr, pid, attr, flags); } -static inline int sched_getattr(pid_t pid, struct sched_attr *attr, - unsigned int size, unsigned int flags) -{ - return syscall(__NR_sched_getattr, pid, attr, size, flags); -} - int __set_sched_attr(int pid, struct sched_attr *attr) { int flags = 0; int retval; - retval = sched_setattr(pid, attr, flags); + retval = syscall_sched_setattr(pid, attr, flags); if (retval < 0) { err_msg("Failed to set sched attributes to the pid %d: %s\n", pid, strerror(errno)); @@ -484,13 +489,13 @@ int parse_prio(char *arg, struct sched_attr *sched_param) if (prio == INVALID_VAL) return -1; - if (prio < sched_get_priority_min(SCHED_OTHER)) + if (prio < MIN_NICE) return -1; - if (prio > sched_get_priority_max(SCHED_OTHER)) + if (prio > MAX_NICE) return -1; sched_param->sched_policy = SCHED_OTHER; - sched_param->sched_priority = prio; + sched_param->sched_nice = prio; break; default: return -1; @@ -529,3 +534,469 @@ int set_cpu_dma_latency(int32_t latency) return fd; } + +#ifdef HAVE_LIBCPUPOWER_SUPPORT +static unsigned int **saved_cpu_idle_disable_state; +static size_t saved_cpu_idle_disable_state_alloc_ctr; + +/* + * save_cpu_idle_state_disable - save disable for all idle states of a cpu + * + * Saves the current disable of all idle states of a cpu, to be subsequently + * restored via restore_cpu_idle_disable_state. + * + * Return: idle state count on success, negative on error + */ +int save_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int nr_cpus; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (saved_cpu_idle_disable_state == NULL) { + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); + if (!saved_cpu_idle_disable_state) + return -1; + } + + saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + saved_cpu_idle_disable_state_alloc_ctr++; + + for (state = 0; state < nr_states; state++) { + disabled = cpuidle_is_state_disabled(cpu, state); + if (disabled < 0) + return disabled; + saved_cpu_idle_disable_state[cpu][state] = disabled; + } + + return nr_states; +} + +/* + * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu + * + * Restores the current disable state of all idle states of a cpu that was + * previously saved by save_cpu_idle_disable_state. + * + * Return: idle state count on success, negative on error + */ +int restore_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int result; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (!saved_cpu_idle_disable_state) + return -1; + + for (state = 0; state < nr_states; state++) { + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + disabled = saved_cpu_idle_disable_state[cpu][state]; + result = cpuidle_state_disable(cpu, state, disabled); + if (result < 0) + return result; + } + + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + saved_cpu_idle_disable_state_alloc_ctr--; + if (saved_cpu_idle_disable_state_alloc_ctr == 0) { + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; + } + + return nr_states; +} + +/* + * free_cpu_idle_disable_states - free saved idle state disable for all cpus + * + * Frees the memory used for storing cpu idle state disable for all cpus + * and states. + * + * Normally, the memory is freed automatically in + * restore_cpu_idle_disable_state; this is mostly for cleaning up after an + * error. + */ +void free_cpu_idle_disable_states(void) +{ + int cpu; + int nr_cpus; + + if (!saved_cpu_idle_disable_state) + return; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (cpu = 0; cpu < nr_cpus; cpu++) { + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + } + + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; +} + +/* + * set_deepest_cpu_idle_state - limit idle state of cpu + * + * Disables all idle states deeper than the one given in + * deepest_state (assuming states with higher number are deeper). + * + * This is used to reduce the exit from idle latency. Unlike + * set_cpu_dma_latency, it can disable idle states per cpu. + * + * Return: idle state count on success, negative on error + */ +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) +{ + unsigned int nr_states; + unsigned int state; + int result; + + nr_states = cpuidle_state_count(cpu); + + for (state = deepest_state + 1; state < nr_states; state++) { + result = cpuidle_state_disable(cpu, state, 1); + if (result < 0) + return result; + } + + return nr_states; +} +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * find_mount - find a the mount point of a given fs + * + * Returns 0 if mount is not found, otherwise return 1 and fill mp + * with the mount point. + */ +static const int find_mount(const char *fs, char *mp, int sizeof_mp) +{ + char mount_point[MAX_PATH+1]; + char type[100]; + int found = 0; + FILE *fp; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return 0; + + while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { + if (strcmp(type, fs) == 0) { + found = 1; + break; + } + } + fclose(fp); + + if (!found) + return 0; + + memset(mp, 0, sizeof_mp); + strncpy(mp, mount_point, sizeof_mp - 1); + + debug_msg("Fs %s found at %s\n", fs, mp); + return 1; +} + +/* + * get_self_cgroup - get the current thread cgroup path + * + * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: + * + * 0::/user.slice/user-0.slice/session-3.scope'\n' + * + * This function is interested in the content after the second : and before the '\n'. + * + * Returns 1 if a string was found, 0 otherwise. + */ +static int get_self_cgroup(char *self_cg, int sizeof_self_cg) +{ + char path[MAX_PATH], *start; + int fd, retval; + + snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); + + fd = open(path, O_RDONLY); + if (fd < 0) + return 0; + + retval = read(fd, path, MAX_PATH); + + close(fd); + + if (retval <= 0) + return 0; + + start = path; + + start = strstr(start, ":"); + if (!start) + return 0; + + /* skip ":" */ + start++; + + start = strstr(start, ":"); + if (!start) + return 0; + + /* skip ":" */ + start++; + + if (strlen(start) >= sizeof_self_cg) + return 0; + + snprintf(self_cg, sizeof_self_cg, "%s", start); + + /* Swap '\n' with '\0' */ + start = strstr(self_cg, "\n"); + + /* there must be '\n' */ + if (!start) + return 0; + + /* ok, it found a string after the second : and before the \n */ + *start = '\0'; + + return 1; +} + +/* + * set_comm_cgroup - Set cgroup to pid_t pid + * + * If cgroup argument is not NULL, the threads will move to the given cgroup. + * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. + * + * Supports cgroup v2. + * + * Returns 1 on success, 0 otherwise. + */ +int set_pid_cgroup(pid_t pid, const char *cgroup) +{ + char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; + char cgroup_procs[MAX_PATH]; + char pid_str[24]; + int retval; + int cg_fd; + + retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); + if (!retval) { + err_msg("Did not find cgroupv2 mount point\n"); + return 0; + } + + if (!cgroup) { + retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path)); + if (!retval) { + err_msg("Did not find self cgroup\n"); + return 0; + } + } else { + snprintf(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); + } + + snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); + + debug_msg("Using cgroup path at: %s\n", cgroup_procs); + + cg_fd = open(cgroup_procs, O_RDWR); + if (cg_fd < 0) + return 0; + + snprintf(pid_str, sizeof(pid_str), "%d\n", pid); + + retval = write(cg_fd, pid_str, strlen(pid_str)); + if (retval < 0) + err_msg("Error setting cgroup attributes for pid:%s - %s\n", + pid_str, strerror(errno)); + else + debug_msg("Set cgroup attributes for pid:%s\n", pid_str); + + close(cg_fd); + + return (retval >= 0); +} + +/** + * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix + * + * If cgroup argument is not NULL, the threads will move to the given cgroup. + * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. + * + * Supports cgroup v2. + * + * Returns 1 on success, 0 otherwise. + */ +int set_comm_cgroup(const char *comm_prefix, const char *cgroup) +{ + char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; + char cgroup_procs[MAX_PATH]; + struct dirent *proc_entry; + DIR *procfs; + int retval; + int cg_fd; + + if (strlen(comm_prefix) >= MAX_PATH) { + err_msg("Command prefix is too long: %d < strlen(%s)\n", + MAX_PATH, comm_prefix); + return 0; + } + + retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); + if (!retval) { + err_msg("Did not find cgroupv2 mount point\n"); + return 0; + } + + if (!cgroup) { + retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path)); + if (!retval) { + err_msg("Did not find self cgroup\n"); + return 0; + } + } else { + snprintf(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); + } + + snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); + + debug_msg("Using cgroup path at: %s\n", cgroup_procs); + + cg_fd = open(cgroup_procs, O_RDWR); + if (cg_fd < 0) + return 0; + + procfs = opendir("/proc"); + if (!procfs) { + err_msg("Could not open procfs\n"); + goto out_cg; + } + + while ((proc_entry = readdir(procfs))) { + + retval = procfs_is_workload_pid(comm_prefix, proc_entry); + if (!retval) + continue; + + retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); + if (retval < 0) { + err_msg("Error setting cgroup attributes for pid:%s - %s\n", + proc_entry->d_name, strerror(errno)); + goto out_procfs; + } + + debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); + } + + closedir(procfs); + close(cg_fd); + return 1; + +out_procfs: + closedir(procfs); +out_cg: + close(cg_fd); + return 0; +} + +/** + * auto_house_keeping - Automatically move rtla out of measurement threads + * + * Try to move rtla away from the tracer, if possible. + * + * Returns 1 on success, 0 otherwise. + */ +int auto_house_keeping(cpu_set_t *monitored_cpus) +{ + cpu_set_t rtla_cpus, house_keeping_cpus; + int retval; + + /* first get the CPUs in which rtla can actually run. */ + retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); + if (retval == -1) { + debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); + return 0; + } + + /* then check if the existing setup is already good. */ + CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); + if (!CPU_COUNT(&house_keeping_cpus)) { + debug_msg("rtla and the monitored CPUs do not share CPUs."); + debug_msg("Skipping auto house-keeping\n"); + return 1; + } + + /* remove the intersection */ + CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); + + /* get only those that rtla can run */ + CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); + + /* is there any cpu left? */ + if (!CPU_COUNT(&house_keeping_cpus)) { + debug_msg("Could not find any CPU for auto house-keeping\n"); + return 0; + } + + retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); + if (retval == -1) { + debug_msg("Could not set affinity for auto house-keeping\n"); + return 0; + } + + debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); + + return 1; +} + +/** + * parse_optional_arg - Parse optional argument value + * + * Parse optional argument value, which can be in the form of: + * -sarg, -s/--long=arg, -s/--long arg + * + * Returns arg value if found, NULL otherwise. + */ +char *parse_optional_arg(int argc, char **argv) +{ + if (optarg) { + if (optarg[0] == '=') { + /* skip the = */ + return &optarg[1]; + } else { + return optarg; + } + /* parse argument of form -s [arg] and --long [arg]*/ + } else if (optind < argc && argv[optind][0] != '-') { + /* consume optind */ + return argv[optind++]; + } else { + return NULL; + } +} diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index 5571afd3b549..091df4ba4587 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -1,12 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 + #include <stdint.h> #include <time.h> +#include <sched.h> /* * '18446744073709551615\0' */ #define BUFF_U64_STR_SIZE 24 #define MAX_PATH 1024 +#define MAX_NICE 20 +#define MIN_NICE -19 #define container_of(ptr, type, member)({ \ const typeof(((type *)0)->member) *__mptr = (ptr); \ @@ -15,11 +19,13 @@ extern int config_debug; void debug_msg(const char *fmt, ...); void err_msg(const char *fmt, ...); +void fatal(const char *fmt, ...); long parse_seconds_duration(char *val); void get_duration(time_t start_time, char *output, int output_size); int parse_cpu_list(char *cpu_list, char **monitored_cpus); +char *parse_optional_arg(int argc, char **argv); long long get_llong_from_str(char *start); static inline void @@ -42,6 +48,7 @@ update_sum(unsigned long long *a, unsigned long long *b) *a += *b; } +#ifndef SCHED_ATTR_SIZE_VER0 struct sched_attr { uint32_t size; uint32_t sched_policy; @@ -52,7 +59,35 @@ struct sched_attr { uint64_t sched_deadline; uint64_t sched_period; }; +#endif /* SCHED_ATTR_SIZE_VER0 */ int parse_prio(char *arg, struct sched_attr *sched_param); +int parse_cpu_set(char *cpu_list, cpu_set_t *set); +int __set_sched_attr(int pid, struct sched_attr *attr); int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr); +int set_comm_cgroup(const char *comm_prefix, const char *cgroup); +int set_pid_cgroup(pid_t pid, const char *cgroup); int set_cpu_dma_latency(int32_t latency); +#ifdef HAVE_LIBCPUPOWER_SUPPORT +int save_cpu_idle_disable_state(unsigned int cpu); +int restore_cpu_idle_disable_state(unsigned int cpu); +void free_cpu_idle_disable_states(void); +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state); +static inline int have_libcpupower_support(void) { return 1; } +#else +static inline int save_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline int restore_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline void free_cpu_idle_disable_states(void) { } +static inline int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state) { return -1; } +static inline int have_libcpupower_support(void) { return 0; } +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ +int auto_house_keeping(cpu_set_t *monitored_cpus); + +#define ns_to_usf(x) (((double)x/1000)) +#define ns_to_per(total, part) ((part * 100) / (double)total) + +enum result { + PASSED = 0, /* same as EXIT_SUCCESS */ + ERROR = 1, /* same as EXIT_FAILURE, an error in arguments */ + FAILED = 2, /* test hit the stop tracing condition */ +}; |
