summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/histogram.rst3
-rw-r--r--kernel/trace/trace.c11
-rw-r--r--kernel/trace/trace_events_hist.c105
-rw-r--r--kernel/trace/trace_osnoise.c615
-rw-r--r--samples/ftrace/ftrace-direct-multi.c2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc63
6 files changed, 616 insertions, 183 deletions
diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
index 66ec972dfb78..859fd1b76c63 100644
--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -1766,7 +1766,8 @@ using the same key and variable from yet another event::
Expressions support the use of addition, subtraction, multiplication and
division operators (+-\*/).
-Note that division by zero always returns -1.
+Note if division by zero cannot be detected at parse time (i.e. the
+divisor is not a constant), the result will be -1.
Numeric constants can also be used directly in an expression::
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c88bbfe75d1d..f9139dc1262c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5605,6 +5605,7 @@ static const char readme_msg[] =
#ifdef CONFIG_HIST_TRIGGERS
" hist trigger\t- If set, event hits are aggregated into a hash table\n"
"\t Format: hist:keys=<field1[,field2,...]>\n"
+ "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
"\t [:values=<field1[,field2,...]>]\n"
"\t [:sort=<field1[,field2,...]>]\n"
"\t [:size=#entries]\n"
@@ -5616,6 +5617,16 @@ static const char readme_msg[] =
"\t common_timestamp - to record current timestamp\n"
"\t common_cpu - to record the CPU the event happened on\n"
"\n"
+ "\t A hist trigger variable can be:\n"
+ "\t - a reference to a field e.g. x=current_timestamp,\n"
+ "\t - a reference to another variable e.g. y=$x,\n"
+ "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
+ "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
+ "\n"
+ "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
+ "\t multiplication(*) and division(/) operators. An operand can be either a\n"
+ "\t variable reference, field or numeric literal.\n"
+ "\n"
"\t When a matching event is hit, an entry is added to a hash\n"
"\t table using the key(s) and value(s) named, and the value of a\n"
"\t sum called 'hitcount' is incremented. Keys and values\n"
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 61586f16a853..0abc9a413b4d 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -68,7 +68,8 @@
C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \
C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \
C(EXPECT_NUMBER, "Expecting numeric literal"), \
- C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"),
+ C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \
+ C(DIVISION_BY_ZERO, "Division by zero"),
#undef C
#define C(a, b) HIST_ERR_##a
@@ -92,6 +93,7 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field,
#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
#define HIST_ACTIONS_MAX 8
#define HIST_CONST_DIGITS_MAX 21
+#define HIST_DIV_SHIFT 20 /* For optimizing division by constants */
enum field_op_id {
FIELD_OP_NONE,
@@ -160,6 +162,8 @@ struct hist_field {
/* Numeric literals are represented as u64 */
u64 constant;
+ /* Used to optimize division by constants */
+ u64 div_multiplier;
};
static u64 hist_field_none(struct hist_field *field,
@@ -311,6 +315,68 @@ static u64 hist_field_div(struct hist_field *hist_field,
return div64_u64(val1, val2);
}
+static u64 div_by_power_of_two(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+
+ return val1 >> __ffs64(operand2->constant);
+}
+
+static u64 div_by_not_power_of_two(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+
+ return div64_u64(val1, operand2->constant);
+}
+
+static u64 div_by_mult_and_shift(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+
+ /*
+ * If the divisor is a constant, do a multiplication and shift instead.
+ *
+ * Choose Z = some power of 2. If Y <= Z, then:
+ * X / Y = (X * (Z / Y)) / Z
+ *
+ * (Z / Y) is a constant (mult) which is calculated at parse time, so:
+ * X / Y = (X * mult) / Z
+ *
+ * The division by Z can be replaced by a shift since Z is a power of 2:
+ * X / Y = (X * mult) >> HIST_DIV_SHIFT
+ *
+ * As long, as X < Z the results will not be off by more than 1.
+ */
+ if (val1 < (1 << HIST_DIV_SHIFT)) {
+ u64 mult = operand2->div_multiplier;
+
+ return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT;
+ }
+
+ return div64_u64(val1, operand2->constant);
+}
+
static u64 hist_field_mult(struct hist_field *hist_field,
struct tracing_map_elt *elt,
struct trace_buffer *buffer,
@@ -573,6 +639,25 @@ struct snapshot_context {
void *key;
};
+/*
+ * Returns the specific division function to use if the divisor
+ * is constant. This avoids extra branches when the trigger is hit.
+ */
+static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor)
+{
+ u64 div = divisor->constant;
+
+ if (!(div & (div - 1)))
+ return div_by_power_of_two;
+
+ /* If the divisor is too large, do a regular division */
+ if (div > (1 << HIST_DIV_SHIFT))
+ return div_by_not_power_of_two;
+
+ divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div);
+ return div_by_mult_and_shift;
+}
+
static void track_data_free(struct track_data *track_data)
{
struct hist_elt_data *elt_data;
@@ -2575,6 +2660,24 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
expr->operands[0] = operand1;
expr->operands[1] = operand2;
+ if (field_op == FIELD_OP_DIV &&
+ operand2_flags & HIST_FIELD_FL_CONST) {
+ u64 divisor = var2 ? var2->constant : operand2->constant;
+
+ if (!divisor) {
+ hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str));
+ ret = -EDOM;
+ goto free;
+ }
+
+ /*
+ * Copy the divisor here so we don't have to look it up
+ * later if this is a var ref
+ */
+ operand2->constant = divisor;
+ op_fn = hist_field_get_div_fn(operand2);
+ }
+
if (combine_consts) {
if (var1)
expr->operands[0] = var1;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index d11b41784fac..3e4a1651e329 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -38,8 +38,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>
-static struct trace_array *osnoise_trace;
-
/*
* Default values.
*/
@@ -51,6 +49,99 @@ static struct trace_array *osnoise_trace;
#define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */
/*
+ * trace_array of the enabled osnoise/timerlat instances.
+ */
+struct osnoise_instance {
+ struct list_head list;
+ struct trace_array *tr;
+};
+struct list_head osnoise_instances;
+
+static bool osnoise_has_registered_instances(void)
+{
+ return !!list_first_or_null_rcu(&osnoise_instances,
+ struct osnoise_instance,
+ list);
+}
+
+/*
+ * osnoise_instance_registered - check if a tr is already registered
+ */
+static int osnoise_instance_registered(struct trace_array *tr)
+{
+ struct osnoise_instance *inst;
+ int found = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(inst, &osnoise_instances, list) {
+ if (inst->tr == tr)
+ found = 1;
+ }
+ rcu_read_unlock();
+
+ return found;
+}
+
+/*
+ * osnoise_register_instance - register a new trace instance
+ *
+ * Register a trace_array *tr in the list of instances running
+ * osnoise/timerlat tracers.
+ */
+static int osnoise_register_instance(struct trace_array *tr)
+{
+ struct osnoise_instance *inst;
+
+ /*
+ * register/unregister serialization is provided by trace's
+ * trace_types_lock.
+ */
+ lockdep_assert_held(&trace_types_lock);
+
+ inst = kmalloc(sizeof(*inst), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD_RCU(&inst->list);
+ inst->tr = tr;
+ list_add_tail_rcu(&inst->list, &osnoise_instances);
+
+ return 0;
+}
+
+/*
+ * osnoise_unregister_instance - unregister a registered trace instance
+ *
+ * Remove the trace_array *tr from the list of instances running
+ * osnoise/timerlat tracers.
+ */
+static void osnoise_unregister_instance(struct trace_array *tr)
+{
+ struct osnoise_instance *inst;
+ int found = 0;
+
+ /*
+ * register/unregister serialization is provided by trace's
+ * trace_types_lock.
+ */
+ lockdep_assert_held(&trace_types_lock);
+
+ list_for_each_entry_rcu(inst, &osnoise_instances, list) {
+ if (inst->tr == tr) {
+ list_del_rcu(&inst->list);
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found)
+ return;
+
+ synchronize_rcu();
+ kfree(inst);
+}
+
+/*
* NMI runtime info.
*/
struct osn_nmi {
@@ -248,10 +339,56 @@ static struct osnoise_data {
#endif
};
-/*
- * Boolean variable used to inform that the tracer is currently sampling.
- */
-static bool osnoise_busy;
+#ifdef CONFIG_TIMERLAT_TRACER
+static inline bool timerlat_enabled(void)
+{
+ return osnoise_data.timerlat_tracer;
+}
+
+static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
+{
+ struct timerlat_variables *tlat_var = this_cpu_tmr_var();
+ /*
+ * If the timerlat is enabled, but the irq handler did
+ * not run yet enabling timerlat_tracer, do not trace.
+ */
+ if (!tlat_var->tracing_thread) {
+ osn_var->softirq.arrival_time = 0;
+ osn_var->softirq.delta_start = 0;
+ return 0;
+ }
+ return 1;
+}
+
+static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
+{
+ struct timerlat_variables *tlat_var = this_cpu_tmr_var();
+ /*
+ * If the timerlat is enabled, but the irq handler did
+ * not run yet enabling timerlat_tracer, do not trace.
+ */
+ if (!tlat_var->tracing_thread) {
+ osn_var->thread.delta_start = 0;
+ osn_var->thread.arrival_time = 0;
+ return 0;
+ }
+ return 1;
+}
+#else /* CONFIG_TIMERLAT_TRACER */
+static inline bool timerlat_enabled(void)
+{
+ return false;
+}
+
+static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
+{
+ return 1;
+}
+static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
+{
+ return 1;
+}
+#endif
#ifdef CONFIG_PREEMPT_RT
/*
@@ -315,19 +452,24 @@ static void print_osnoise_headers(struct seq_file *s)
* osnoise_taint - report an osnoise error.
*/
#define osnoise_taint(msg) ({ \
- struct trace_array *tr = osnoise_trace; \
+ struct osnoise_instance *inst; \
+ struct trace_buffer *buffer; \
\
- trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg); \
+ rcu_read_lock(); \
+ list_for_each_entry_rcu(inst, &osnoise_instances, list) { \
+ buffer = inst->tr->array_buffer.buffer; \
+ trace_array_printk_buf(buffer, _THIS_IP_, msg); \
+ } \
+ rcu_read_unlock(); \
osnoise_data.tainted = true; \
})
/*
* Record an osnoise_sample into the tracer buffer.
*/
-static void trace_osnoise_sample(struct osnoise_sample *sample)
+static void
+__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
- struct trace_array *tr = osnoise_trace;
- struct trace_buffer *buffer = tr->array_buffer.buffer;
struct trace_event_call *call = &event_osnoise;
struct ring_buffer_event *event;
struct osnoise_entry *entry;
@@ -350,6 +492,22 @@ static void trace_osnoise_sample(struct osnoise_sample *sample)
trace_buffer_unlock_commit_nostack(buffer, event);
}
+/*
+ * Record an osnoise_sample on all osnoise instances.
+ */
+static void trace_osnoise_sample(struct osnoise_sample *sample)
+{
+ struct osnoise_instance *inst;
+ struct trace_buffer *buffer;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(inst, &osnoise_instances, list) {
+ buffer = inst->tr->array_buffer.buffer;
+ __trace_osnoise_sample(sample, buffer);
+ }
+ rcu_read_unlock();
+}
+
#ifdef CONFIG_TIMERLAT_TRACER
/*
* Print the timerlat header info.
@@ -388,14 +546,10 @@ static void print_timerlat_headers(struct seq_file *s)
}
#endif /* CONFIG_PREEMPT_RT */
-/*
- * Record an timerlat_sample into the tracer buffer.
- */
-static void trace_timerlat_sample(struct timerlat_sample *sample)
+static void
+__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
- struct trace_array *tr = osnoise_trace;
struct trace_event_call *call = &event_osnoise;
- struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ring_buffer_event *event;
struct timerlat_entry *entry;
@@ -412,6 +566,22 @@ static void trace_timerlat_sample(struct timerlat_sample *sample)
trace_buffer_unlock_commit_nostack(buffer, event);
}
+/*
+ * Record an timerlat_sample into the tracer buffer.
+ */
+static void trace_timerlat_sample(struct timerlat_sample *sample)
+{
+ struct osnoise_instance *inst;
+ struct trace_buffer *buffer;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(inst, &osnoise_instances, list) {
+ buffer = inst->tr->array_buffer.buffer;
+ __trace_timerlat_sample(sample, buffer);
+ }
+ rcu_read_unlock();
+}
+
#ifdef CONFIG_STACKTRACE
#define MAX_CALLS 256
@@ -451,29 +621,18 @@ static void timerlat_save_stack(int skip)
return;
}
-/*
- * timerlat_dump_stack - dump a stack trace previously saved
- *
- * Dump a saved stack trace into the trace buffer.
- */
-static void timerlat_dump_stack(void)
+
+static void
+__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
{
struct trace_event_call *call = &event_osnoise;
- struct trace_array *tr = osnoise_trace;
- struct trace_buffer *buffer = tr->array_buffer.buffer;
struct ring_buffer_event *event;
- struct trace_stack *fstack;
struct stack_entry *entry;
- unsigned int size;
-
- preempt_disable_notrace();
- fstack = this_cpu_ptr(&trace_stack);
- size = fstack->stack_size;
event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
tracing_gen_ctx());
if (!event)
- goto out;
+ return;
entry = ring_buffer_event_data(event);
@@ -482,12 +641,39 @@ static void timerlat_dump_stack(void)
if (!call_filter_check_discard(call, entry, buffer, event))
trace_buffer_unlock_commit_nostack(buffer, event);
+}
-out:
+/*
+ * timerlat_dump_stack - dump a stack trace previously saved
+ */
+static void timerlat_dump_stack(u64 latency)
+{
+ struct osnoise_instance *inst;
+ struct trace_buffer *buffer;
+ struct trace_stack *fstack;
+ unsigned int size;
+
+ /*
+ * trace only if latency > print_stack config, if enabled.
+ */
+ if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
+ return;
+
+ preempt_disable_notrace();
+ fstack = this_cpu_ptr(&trace_stack);
+ size = fstack->stack_size;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(inst, &osnoise_instances, list) {
+ buffer = inst->tr->array_buffer.buffer;
+ __timerlat_dump_stack(buffer, fstack, size);
+
+ }
+ rcu_read_unlock();
preempt_enable_notrace();
}
-#else
-#define timerlat_dump_stack() do {} while (0)
+#else /* CONFIG_STACKTRACE */
+#define timerlat_dump_stack(u64 latency) do {} while (0)
#define timerlat_save_stack(a) do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */
@@ -867,21 +1053,9 @@ static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
if (!osn_var->sampling)
return;
-#ifdef CONFIG_TIMERLAT_TRACER
- /*
- * If the timerlat is enabled, but the irq handler did
- * not run yet enabling timerlat_tracer, do not trace.
- */
- if (unlikely(osnoise_data.timerlat_tracer)) {
- struct timerlat_variables *tlat_var;
- tlat_var = this_cpu_tmr_var();
- if (!tlat_var->tracing_thread) {
- osn_var->softirq.arrival_time = 0;
- osn_var->softirq.delta_start = 0;
+ if (unlikely(timerlat_enabled()))
+ if (!timerlat_softirq_exit(osn_var))
return;
- }
- }
-#endif
duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
@@ -975,17 +1149,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
if (!osn_var->sampling)
return;
-#ifdef CONFIG_TIMERLAT_TRACER
- if (osnoise_data.timerlat_tracer) {
- struct timerlat_variables *tlat_var;
- tlat_var = this_cpu_tmr_var();
- if (!tlat_var->tracing_thread) {
- osn_var->thread.delta_start = 0;
- osn_var->thread.arrival_time = 0;
+ if (unlikely(timerlat_enabled()))
+ if (!timerlat_thread_exit(osn_var))
return;
- }
- }
-#endif
duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);
@@ -1078,12 +1244,37 @@ diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *
*/
static __always_inline void osnoise_stop_tracing(void)
{
- struct trace_array *tr = osnoise_trace;
+ struct osnoise_instance *inst;
+ struct trace_array *tr;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(inst, &osnoise_instances, list) {
+ tr = inst->tr;
+ trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
+ "stop tracing hit on cpu %d\n", smp_processor_id());
+
+ tracer_tracing_off(tr);
+ }
+ rcu_read_unlock();
+}
- trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
- "stop tracing hit on cpu %d\n", smp_processor_id());
+/*
+ * notify_new_max_latency - Notify a new max latency via fsnotify interface.
+ */
+static void notify_new_max_latency(u64 latency)
+{
+ struct osnoise_instance *inst;
+ struct trace_array *tr;
- tracer_tracing_off(tr);
+ rcu_read_lock();
+ list_for_each_entry_rcu(inst, &osnoise_instances, list) {
+ tr = inst->tr;
+ if (tr->max_latency < latency) {
+ tr->max_latency = latency;
+ latency_fsnotify(tr);
+ }
+ }
+ rcu_read_unlock();
}
/*
@@ -1097,7 +1288,6 @@ static __always_inline void osnoise_stop_tracing(void)
static int run_osnoise(void)
{
struct osnoise_variables *osn_var = this_cpu_osn_var();
- struct trace_array *tr = osnoise_trace;
u64 start, sample, last_sample;
u64 last_int_count, int_count;
s64 noise = 0, max_noise = 0;
@@ -1232,11 +1422,7 @@ static int run_osnoise(void)
trace_osnoise_sample(&s);
- /* Keep a running maximum ever recorded osnoise "latency" */
- if (max_noise > tr->max_latency) {
- tr->max_latency = max_noise;
- latency_fsnotify(tr);
- }
+ notify_new_max_latency(max_noise);
if (osnoise_data.stop_tracing_total)
if (s.noise > osnoise_data.stop_tracing_total)
@@ -1294,7 +1480,6 @@ static int osnoise_main(void *data)
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
struct osnoise_variables *osn_var = this_cpu_osn_var();
- struct trace_array *tr = osnoise_trace;
struct timerlat_variables *tlat;
struct timerlat_sample s;
u64 now;
@@ -1333,9 +1518,11 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
* running, the thread needs to receive the softirq delta_start. The
* reason being is that the softirq will be the last to be unfolded,
* resseting the thread delay to zero.
+ *
+ * The PREEMPT_RT is a special case, though. As softirqs run as threads
+ * on RT, moving the thread is enough.
*/
-#ifndef CONFIG_PREEMPT_RT
- if (osn_var->softirq.delta_start) {
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
&osn_var->softirq.delta_start);
@@ -1345,13 +1532,6 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
&osn_var->irq.delta_start);
}
-#else /* CONFIG_PREEMPT_RT */
- /*
- * The sofirqs run as threads on RT, so there is not need
- * to keep track of it.
- */
- copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start);
-#endif /* CONFIG_PREEMPT_RT */
/*
* Compute the current time with the expected time.
@@ -1365,11 +1545,7 @@ static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
trace_timerlat_sample(&s);
- /* Keep a running maximum ever recorded os noise "latency" */
- if (diff > tr->max_latency) {
- tr->max_latency = diff;
- latency_fsnotify(tr);
- }
+ notify_new_max_latency(diff);
if (osnoise_data.stop_tracing)
if (time_to_us(diff) >= osnoise_data.stop_tracing)
@@ -1457,11 +1633,7 @@ static int timerlat_main(void *data)
trace_timerlat_sample(&s);
-#ifdef CONFIG_STACKTRACE
- if (osnoise_data.print_stack)
- if (osnoise_data.print_stack <= time_to_us(diff))
- timerlat_dump_stack();
-#endif /* CONFIG_STACKTRACE */
+ timerlat_dump_stack(time_to_us(diff));
tlat->tracing_thread = false;
if (osnoise_data.stop_tracing_total)
@@ -1474,6 +1646,11 @@ static int timerlat_main(void *data)
hrtimer_cancel(&tlat->timer);
return 0;
}
+#else /* CONFIG_TIMERLAT_TRACER */
+static int timerlat_main(void *data)
+{
+ return 0;
+}
#endif /* CONFIG_TIMERLAT_TRACER */
/*
@@ -1516,16 +1693,13 @@ static int start_kthread(unsigned int cpu)
void *main = osnoise_main;
char comm[24];
-#ifdef CONFIG_TIMERLAT_TRACER
- if (osnoise_data.timerlat_tracer) {
+ if (timerlat_enabled()) {
snprintf(comm, 24, "timerlat/%d", cpu);
main = timerlat_main;
} else {
snprintf(comm, 24, "osnoise/%d", cpu);
}
-#else
- snprintf(comm, 24, "osnoise/%d", cpu);
-#endif
+
kthread = kthread_create_on_cpu(main, NULL, cpu, comm);
if (IS_ERR(kthread)) {
@@ -1546,7 +1720,7 @@ static int start_kthread(unsigned int cpu)
* This starts the kernel thread that will look for osnoise on many
* cpus.
*/
-static int start_per_cpu_kthreads(struct trace_array *tr)
+static int start_per_cpu_kthreads(void)
{
struct cpumask *current_mask = &save_cpumask;
int retval = 0;
@@ -1554,13 +1728,9 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
cpus_read_lock();
/*
- * Run only on CPUs in which trace and osnoise are allowed to run.
- */
- cpumask_and(current_mask, tr->tracing_cpumask, &osnoise_cpumask);
- /*
- * And the CPU is online.
+ * Run only on online CPUs in which osnoise is allowed to run.
*/
- cpumask_and(current_mask, cpu_online_mask, current_mask);
+ cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
for_each_possible_cpu(cpu)
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
@@ -1581,13 +1751,11 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
#ifdef CONFIG_HOTPLUG_CPU
static void osnoise_hotplug_workfn(struct work_struct *dummy)
{
- struct trace_array *tr = osnoise_trace;
unsigned int cpu = smp_processor_id();
-
mutex_lock(&trace_types_lock);
- if (!osnoise_busy)
+ if (!osnoise_has_registered_instances())
goto out_unlock_trace;
mutex_lock(&interface_lock);
@@ -1596,9 +1764,6 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
goto out_unlock;
- if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
- goto out_unlock;
-
start_kthread(cpu);
out_unlock:
@@ -1687,9 +1852,6 @@ out_unlock:
return count;
}
-static void osnoise_tracer_start(struct trace_array *tr);
-static void osnoise_tracer_stop(struct trace_array *tr);
-
/*
* osnoise_cpus_write - Write function for "cpus" entry
* @filp: The active open file structure
@@ -1701,19 +1863,15 @@ static void osnoise_tracer_stop(struct trace_array *tr);
* interface to the osnoise trace. By default, it lists all CPUs,
* in this way, allowing osnoise threads to run on any online CPU
* of the system. It serves to restrict the execution of osnoise to the
- * set of CPUs writing via this interface. Note that osnoise also
- * respects the "tracing_cpumask." Hence, osnoise threads will run only
- * on the set of CPUs allowed here AND on "tracing_cpumask." Why not
- * have just "tracing_cpumask?" Because the user might be interested
- * in tracing what is running on other CPUs. For instance, one might
- * run osnoise in one HT CPU while observing what is running on the
- * sibling HT CPU.
+ * set of CPUs writing via this interface. Why not use "tracing_cpumask"?
+ * Because the user might be interested in tracing what is running on
+ * other CPUs. For instance, one might run osnoise in one HT CPU
+ * while observing what is running on the sibling HT CPU.
*/
static ssize_t
osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
loff_t *ppos)
{
- struct trace_array *tr = osnoise_trace;
cpumask_var_t osnoise_cpumask_new;
int running, err;
char buf[256];
@@ -1732,13 +1890,12 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
goto err_free;
/*
- * trace_types_lock is taken to avoid concurrency on start/stop
- * and osnoise_busy.
+ * trace_types_lock is taken to avoid concurrency on start/stop.
*/
mutex_lock(&trace_types_lock);
- running = osnoise_busy;
+ running = osnoise_has_registered_instances();
if (running)
- osnoise_tracer_stop(tr);
+ stop_per_cpu_kthreads();
mutex_lock(&interface_lock);
/*
@@ -1752,7 +1909,7 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
mutex_unlock(&interface_lock);
if (running)
- osnoise_tracer_start(tr);
+ start_per_cpu_kthreads();
mutex_unlock(&trace_types_lock);
free_cpumask_var(osnoise_cpumask_new);
@@ -1836,6 +1993,47 @@ static const struct file_operations cpus_fops = {
.llseek = generic_file_llseek,
};
+#ifdef CONFIG_TIMERLAT_TRACER
+#ifdef CONFIG_STACKTRACE
+static int init_timerlat_stack_tracefs(struct dentry *top_dir)
+{
+ struct dentry *tmp;
+
+ tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
+ &osnoise_print_stack, &trace_min_max_fops);
+ if (!tmp)
+ return -ENOMEM;
+
+ return 0;
+}
+#else /* CONFIG_STACKTRACE */
+static int init_timerlat_stack_tracefs(struct dentry *top_dir)
+{
+ return 0;
+}
+#endif /* CONFIG_STACKTRACE */
+
+/*
+ * init_timerlat_tracefs - A function to initialize the timerlat interface files
+ */
+static int init_timerlat_tracefs(struct dentry *top_dir)
+{
+ struct dentry *tmp;
+
+ tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
+ &timerlat_period, &trace_min_max_fops);
+ if (!tmp)
+ return -ENOMEM;
+
+ return init_timerlat_stack_tracefs(top_dir);
+}
+#else /* CONFIG_TIMERLAT_TRACER */
+static int init_timerlat_tracefs(struct dentry *top_dir)
+{
+ return 0;
+}
+#endif /* CONFIG_TIMERLAT_TRACER */
+
/*
* init_tracefs - A function to initialize the tracefs interface files
*
@@ -1880,19 +2078,10 @@ static int init_tracefs(void)
tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
if (!tmp)
goto err;
-#ifdef CONFIG_TIMERLAT_TRACER
-#ifdef CONFIG_STACKTRACE
- tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
- &osnoise_print_stack, &trace_min_max_fops);
- if (!tmp)
- goto err;
-#endif
- tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
- &timerlat_period, &trace_min_max_fops);
- if (!tmp)
+ ret = init_timerlat_tracefs(top_dir);
+ if (ret)
goto err;
-#endif
return 0;
@@ -1933,74 +2122,110 @@ out_unhook_irq:
return -EINVAL;
}
-static int __osnoise_tracer_start(struct trace_array *tr)
+/*
+ * osnoise_workload_start - start the workload and hook to events
+ */
+static int osnoise_workload_start(void)
{
int retval;
+ /*
+ * Instances need to be registered after calling workload
+ * start. Hence, if there is already an instance, the
+ * workload was already registered. Otherwise, this
+ * code is on the way to register the first instance,
+ * and the workload will start.
+ */
+ if (osnoise_has_registered_instances())
+ return 0;
+
osn_var_reset_all();
retval = osnoise_hook_events();
if (retval)
return retval;
+
/*
- * Make sure NMIs see reseted values.
+ * Make sure that ftrace_nmi_enter/exit() see reset values
+ * before enabling trace_osnoise_callback_enabled.
*/
barrier();
trace_osnoise_callback_enabled = true;
- retval = start_per_cpu_kthreads(tr);
+ retval = start_per_cpu_kthreads();
if (retval) {
unhook_irq_events();
return retval;
}
- osnoise_busy = true;
-
return 0;
}
+/*
+ * osnoise_workload_stop - stop the workload and unhook the events
+ */
+static void osnoise_workload_stop(void)
+{
+ /*
+ * Instances need to be unregistered before calling
+ * stop. Hence, if there is a registered instance, more
+ * than one instance is running, and the workload will not
+ * yet stop. Otherwise, this code is on the way to disable
+ * the last instance, and the workload can stop.
+ */
+ if (osnoise_has_registered_instances())
+ return;
+
+ trace_osnoise_callback_enabled = false;
+ /*
+ * Make sure that ftrace_nmi_enter/exit() see
+ * trace_osnoise_callback_enabled as false before continuing.
+ */
+ barrier();
+
+ stop_per_cpu_kthreads();
+
+ unhook_irq_events();
+ unhook_softirq_events();
+ unhook_thread_events();
+}
+
static void osnoise_tracer_start(struct trace_array *tr)
{
int retval;
- if (osnoise_busy)
+ /*
+ * If the instance is already registered, there is no need to
+ * register it again.
+ */
+ if (osnoise_instance_registered(tr))
return;
- retval = __osnoise_tracer_start(tr);
+ retval = osnoise_workload_start();
if (retval)
pr_err(BANNER "Error starting osnoise tracer\n");
+ osnoise_register_instance(tr);
}
static void osnoise_tracer_stop(struct trace_array *tr)
{
- if (!osnoise_busy)
- return;
-
- trace_osnoise_callback_enabled = false;
- barrier();
-
- stop_per_cpu_kthreads();
-
- unhook_irq_events();
- unhook_softirq_events();
- unhook_thread_events();
-
- osnoise_busy = false;
+ osnoise_unregister_instance(tr);
+ osnoise_workload_stop();
}
static int osnoise_tracer_init(struct trace_array *tr)
{
-
- /* Only allow one instance to enable this */
- if (osnoise_busy)
+ /*
+ * Only allow osnoise tracer if timerlat tracer is not running
+ * already.
+ */
+ if (timerlat_enabled())
return -EBUSY;
- osnoise_trace = tr;
tr->max_latency = 0;
osnoise_tracer_start(tr);
-
return 0;
}
@@ -2024,45 +2249,55 @@ static void timerlat_tracer_start(struct trace_array *tr)
{
int retval;
- if (osnoise_busy)
+ /*
+ * If the instance is already registered, there is no need to
+ * register it again.
+ */
+ if (osnoise_instance_registered(tr))
return;
- osnoise_data.timerlat_tracer = 1;
-
- retval = __osnoise_tracer_start(tr);
+ retval = osnoise_workload_start();
if (retval)
- goto out_err;
+ pr_err(BANNER "Error starting timerlat tracer\n");
+
+ osnoise_register_instance(tr);
return;
-out_err:
- pr_err(BANNER "Error starting timerlat tracer\n");
}
static void timerlat_tracer_stop(struct trace_array *tr)
{
int cpu;
- if (!osnoise_busy)
- return;
-
- for_each_online_cpu(cpu)
- per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
+ osnoise_unregister_instance(tr);
- osnoise_tracer_stop(tr);
+ /*
+ * Instruct the threads to stop only if this is the last instance.
+ */
+ if (!osnoise_has_registered_instances()) {
+ for_each_online_cpu(cpu)
+ per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
+ }
- osnoise_data.timerlat_tracer = 0;
+ osnoise_workload_stop();
}
static int timerlat_tracer_init(struct trace_array *tr)
{
- /* Only allow one instance to enable this */
- if (osnoise_busy)
+ /*
+ * Only allow timerlat tracer if osnoise tracer is not running already.
+ */
+ if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
return -EBUSY;
- osnoise_trace = tr;
+ /*
+ * If this is the first instance, set timerlat_tracer to block
+ * osnoise tracer start.
+ */
+ if (!osnoise_has_registered_instances())
+ osnoise_data.timerlat_tracer = 1;
tr->max_latency = 0;
-
timerlat_tracer_start(tr);
return 0;
@@ -2071,6 +2306,13 @@ static int timerlat_tracer_init(struct trace_array *tr)
static void timerlat_tracer_reset(struct trace_array *tr)
{
timerlat_tracer_stop(tr);
+
+ /*
+ * If this is the last instance, reset timerlat_tracer allowing
+ * osnoise to be started.
+ */
+ if (!osnoise_has_registered_instances())
+ osnoise_data.timerlat_tracer = 0;
}
static struct tracer timerlat_tracer __read_mostly = {
@@ -2082,6 +2324,16 @@ static struct tracer timerlat_tracer __read_mostly = {
.print_header = print_timerlat_headers,
.allow_instances = true,
};
+
+__init static int init_timerlat_tracer(void)
+{
+ return register_tracer(&timerlat_tracer);
+}
+#else /* CONFIG_TIMERLAT_TRACER */
+__init static int init_timerlat_tracer(void)
+{
+ return 0;
+}
#endif /* CONFIG_TIMERLAT_TRACER */
__init static int init_osnoise_tracer(void)
@@ -2098,15 +2350,16 @@ __init static int init_osnoise_tracer(void)
return ret;
}
-#ifdef CONFIG_TIMERLAT_TRACER
- ret = register_tracer(&timerlat_tracer);
+ ret = init_timerlat_tracer();
if (ret) {
- pr_err(BANNER "Error registering timerlat\n");
+ pr_err(BANNER "Error registering timerlat!\n");
return ret;
}
-#endif
+
osnoise_init_hotplug_support();
+ INIT_LIST_HEAD_RCU(&osnoise_instances);
+
init_tracefs();
return 0;
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index 2a5b1fb7ac14..b6d7806b400e 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -5,6 +5,8 @@
#include <linux/ftrace.h>
#include <linux/sched/stat.h>
+extern void my_direct_func(unsigned long ip);
+
void my_direct_func(unsigned long ip)
{
trace_printk("ip %lx\n", ip);
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
new file mode 100644
index 000000000000..05ffba299dbf
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
@@ -0,0 +1,63 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram expression parsing
+# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README
+
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+test_hist_expr() { # test_name expression expected_val
+ trigger="events/sched/sched_process_fork/trigger"
+
+ reset_trigger_file $trigger
+
+ echo "Test hist trigger expressions - $1"
+
+ echo "hist:keys=common_pid:x=$2" > $trigger
+
+ for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+
+ actual=`grep -o 'x=[[:digit:]]*' $trigger | awk -F= '{ print $2 }'`
+
+ if [ $actual != $3 ]; then
+ fail "Failed hist trigger expression evaluation: Expression: $2 Expected: $3, Actual: $actual"
+ fi
+
+ reset_trigger_file $trigger
+}
+
+check_error() { # test_name command-with-error-pos-by-^
+ trigger="events/sched/sched_process_fork/trigger"
+
+ echo "Test hist trigger expressions - $1"
+ ftrace_errlog_check 'hist:sched:sched_process_fork' "$2" $trigger
+}
+
+test_hist_expr "Variable assignment" "123" "123"
+
+test_hist_expr "Subtraction not associative" "16-8-4-2" "2"
+
+test_hist_expr "Division not associative" "64/8/4/2" "1"
+
+test_hist_expr "Same precedence operators (+,-) evaluated left to right" "16-8+4+2" "14"
+
+test_hist_expr "Same precedence operators (*,/) evaluated left to right" "4*3/2*2" "12"
+
+test_hist_expr "Multiplication evaluated before addition/subtraction" "4+3*2-2" "8"
+
+test_hist_expr "Division evaluated before addition/subtraction" "4+6/2-2" "5"
+
+# err pos for "too many subexpressions" is dependent on where
+# the last subexpression was detected. This can vary depending
+# on how the expression tree was generated.
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=32+^10*3/20-4'
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=^1+2+3+4+5'
+
+check_error "Unary minus not supported in subexpression" 'hist:keys=common_pid:x=-(^1)+2'
+
+check_error "Division by zero" 'hist:keys=common_pid:x=3/^0'
+
+exit 0