Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig  26
-rw-r--r--  kernel/trace/blktrace.c  25
-rw-r--r--  kernel/trace/bpf_trace.c  411
-rw-r--r--  kernel/trace/fgraph.c  18
-rw-r--r--  kernel/trace/fprobe.c  9
-rw-r--r--  kernel/trace/ftrace.c  64
-rw-r--r--  kernel/trace/pid_list.c  8
-rw-r--r--  kernel/trace/power-traces.c  1
-rw-r--r--  kernel/trace/ring_buffer.c  403
-rw-r--r--  kernel/trace/rv/Kconfig  43
-rw-r--r--  kernel/trace/rv/Makefile  9
-rw-r--r--  kernel/trace/rv/monitors/nrp/Kconfig (renamed from kernel/trace/rv/monitors/tss/Kconfig)  12
-rw-r--r--  kernel/trace/rv/monitors/nrp/nrp.c  138
-rw-r--r--  kernel/trace/rv/monitors/nrp/nrp.h  75
-rw-r--r--  kernel/trace/rv/monitors/nrp/nrp_trace.h  15
-rw-r--r--  kernel/trace/rv/monitors/opid/Kconfig  19
-rw-r--r--  kernel/trace/rv/monitors/opid/opid.c  168
-rw-r--r--  kernel/trace/rv/monitors/opid/opid.h  104
-rw-r--r--  kernel/trace/rv/monitors/opid/opid_trace.h (renamed from kernel/trace/rv/monitors/sncid/sncid_trace.h)  8
-rw-r--r--  kernel/trace/rv/monitors/pagefault/Kconfig  20
-rw-r--r--  kernel/trace/rv/monitors/pagefault/pagefault.c  88
-rw-r--r--  kernel/trace/rv/monitors/pagefault/pagefault.h  64
-rw-r--r--  kernel/trace/rv/monitors/pagefault/pagefault_trace.h  14
-rw-r--r--  kernel/trace/rv/monitors/rtapp/Kconfig  11
-rw-r--r--  kernel/trace/rv/monitors/rtapp/rtapp.c  33
-rw-r--r--  kernel/trace/rv/monitors/rtapp/rtapp.h  3
-rw-r--r--  kernel/trace/rv/monitors/sched/Kconfig  1
-rw-r--r--  kernel/trace/rv/monitors/sched/sched.c  3
-rw-r--r--  kernel/trace/rv/monitors/sco/sco.c  7
-rw-r--r--  kernel/trace/rv/monitors/scpd/Kconfig  2
-rw-r--r--  kernel/trace/rv/monitors/scpd/scpd.c  7
-rw-r--r--  kernel/trace/rv/monitors/sleep/Kconfig  22
-rw-r--r--  kernel/trace/rv/monitors/sleep/sleep.c  237
-rw-r--r--  kernel/trace/rv/monitors/sleep/sleep.h  257
-rw-r--r--  kernel/trace/rv/monitors/sleep/sleep_trace.h  14
-rw-r--r--  kernel/trace/rv/monitors/sncid/sncid.c  96
-rw-r--r--  kernel/trace/rv/monitors/sncid/sncid.h  49
-rw-r--r--  kernel/trace/rv/monitors/snep/Kconfig  2
-rw-r--r--  kernel/trace/rv/monitors/snep/snep.c  7
-rw-r--r--  kernel/trace/rv/monitors/snep/snep.h  14
-rw-r--r--  kernel/trace/rv/monitors/snroc/snroc.c  3
-rw-r--r--  kernel/trace/rv/monitors/sssw/Kconfig (renamed from kernel/trace/rv/monitors/sncid/Kconfig)  10
-rw-r--r--  kernel/trace/rv/monitors/sssw/sssw.c  116
-rw-r--r--  kernel/trace/rv/monitors/sssw/sssw.h  105
-rw-r--r--  kernel/trace/rv/monitors/sssw/sssw_trace.h  15
-rw-r--r--  kernel/trace/rv/monitors/sts/Kconfig  19
-rw-r--r--  kernel/trace/rv/monitors/sts/sts.c  156
-rw-r--r--  kernel/trace/rv/monitors/sts/sts.h  117
-rw-r--r--  kernel/trace/rv/monitors/sts/sts_trace.h (renamed from kernel/trace/rv/monitors/tss/tss_trace.h)  8
-rw-r--r--  kernel/trace/rv/monitors/tss/tss.c  91
-rw-r--r--  kernel/trace/rv/monitors/tss/tss.h  47
-rw-r--r--  kernel/trace/rv/monitors/wip/Kconfig  2
-rw-r--r--  kernel/trace/rv/monitors/wip/wip.c  3
-rw-r--r--  kernel/trace/rv/monitors/wwnr/wwnr.c  3
-rw-r--r--  kernel/trace/rv/reactor_panic.c  8
-rw-r--r--  kernel/trace/rv/reactor_printk.c  8
-rw-r--r--  kernel/trace/rv/rv.c  220
-rw-r--r--  kernel/trace/rv/rv.h  39
-rw-r--r--  kernel/trace/rv/rv_reactors.c  138
-rw-r--r--  kernel/trace/rv/rv_trace.h  166
-rw-r--r--  kernel/trace/trace.c  309
-rw-r--r--  kernel/trace/trace.h  30
-rw-r--r--  kernel/trace/trace_branch.c  4
-rw-r--r--  kernel/trace/trace_entries.h  12
-rw-r--r--  kernel/trace/trace_eprobe.c  53
-rw-r--r--  kernel/trace/trace_events.c  44
-rw-r--r--  kernel/trace/trace_events_filter.c  194
-rw-r--r--  kernel/trace/trace_events_hist.c  179
-rw-r--r--  kernel/trace/trace_events_trigger.c  64
-rw-r--r--  kernel/trace/trace_fprobe.c  614
-rw-r--r--  kernel/trace/trace_functions.c  24
-rw-r--r--  kernel/trace/trace_functions_graph.c  46
-rw-r--r--  kernel/trace/trace_irqsoff.c  47
-rw-r--r--  kernel/trace/trace_kdb.c  17
-rw-r--r--  kernel/trace/trace_kprobe.c  57
-rw-r--r--  kernel/trace/trace_mmiotrace.c  12
-rw-r--r--  kernel/trace/trace_osnoise.c  11
-rw-r--r--  kernel/trace/trace_output.c  60
-rw-r--r--  kernel/trace/trace_probe.c  152
-rw-r--r--  kernel/trace/trace_probe.h  26
-rw-r--r--  kernel/trace/trace_sched_wakeup.c  18
-rw-r--r--  kernel/trace/trace_stack.c  24
-rw-r--r--  kernel/trace/trace_uprobe.c  55
83 files changed, 4221 insertions, 1612 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a3f35c7d83b6..f135d04e1860 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -53,6 +53,12 @@ config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
bool
+config HAVE_EXTRA_IPI_TRACEPOINTS
+ bool
+ help
+ For architectures that use ipi_raise, ipi_entry and ipi_exit
+ tracepoints.
+
config HAVE_DYNAMIC_FTRACE_WITH_ARGS
bool
help
@@ -74,11 +80,6 @@ config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
If the architecture generates __patchable_function_entries sections
but does not want them included in the ftrace locations.
-config HAVE_FTRACE_MCOUNT_RECORD
- bool
- help
- See Documentation/trace/ftrace-design.rst
-
config HAVE_SYSCALL_TRACEPOINTS
bool
help
@@ -275,7 +276,7 @@ config FUNCTION_TRACE_ARGS
funcgraph-args (for the function graph tracer)
config DYNAMIC_FTRACE
- bool "enable/disable function tracing dynamically"
+ bool
depends on FUNCTION_TRACER
depends on HAVE_DYNAMIC_FTRACE
default y
@@ -803,27 +804,22 @@ config BPF_KPROBE_OVERRIDE
Allows BPF to override the execution of a probed function and
set a different return value. This is used for error injection.
-config FTRACE_MCOUNT_RECORD
- def_bool y
- depends on DYNAMIC_FTRACE
- depends on HAVE_FTRACE_MCOUNT_RECORD
-
config FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
bool
- depends on FTRACE_MCOUNT_RECORD
+ depends on DYNAMIC_FTRACE
config FTRACE_MCOUNT_USE_CC
def_bool y
depends on $(cc-option,-mrecord-mcount)
depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
- depends on FTRACE_MCOUNT_RECORD
+ depends on DYNAMIC_FTRACE
config FTRACE_MCOUNT_USE_OBJTOOL
def_bool y
depends on HAVE_OBJTOOL_MCOUNT
depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
depends on !FTRACE_MCOUNT_USE_CC
- depends on FTRACE_MCOUNT_RECORD
+ depends on DYNAMIC_FTRACE
select OBJTOOL
config FTRACE_MCOUNT_USE_RECORDMCOUNT
@@ -831,7 +827,7 @@ config FTRACE_MCOUNT_USE_RECORDMCOUNT
depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
depends on !FTRACE_MCOUNT_USE_CC
depends on !FTRACE_MCOUNT_USE_OBJTOOL
- depends on FTRACE_MCOUNT_RECORD
+ depends on DYNAMIC_FTRACE
config TRACING_MAP
bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 3f6a7bdc6edf..47168d2afbf1 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1875,6 +1875,29 @@ void blk_fill_rwbs(char *rwbs, blk_opf_t opf)
case REQ_OP_READ:
rwbs[i++] = 'R';
break;
+ case REQ_OP_ZONE_APPEND:
+ rwbs[i++] = 'Z';
+ rwbs[i++] = 'A';
+ break;
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_RESET_ALL:
+ rwbs[i++] = 'Z';
+ rwbs[i++] = 'R';
+ if ((opf & REQ_OP_MASK) == REQ_OP_ZONE_RESET_ALL)
+ rwbs[i++] = 'A';
+ break;
+ case REQ_OP_ZONE_FINISH:
+ rwbs[i++] = 'Z';
+ rwbs[i++] = 'F';
+ break;
+ case REQ_OP_ZONE_OPEN:
+ rwbs[i++] = 'Z';
+ rwbs[i++] = 'O';
+ break;
+ case REQ_OP_ZONE_CLOSE:
+ rwbs[i++] = 'Z';
+ rwbs[i++] = 'C';
+ break;
default:
rwbs[i++] = 'N';
}
@@ -1890,6 +1913,8 @@ void blk_fill_rwbs(char *rwbs, blk_opf_t opf)
if (opf & REQ_ATOMIC)
rwbs[i++] = 'U';
+ WARN_ON_ONCE(i >= RWBS_LEN);
+
rwbs[i] = '\0';
}
EXPORT_SYMBOL_GPL(blk_fill_rwbs);
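
[Editor's note] The new cases above give zoned-block operations their own action strings in blktrace output instead of the generic 'N'. A minimal sketch of the resulting opcode-to-string mapping (illustrative only; REQ_OP_* come from include/linux/blk_types.h, and the real function also appends modifier flags such as 'U' for REQ_ATOMIC):

	/* Illustrative sketch of the zone-opcode decoding added above. */
	static const char *zone_op_rwbs(enum req_op op)
	{
		switch (op) {
		case REQ_OP_ZONE_APPEND:	return "ZA";
		case REQ_OP_ZONE_RESET:		return "ZR";
		case REQ_OP_ZONE_RESET_ALL:	return "ZRA";	/* reset + 'A' for "all" */
		case REQ_OP_ZONE_FINISH:	return "ZF";
		case REQ_OP_ZONE_OPEN:		return "ZO";
		case REQ_OP_ZONE_CLOSE:		return "ZC";
		default:			return "N";	/* unknown op */
		}
	}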
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 187dc37d61d4..3ae52978cae6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -572,7 +572,7 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
return value;
}
-static const struct bpf_func_proto bpf_perf_event_read_proto = {
+const struct bpf_func_proto bpf_perf_event_read_proto = {
.func = bpf_perf_event_read,
.gpl_only = true,
.ret_type = RET_INTEGER,
@@ -781,8 +781,7 @@ BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
return (unsigned long) task_pt_regs(task);
}
-BTF_ID_LIST(bpf_task_pt_regs_ids)
-BTF_ID(struct, pt_regs)
+BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs)
const struct bpf_func_proto bpf_task_pt_regs_proto = {
.func = bpf_task_pt_regs,
@@ -882,7 +881,7 @@ BPF_CALL_1(bpf_send_signal, u32, sig)
return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
}
-static const struct bpf_func_proto bpf_send_signal_proto = {
+const struct bpf_func_proto bpf_send_signal_proto = {
.func = bpf_send_signal,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -894,7 +893,7 @@ BPF_CALL_1(bpf_send_signal_thread, u32, sig)
return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
}
-static const struct bpf_func_proto bpf_send_signal_thread_proto = {
+const struct bpf_func_proto bpf_send_signal_thread_proto = {
.func = bpf_send_signal_thread,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1185,7 +1184,7 @@ BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
return entry_cnt * br_entry_size;
}
-static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
+const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
.func = bpf_get_branch_snapshot,
.gpl_only = true,
.ret_type = RET_INTEGER,
@@ -1270,7 +1269,7 @@ __bpf_kfunc_start_defs();
* Return: a bpf_key pointer with a valid key pointer if the key is found, a
* NULL pointer otherwise.
*/
-__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
+__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
{
key_ref_t key_ref;
struct bpf_key *bkey;
@@ -1430,56 +1429,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
const struct bpf_func_proto *func_proto;
switch (func_id) {
- case BPF_FUNC_map_lookup_elem:
- return &bpf_map_lookup_elem_proto;
- case BPF_FUNC_map_update_elem:
- return &bpf_map_update_elem_proto;
- case BPF_FUNC_map_delete_elem:
- return &bpf_map_delete_elem_proto;
- case BPF_FUNC_map_push_elem:
- return &bpf_map_push_elem_proto;
- case BPF_FUNC_map_pop_elem:
- return &bpf_map_pop_elem_proto;
- case BPF_FUNC_map_peek_elem:
- return &bpf_map_peek_elem_proto;
- case BPF_FUNC_map_lookup_percpu_elem:
- return &bpf_map_lookup_percpu_elem_proto;
- case BPF_FUNC_ktime_get_ns:
- return &bpf_ktime_get_ns_proto;
- case BPF_FUNC_ktime_get_boot_ns:
- return &bpf_ktime_get_boot_ns_proto;
- case BPF_FUNC_tail_call:
- return &bpf_tail_call_proto;
- case BPF_FUNC_get_current_task:
- return &bpf_get_current_task_proto;
- case BPF_FUNC_get_current_task_btf:
- return &bpf_get_current_task_btf_proto;
- case BPF_FUNC_task_pt_regs:
- return &bpf_task_pt_regs_proto;
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
- case BPF_FUNC_get_current_comm:
- return &bpf_get_current_comm_proto;
- case BPF_FUNC_trace_printk:
- return bpf_get_trace_printk_proto();
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
- case BPF_FUNC_get_numa_node_id:
- return &bpf_get_numa_node_id_proto;
- case BPF_FUNC_perf_event_read:
- return &bpf_perf_event_read_proto;
- case BPF_FUNC_get_prandom_u32:
- return &bpf_get_prandom_u32_proto;
- case BPF_FUNC_probe_read_user:
- return &bpf_probe_read_user_proto;
- case BPF_FUNC_probe_read_kernel:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_kernel_proto;
- case BPF_FUNC_probe_read_user_str:
- return &bpf_probe_read_user_str_proto;
- case BPF_FUNC_probe_read_kernel_str:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_kernel_str_proto;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
case BPF_FUNC_probe_read:
return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
@@ -1488,65 +1439,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_compat_str_proto;
#endif
-#ifdef CONFIG_CGROUPS
- case BPF_FUNC_cgrp_storage_get:
- return &bpf_cgrp_storage_get_proto;
- case BPF_FUNC_cgrp_storage_delete:
- return &bpf_cgrp_storage_delete_proto;
- case BPF_FUNC_current_task_under_cgroup:
- return &bpf_current_task_under_cgroup_proto;
-#endif
- case BPF_FUNC_send_signal:
- return &bpf_send_signal_proto;
- case BPF_FUNC_send_signal_thread:
- return &bpf_send_signal_thread_proto;
- case BPF_FUNC_perf_event_read_value:
- return &bpf_perf_event_read_value_proto;
- case BPF_FUNC_ringbuf_output:
- return &bpf_ringbuf_output_proto;
- case BPF_FUNC_ringbuf_reserve:
- return &bpf_ringbuf_reserve_proto;
- case BPF_FUNC_ringbuf_submit:
- return &bpf_ringbuf_submit_proto;
- case BPF_FUNC_ringbuf_discard:
- return &bpf_ringbuf_discard_proto;
- case BPF_FUNC_ringbuf_query:
- return &bpf_ringbuf_query_proto;
- case BPF_FUNC_jiffies64:
- return &bpf_jiffies64_proto;
- case BPF_FUNC_get_task_stack:
- return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
- : &bpf_get_task_stack_proto;
- case BPF_FUNC_copy_from_user:
- return &bpf_copy_from_user_proto;
- case BPF_FUNC_copy_from_user_task:
- return &bpf_copy_from_user_task_proto;
- case BPF_FUNC_snprintf_btf:
- return &bpf_snprintf_btf_proto;
- case BPF_FUNC_per_cpu_ptr:
- return &bpf_per_cpu_ptr_proto;
- case BPF_FUNC_this_cpu_ptr:
- return &bpf_this_cpu_ptr_proto;
- case BPF_FUNC_task_storage_get:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_get_recur_proto;
- return &bpf_task_storage_get_proto;
- case BPF_FUNC_task_storage_delete:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_delete_recur_proto;
- return &bpf_task_storage_delete_proto;
- case BPF_FUNC_for_each_map_elem:
- return &bpf_for_each_map_elem_proto;
- case BPF_FUNC_snprintf:
- return &bpf_snprintf_proto;
case BPF_FUNC_get_func_ip:
return &bpf_get_func_ip_proto_tracing;
- case BPF_FUNC_get_branch_snapshot:
- return &bpf_get_branch_snapshot_proto;
- case BPF_FUNC_find_vma:
- return &bpf_find_vma_proto;
- case BPF_FUNC_trace_vprintk:
- return bpf_get_trace_vprintk_proto();
default:
break;
}
@@ -1858,7 +1752,7 @@ static struct pt_regs *get_bpf_raw_tp_regs(void)
struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
- if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+ if (nest_level > ARRAY_SIZE(tp_regs->regs)) {
this_cpu_dec(bpf_raw_tp_nest_level);
return ERR_PTR(-EBUSY);
}
@@ -2571,7 +2465,6 @@ struct bpf_kprobe_multi_link {
u32 cnt;
u32 mods_cnt;
struct module **mods;
- u32 flags;
};
struct bpf_kprobe_multi_run_ctx {
@@ -2691,7 +2584,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
info->kprobe_multi.count = kmulti_link->cnt;
- info->kprobe_multi.flags = kmulti_link->flags;
+ info->kprobe_multi.flags = kmulti_link->link.flags;
info->kprobe_multi.missed = kmulti_link->fp.nmissed;
if (!uaddrs)
@@ -2725,10 +2618,37 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
return err;
}
+#ifdef CONFIG_PROC_FS
+static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_kprobe_multi_link *kmulti_link;
+
+ kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
+
+ seq_printf(seq,
+ "kprobe_cnt:\t%u\n"
+ "missed:\t%lu\n",
+ kmulti_link->cnt,
+ kmulti_link->fp.nmissed);
+
+ seq_printf(seq, "%s\t %s\n", "cookie", "func");
+ for (int i = 0; i < kmulti_link->cnt; i++) {
+ seq_printf(seq,
+ "%llu\t %pS\n",
+ kmulti_link->cookies[i],
+ (void *)kmulti_link->addrs[i]);
+ }
+}
+#endif
+
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = bpf_kprobe_multi_show_fdinfo,
+#endif
};
static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
@@ -2987,6 +2907,9 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
+ if (attr->link_create.flags)
+ return -EINVAL;
+
if (!is_kprobe_multi(prog))
return -EINVAL;
@@ -3062,7 +2985,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
}
bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
- &bpf_kprobe_multi_link_lops, prog);
+ &bpf_kprobe_multi_link_lops, prog, attr->link_create.attach_type);
err = bpf_link_prime(&link->link, &link_primer);
if (err)
@@ -3078,7 +3001,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
link->addrs = addrs;
link->cookies = cookies;
link->cnt = cnt;
- link->flags = flags;
+ link->link.flags = flags;
if (cookies) {
/*
@@ -3147,7 +3070,6 @@ struct bpf_uprobe_multi_link {
struct path path;
struct bpf_link link;
u32 cnt;
- u32 flags;
struct bpf_uprobe *uprobes;
struct task_struct *task;
};
@@ -3211,7 +3133,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
info->uprobe_multi.count = umulti_link->cnt;
- info->uprobe_multi.flags = umulti_link->flags;
+ info->uprobe_multi.flags = umulti_link->link.flags;
info->uprobe_multi.pid = umulti_link->task ?
task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
@@ -3256,10 +3178,54 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
return err;
}
+#ifdef CONFIG_PROC_FS
+static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_uprobe_multi_link *umulti_link;
+ char *p, *buf;
+ pid_t pid;
+
+ umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+
+ buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!buf)
+ return;
+
+ p = d_path(&umulti_link->path, buf, PATH_MAX);
+ if (IS_ERR(p)) {
+ kfree(buf);
+ return;
+ }
+
+ pid = umulti_link->task ?
+ task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
+ seq_printf(seq,
+ "uprobe_cnt:\t%u\n"
+ "pid:\t%u\n"
+ "path:\t%s\n",
+ umulti_link->cnt, pid, p);
+
+ seq_printf(seq, "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset");
+ for (int i = 0; i < umulti_link->cnt; i++) {
+ seq_printf(seq,
+ "%llu\t %#llx\t %#lx\n",
+ umulti_link->uprobes[i].cookie,
+ umulti_link->uprobes[i].offset,
+ umulti_link->uprobes[i].ref_ctr_offset);
+ }
+
+ kfree(buf);
+}
+#endif
+
static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
.release = bpf_uprobe_multi_link_release,
.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = bpf_uprobe_multi_show_fdinfo,
+#endif
};
static int uprobe_prog_run(struct bpf_uprobe *uprobe,
@@ -3376,6 +3342,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
+ if (attr->link_create.flags)
+ return -EINVAL;
+
if (!is_uprobe_multi(prog))
return -EINVAL;
@@ -3417,7 +3386,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
}
if (pid) {
+ rcu_read_lock();
task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
+ rcu_read_unlock();
if (!task) {
err = -ESRCH;
goto error_path_put;
@@ -3466,10 +3437,10 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
link->uprobes = uprobes;
link->path = path;
link->task = task;
- link->flags = flags;
+ link->link.flags = flags;
bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI,
- &bpf_uprobe_multi_link_lops, prog);
+ &bpf_uprobe_multi_link_lops, prog, attr->link_create.attach_type);
for (i = 0; i < cnt; i++) {
uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry),
@@ -3565,6 +3536,146 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)
late_initcall(bpf_kprobe_multi_kfuncs_init);
+typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
+
+/*
+ * The __always_inline is to make sure the compiler doesn't
+ * generate indirect calls into callbacks, which is expensive,
+ * on some kernel configurations. This allows compiler to put
+ * direct calls into all the specific callback implementations
+ * (copy_user_data_sleepable, copy_user_data_nofault, and so on)
+ */
+static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size,
+ const void *unsafe_src,
+ copy_fn_t str_copy_fn,
+ struct task_struct *tsk)
+{
+ struct bpf_dynptr_kern *dst;
+ u32 chunk_sz, off;
+ void *dst_slice;
+ int cnt, err;
+ char buf[256];
+
+ dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+ if (likely(dst_slice))
+ return str_copy_fn(dst_slice, unsafe_src, size, tsk);
+
+ dst = (struct bpf_dynptr_kern *)dptr;
+ if (bpf_dynptr_check_off_len(dst, doff, size))
+ return -E2BIG;
+
+ for (off = 0; off < size; off += chunk_sz - 1) {
+ chunk_sz = min_t(u32, sizeof(buf), size - off);
+ /* Expect str_copy_fn to return count of copied bytes, including
+ * zero terminator. Next iteration increment off by chunk_sz - 1 to
+ * overwrite NUL.
+ */
+ cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+ if (cnt < 0)
+ return cnt;
+ err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0);
+ if (err)
+ return err;
+ if (cnt < chunk_sz || chunk_sz == 1) /* we are done */
+ return off + cnt;
+ }
+ return off;
+}
+
+static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff,
+ u32 size, const void *unsafe_src,
+ copy_fn_t copy_fn, struct task_struct *tsk)
+{
+ struct bpf_dynptr_kern *dst;
+ void *dst_slice;
+ char buf[256];
+ u32 off, chunk_sz;
+ int err;
+
+ dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+ if (likely(dst_slice))
+ return copy_fn(dst_slice, unsafe_src, size, tsk);
+
+ dst = (struct bpf_dynptr_kern *)dptr;
+ if (bpf_dynptr_check_off_len(dst, doff, size))
+ return -E2BIG;
+
+ for (off = 0; off < size; off += chunk_sz) {
+ chunk_sz = min_t(u32, sizeof(buf), size - off);
+ err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+ if (err)
+ return err;
+ err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ int ret;
+
+ if (!tsk) { /* Read from the current task */
+ ret = copy_from_user(dst, (const void __user *)unsafe_src, size);
+ if (ret)
+ return -EFAULT;
+ return 0;
+ }
+
+ ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
+ if (ret != size)
+ return -EFAULT;
+ return 0;
+}
+
+static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return copy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ int ret;
+
+ if (unlikely(size == 0))
+ return 0;
+
+ if (tsk) {
+ ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0);
+ } else {
+ ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1);
+ /* strncpy_from_user does not guarantee NUL termination */
+ if (ret >= 0)
+ ((char *)dst)[ret] = '\0';
+ }
+
+ if (ret < 0)
+ return ret;
+ return ret + 1;
+}
+
+static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return strncpy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
__bpf_kfunc_start_defs();
__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
@@ -3576,4 +3687,62 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid
return bpf_send_signal_common(sig, type, task, value);
}
+__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
+ copy_kernel_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
+ copy_kernel_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_sleepable, tsk);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_sleepable, tsk);
+}
+
__bpf_kfunc_end_defs();
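
[Editor's note] The eight kfuncs above are dynptr variants of the existing probe-read and copy_from_user helpers: the data lands in a bpf_dynptr, and when the destination is not one contiguous slice, __bpf_dynptr_copy()/__bpf_dynptr_copy_str() chunk the copy through a 256-byte stack buffer (overlapping each string chunk by one byte to overwrite the NUL). A minimal BPF-side sketch, assuming a tree with these kfuncs and standard libbpf headers; the ring buffer map, attach target, and 256-byte size are illustrative:

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	/* Kfunc added by this patch; declaration assumed, not from a UAPI header. */
	extern int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
						 u32 size, const void *unsafe_ptr__ign) __ksym;

	struct {
		__uint(type, BPF_MAP_TYPE_RINGBUF);
		__uint(max_entries, 64 * 4096);
	} rb SEC(".maps");

	char LICENSE[] SEC("license") = "GPL";

	SEC("uretprobe.s//bin/bash:readline")	/* sleepable, so the copy may fault pages in */
	int BPF_URETPROBE(trace_readline, const void *line)
	{
		struct bpf_dynptr dptr;

		if (bpf_ringbuf_reserve_dynptr(&rb, 256, 0, &dptr)) {
			bpf_ringbuf_discard_dynptr(&dptr, 0);	/* must release even on failure */
			return 0;
		}
		/* Copy the NUL-terminated user string straight into the reservation. */
		bpf_copy_from_user_str_dynptr(&dptr, 0, 256, line);
		bpf_ringbuf_submit_dynptr(&dptr, 0);
		return 0;
	}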
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 8d925cbdce3a..f4d200f0c610 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1325,6 +1325,10 @@ int register_ftrace_graph(struct fgraph_ops *gops)
int ret = 0;
int i = -1;
+ if (WARN_ONCE(gops->ops.flags & FTRACE_OPS_FL_GRAPH,
+ "function graph ops registered again"))
+ return -EBUSY;
+
guard(mutex)(&ftrace_lock);
if (!fgraph_stack_cachep) {
@@ -1382,6 +1386,8 @@ int register_ftrace_graph(struct fgraph_ops *gops)
/* Always save the function, and reset at unregistering */
gops->saved_func = gops->entryfunc;
+ gops->ops.flags |= FTRACE_OPS_FL_GRAPH;
+
ret = ftrace_startup_subops(&graph_ops, &gops->ops, command);
if (!ret)
fgraph_array[i] = gops;
@@ -1399,17 +1405,21 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
{
int command = 0;
+ if (WARN_ONCE(!(gops->ops.flags & FTRACE_OPS_FL_GRAPH),
+ "function graph ops unregistered without registering"))
+ return;
+
guard(mutex)(&ftrace_lock);
if (unlikely(!ftrace_graph_active))
- return;
+ goto out;
if (unlikely(gops->idx < 0 || gops->idx >= FGRAPH_ARRAY_SIZE ||
fgraph_array[gops->idx] != gops))
- return;
+ goto out;
if (fgraph_lru_release_index(gops->idx) < 0)
- return;
+ goto out;
fgraph_array[gops->idx] = &fgraph_stub;
@@ -1432,4 +1442,6 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
}
gops->saved_func = NULL;
+ out:
+ gops->ops.flags &= ~FTRACE_OPS_FL_GRAPH;
}
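
[Editor's note] The new FTRACE_OPS_FL_GRAPH bookkeeping above turns a double register_ftrace_graph() into a reported -EBUSY and makes a stray unregister_ftrace_graph() a warned no-op, rather than corrupting fgraph state. A hedged sketch of the register/unregister pairing it protects (callback signatures as used elsewhere in this tree; error handling elided):

	static int my_entry(struct ftrace_graph_ent *trace,
			    struct fgraph_ops *gops, struct ftrace_regs *fregs)
	{
		return 1;	/* also trace this function's return */
	}

	static void my_return(struct ftrace_graph_ret *trace,
			      struct fgraph_ops *gops, struct ftrace_regs *fregs)
	{
	}

	static struct fgraph_ops my_gops = {
		.entryfunc = my_entry,
		.retfunc   = my_return,
	};

	static int __init my_init(void)
	{
		int ret;

		ret = register_ftrace_graph(&my_gops);	/* sets FTRACE_OPS_FL_GRAPH */
		if (ret)
			return ret;
		/* Registering my_gops a second time here now fails with -EBUSY;
		 * unregister_ftrace_graph(&my_gops) clears the flag again. */
		return 0;
	}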
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index ba7ff14f5339..c8034dfc1070 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -352,7 +352,7 @@ static void fprobe_return(struct ftrace_graph_ret *trace,
size_words = SIZE_IN_LONG(size);
ret_ip = ftrace_regs_get_instruction_pointer(fregs);
- preempt_disable();
+ preempt_disable_notrace();
curr = 0;
while (size_words > curr) {
@@ -368,7 +368,7 @@ static void fprobe_return(struct ftrace_graph_ret *trace,
}
curr += size;
}
- preempt_enable();
+ preempt_enable_notrace();
}
NOKPROBE_SYMBOL(fprobe_return);
@@ -648,6 +648,11 @@ static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
#define FPROBE_IPS_MAX INT_MAX
+int fprobe_count_ips_from_filter(const char *filter, const char *notfilter)
+{
+ return get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX);
+}
+
/**
* register_fprobe() - Register fprobe to ftrace by pattern.
* @fp: A fprobe data structure to be registered.
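
[Editor's note] Two independent changes here: fprobe_return() now uses the _notrace preempt variants so toggling preemption in the exit-handler path cannot itself recurse into the tracer, and the new fprobe_count_ips_from_filter() reports how many ftrace locations a filter/notfilter pair resolves to (it reuses get_ips_from_filter() with NULL address arrays). A hedged sketch of a pattern-based fprobe user, with the entry-handler signature as in current <linux/fprobe.h>:

	#include <linux/fprobe.h>

	static int my_entry_handler(struct fprobe *fp, unsigned long entry_ip,
				    unsigned long ret_ip, struct ftrace_regs *regs,
				    void *entry_data)
	{
		return 0;
	}

	static struct fprobe my_fp = {
		.entry_handler = my_entry_handler,
	};

	static int __init my_fprobe_init(void)
	{
		/* Attach to everything matching "vfs_*" except "vfs_write". */
		return register_fprobe(&my_fp, "vfs_*", "vfs_write");
	}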
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6981830c3128..00b76d450a89 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -188,7 +188,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
op->saved_func(ip, parent_ip, op, fregs);
}
-static void ftrace_sync_ipi(void *data)
+void ftrace_sync_ipi(void *data)
{
/* Probably not needed, but do it anyway */
smp_rmb();
@@ -1042,10 +1042,6 @@ static struct ftrace_ops *removed_ops;
*/
static bool update_all_ops;
-#ifndef CONFIG_FTRACE_MCOUNT_RECORD
-# error Dynamic ftrace depends on MCOUNT_RECORD
-#endif
-
struct ftrace_func_probe {
struct ftrace_probe_ops *probe_ops;
struct ftrace_ops ops;
@@ -4373,6 +4369,42 @@ static inline int print_rec(struct seq_file *m, unsigned long ip)
}
#endif
+static void print_subops(struct seq_file *m, struct ftrace_ops *ops, struct dyn_ftrace *rec)
+{
+ struct ftrace_ops *subops;
+ bool first = true;
+
+ list_for_each_entry(subops, &ops->subop_list, list) {
+ if (!((subops->flags & FTRACE_OPS_FL_ENABLED) &&
+ hash_contains_ip(rec->ip, subops->func_hash)))
+ continue;
+ if (first) {
+ seq_printf(m, "\tsubops:");
+ first = false;
+ }
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ if (subops->flags & FTRACE_OPS_FL_GRAPH) {
+ struct fgraph_ops *gops;
+
+ gops = container_of(subops, struct fgraph_ops, ops);
+ seq_printf(m, " {ent:%pS ret:%pS}",
+ (void *)gops->entryfunc,
+ (void *)gops->retfunc);
+ continue;
+ }
+#endif
+ if (subops->trampoline) {
+ seq_printf(m, " {%pS (%pS)}",
+ (void *)subops->trampoline,
+ (void *)subops->func);
+ add_trampoline_func(m, subops, rec);
+ } else {
+ seq_printf(m, " {%pS}",
+ (void *)subops->func);
+ }
+ }
+}
+
static int t_show(struct seq_file *m, void *v)
{
struct ftrace_iterator *iter = m->private;
@@ -4425,6 +4457,7 @@ static int t_show(struct seq_file *m, void *v)
(void *)ops->trampoline,
(void *)ops->func);
add_trampoline_func(m, ops, rec);
+ print_subops(m, ops, rec);
ops = ftrace_find_tramp_ops_next(rec, ops);
} while (ops);
} else
@@ -4437,6 +4470,7 @@ static int t_show(struct seq_file *m, void *v)
if (ops) {
seq_printf(m, "\tops: %pS (%pS)",
ops, ops->func);
+ print_subops(m, ops, rec);
} else {
seq_puts(m, "\tops: ERROR!");
}
@@ -5170,8 +5204,12 @@ struct ftrace_func_map {
void *data;
};
+/*
+ * Note, ftrace_func_mapper is freed by free_ftrace_hash(&mapper->hash).
+ * The hash field must be the first field.
+ */
struct ftrace_func_mapper {
- struct ftrace_hash hash;
+ struct ftrace_hash hash; /* Must be first! */
};
/**
@@ -5306,6 +5344,7 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper,
}
}
}
+ /* This also frees the mapper itself */
free_ftrace_hash(&mapper->hash);
}
@@ -7395,9 +7434,10 @@ void ftrace_release_mod(struct module *mod)
mutex_lock(&ftrace_lock);
- if (ftrace_disabled)
- goto out_unlock;
-
+ /*
+ * To avoid the UAF problem after the module is unloaded, the
+ * 'mod_map' resource needs to be released unconditionally.
+ */
list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) {
if (mod_map->mod == mod) {
list_del_rcu(&mod_map->list);
@@ -7406,6 +7446,9 @@ void ftrace_release_mod(struct module *mod)
}
}
+ if (ftrace_disabled)
+ goto out_unlock;
+
/*
* Each module has its own ftrace_pages, remove
* them from the list.
@@ -7584,6 +7627,9 @@ allocate_ftrace_mod_map(struct module *mod,
{
struct ftrace_mod_map *mod_map;
+ if (ftrace_disabled)
+ return NULL;
+
mod_map = kmalloc(sizeof(*mod_map), GFP_KERNEL);
if (!mod_map)
return NULL;
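
[Editor's note] The comment added to struct ftrace_func_mapper documents an invariant the freeing path depends on: because the embedded hash is the first member, &mapper->hash and the mapper itself have the same address, so free_ftrace_hash(&mapper->hash) releases the whole mapper. A standalone illustration of that first-member idiom (userspace sketch, not kernel code):

	#include <assert.h>
	#include <stdlib.h>

	struct hash   { unsigned long size; };
	struct mapper {
		struct hash hash;	/* must stay first */
		void *priv;
	};

	int main(void)
	{
		struct mapper *m = calloc(1, sizeof(*m));

		/* offsetof(struct mapper, hash) == 0, so the pointers coincide */
		assert((void *)m == (void *)&m->hash);
		free(&m->hash);		/* frees the whole mapper */
		return 0;
	}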
diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
index c62b9b3cfb3d..090bb5ea4a19 100644
--- a/kernel/trace/pid_list.c
+++ b/kernel/trace/pid_list.c
@@ -81,13 +81,9 @@ static inline bool upper_empty(union upper_chunk *chunk)
{
/*
* If chunk->data has no lower chunks, it will be the same
- * as a zeroed bitmask. Use find_first_bit() to test it
- * and if it doesn't find any bits set, then the array
- * is empty.
+ * as a zeroed bitmask.
*/
- int bit = find_first_bit((unsigned long *)chunk->data,
- sizeof(chunk->data) * 8);
- return bit >= sizeof(chunk->data) * 8;
+ return bitmap_empty((unsigned long *)chunk->data, BITS_PER_TYPE(chunk->data));
}
static inline int pid_split(unsigned int pid, unsigned int *upper1,
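
[Editor's note] bitmap_empty() is the direct predicate for "no bits set": it returns the same result as checking that find_first_bit() ran off the end, but states the intent and can short-circuit on the first non-zero word. A sketch of the equivalence, assuming the usual <linux/bitmap.h> API:

	/* Both return true iff no bit in data[0..nbits) is set. */
	static bool empty_old(const unsigned long *data, unsigned int nbits)
	{
		return find_first_bit(data, nbits) >= nbits;
	}

	static bool empty_new(const unsigned long *data, unsigned int nbits)
	{
		return bitmap_empty(data, nbits);
	}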
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 21bb161c2316..f2fe33573e54 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -17,5 +17,4 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume);
EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_frequency);
-EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3f9bf562beea..5176e0270f07 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1358,6 +1358,13 @@ static inline void rb_inc_page(struct buffer_page **bpage)
*bpage = list_entry(p, struct buffer_page, list);
}
+static inline void rb_dec_page(struct buffer_page **bpage)
+{
+ struct list_head *p = rb_list_head((*bpage)->list.prev);
+
+ *bpage = list_entry(p, struct buffer_page, list);
+}
+
static struct buffer_page *
rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
{
@@ -1866,10 +1873,11 @@ static int rb_validate_buffer(struct buffer_data_page *dpage, int cpu)
static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
{
struct ring_buffer_cpu_meta *meta = cpu_buffer->ring_meta;
- struct buffer_page *head_page;
+ struct buffer_page *head_page, *orig_head;
unsigned long entry_bytes = 0;
unsigned long entries = 0;
int ret;
+ u64 ts;
int i;
if (!meta || !meta->head_buffer)
@@ -1885,8 +1893,98 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
entry_bytes += local_read(&cpu_buffer->reader_page->page->commit);
local_set(&cpu_buffer->reader_page->entries, ret);
- head_page = cpu_buffer->head_page;
+ orig_head = head_page = cpu_buffer->head_page;
+ ts = head_page->page->time_stamp;
+
+ /*
+ * Try to rewind the head so that we can read the pages which already
+	 * Try to rewind the head so that we can read the pages which were
+	 * already read in the previous boot.
+ if (head_page == cpu_buffer->tail_page)
+ goto skip_rewind;
+
+ rb_dec_page(&head_page);
+ for (i = 0; i < meta->nr_subbufs + 1; i++, rb_dec_page(&head_page)) {
+
+ /* Rewind until tail (writer) page. */
+ if (head_page == cpu_buffer->tail_page)
+ break;
+
+ /* Ensure the page has older data than head. */
+ if (ts < head_page->page->time_stamp)
+ break;
+
+ ts = head_page->page->time_stamp;
+ /* Ensure the page has correct timestamp and some data. */
+ if (!ts || rb_page_commit(head_page) == 0)
+ break;
+
+ /* Stop rewind if the page is invalid. */
+ ret = rb_validate_buffer(head_page->page, cpu_buffer->cpu);
+ if (ret < 0)
+ break;
+
+ /* Recover the number of entries and update stats. */
+ local_set(&head_page->entries, ret);
+ if (ret)
+ local_inc(&cpu_buffer->pages_touched);
+ entries += ret;
+ entry_bytes += rb_page_commit(head_page);
+ }
+ if (i)
+ pr_info("Ring buffer [%d] rewound %d pages\n", cpu_buffer->cpu, i);
+
+ /* The last rewound page must be skipped. */
+ if (head_page != orig_head)
+ rb_inc_page(&head_page);
+
+ /*
+ * If the ring buffer was rewound, then inject the reader page
+ * into the location just before the original head page.
+ */
+ if (head_page != orig_head) {
+ struct buffer_page *bpage = orig_head;
+
+ rb_dec_page(&bpage);
+ /*
+ * Insert the reader_page before the original head page.
+	 * Since the list encodes RB_PAGE flags, general list
+ * operations should be avoided.
+ */
+ cpu_buffer->reader_page->list.next = &orig_head->list;
+ cpu_buffer->reader_page->list.prev = orig_head->list.prev;
+ orig_head->list.prev = &cpu_buffer->reader_page->list;
+ bpage->list.next = &cpu_buffer->reader_page->list;
+
+ /* Make the head_page the reader page */
+ cpu_buffer->reader_page = head_page;
+ bpage = head_page;
+ rb_inc_page(&head_page);
+ head_page->list.prev = bpage->list.prev;
+ rb_dec_page(&bpage);
+ bpage->list.next = &head_page->list;
+ rb_set_list_to_head(&bpage->list);
+ cpu_buffer->pages = &head_page->list;
+
+ cpu_buffer->head_page = head_page;
+ meta->head_buffer = (unsigned long)head_page->page;
+
+ /* Reset all the indexes */
+ bpage = cpu_buffer->reader_page;
+ meta->buffers[0] = rb_meta_subbuf_idx(meta, bpage->page);
+ bpage->id = 0;
+
+ for (i = 1, bpage = head_page; i < meta->nr_subbufs;
+ i++, rb_inc_page(&bpage)) {
+ meta->buffers[i] = rb_meta_subbuf_idx(meta, bpage->page);
+ bpage->id = i;
+ }
+
+ /* We'll restart verifying from orig_head */
+ head_page = orig_head;
+ }
+ skip_rewind:
/* If the commit_buffer is the reader page, update the commit page */
if (meta->commit_buffer == (unsigned long)cpu_buffer->reader_page->page) {
cpu_buffer->commit_page = cpu_buffer->reader_page;
@@ -2226,7 +2324,7 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
static struct ring_buffer_per_cpu *
rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
{
- struct ring_buffer_per_cpu *cpu_buffer;
+ struct ring_buffer_per_cpu *cpu_buffer __free(kfree) = NULL;
struct ring_buffer_cpu_meta *meta;
struct buffer_page *bpage;
struct page *page;
@@ -2252,7 +2350,7 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
GFP_KERNEL, cpu_to_node(cpu));
if (!bpage)
- goto fail_free_buffer;
+ return NULL;
rb_check_bpage(cpu_buffer, bpage);
@@ -2318,13 +2416,11 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
rb_head_page_activate(cpu_buffer);
}
- return cpu_buffer;
+ return_ptr(cpu_buffer);
fail_free_reader:
free_buffer_page(cpu_buffer->reader_page);
- fail_free_buffer:
- kfree(cpu_buffer);
return NULL;
}
@@ -2359,7 +2455,7 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
unsigned long scratch_size,
struct lock_class_key *key)
{
- struct trace_buffer *buffer;
+ struct trace_buffer *buffer __free(kfree) = NULL;
long nr_pages;
int subbuf_size;
int bsize;
@@ -2373,7 +2469,7 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
return NULL;
if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
- goto fail_free_buffer;
+ return NULL;
buffer->subbuf_order = order;
subbuf_size = (PAGE_SIZE << order);
@@ -2472,7 +2568,7 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
mutex_init(&buffer->mutex);
- return buffer;
+ return_ptr(buffer);
fail_free_buffers:
for_each_buffer_cpu(buffer, cpu) {
@@ -2484,8 +2580,6 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
fail_free_cpumask:
free_cpumask_var(buffer->cpumask);
- fail_free_buffer:
- kfree(buffer);
return NULL;
}
@@ -2849,6 +2943,12 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
if (nr_pages < 2)
nr_pages = 2;
+ /*
+ * Keep CPUs from coming online while resizing to synchronize
+ * with new per CPU buffers being created.
+ */
+ guard(cpus_read_lock)();
+
/* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
atomic_inc(&buffer->resizing);
@@ -2893,7 +2993,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
cond_resched();
}
- cpus_read_lock();
/*
* Fire off all the required work handlers
* We can't schedule on offline CPUs, but it's not necessary
@@ -2933,7 +3032,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
cpu_buffer->nr_pages_to_update = 0;
}
- cpus_read_unlock();
} else {
cpu_buffer = buffer->buffers[cpu_id];
@@ -2961,8 +3059,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
goto out_err;
}
- cpus_read_lock();
-
/* Can't run something on an offline CPU. */
if (!cpu_online(cpu_id))
rb_update_pages(cpu_buffer);
@@ -2981,7 +3077,6 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
}
cpu_buffer->nr_pages_to_update = 0;
- cpus_read_unlock();
}
out:
@@ -4121,7 +4216,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
static const char *show_irq_str(int bits)
{
- const char *type[] = {
+ static const char * type[] = {
".", // 0
"s", // 1
"h", // 2
@@ -4684,10 +4779,7 @@ void ring_buffer_discard_commit(struct trace_buffer *buffer,
RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
rb_decrement_entry(cpu_buffer, event);
- if (rb_try_to_discard(cpu_buffer, event))
- goto out;
-
- out:
+ rb_try_to_discard(cpu_buffer, event);
rb_end_commit(cpu_buffer);
trace_recursive_unlock(cpu_buffer);
@@ -4885,6 +4977,24 @@ bool ring_buffer_record_is_set_on(struct trace_buffer *buffer)
}
/**
+ * ring_buffer_record_is_on_cpu - return true if the ring buffer can write
+ * @buffer: The ring buffer to see if write is enabled
+ * @cpu: The CPU to test if the ring buffer can write too
+ *
+ * Returns true if the ring buffer is in a state that it accepts writes
+ * for a particular CPU.
+ */
+bool ring_buffer_record_is_on_cpu(struct trace_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+
+ cpu_buffer = buffer->buffers[cpu];
+
+ return ring_buffer_record_is_set_on(buffer) &&
+ !atomic_read(&cpu_buffer->record_disabled);
+}
+
+/**
* ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
* @buffer: The ring buffer to stop writes to.
* @cpu: The CPU buffer to stop
@@ -5330,7 +5440,6 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
*/
local_set(&cpu_buffer->reader_page->write, 0);
local_set(&cpu_buffer->reader_page->entries, 0);
- local_set(&cpu_buffer->reader_page->page->commit, 0);
cpu_buffer->reader_page->real_end = 0;
spin:
@@ -5834,24 +5943,20 @@ ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts,
EXPORT_SYMBOL_GPL(ring_buffer_consume);
/**
- * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
+ * ring_buffer_read_start - start a non consuming read of the buffer
* @buffer: The ring buffer to read from
* @cpu: The cpu buffer to iterate over
* @flags: gfp flags to use for memory allocation
*
- * This performs the initial preparations necessary to iterate
- * through the buffer. Memory is allocated, buffer resizing
- * is disabled, and the iterator pointer is returned to the caller.
- *
- * After a sequence of ring_buffer_read_prepare calls, the user is
- * expected to make at least one call to ring_buffer_read_prepare_sync.
- * Afterwards, ring_buffer_read_start is invoked to get things going
- * for real.
+ * This creates an iterator to allow non-consuming iteration through
+ * the buffer. If the buffer is disabled for writing, it will produce
+ * the same information each time, but if the buffer is still writing
+ * then the first hit of a write will cause the iteration to stop.
*
- * This overall must be paired with ring_buffer_read_finish.
+ * Must be paired with ring_buffer_read_finish.
*/
struct ring_buffer_iter *
-ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
+ring_buffer_read_start(struct trace_buffer *buffer, int cpu, gfp_t flags)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_iter *iter;
@@ -5877,51 +5982,12 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
atomic_inc(&cpu_buffer->resize_disabled);
- return iter;
-}
-EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
-
-/**
- * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
- *
- * All previously invoked ring_buffer_read_prepare calls to prepare
- * iterators will be synchronized. Afterwards, read_buffer_read_start
- * calls on those iterators are allowed.
- */
-void
-ring_buffer_read_prepare_sync(void)
-{
- synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
-
-/**
- * ring_buffer_read_start - start a non consuming read of the buffer
- * @iter: The iterator returned by ring_buffer_read_prepare
- *
- * This finalizes the startup of an iteration through the buffer.
- * The iterator comes from a call to ring_buffer_read_prepare and
- * an intervening ring_buffer_read_prepare_sync must have been
- * performed.
- *
- * Must be paired with ring_buffer_read_finish.
- */
-void
-ring_buffer_read_start(struct ring_buffer_iter *iter)
-{
- struct ring_buffer_per_cpu *cpu_buffer;
- unsigned long flags;
-
- if (!iter)
- return;
-
- cpu_buffer = iter->cpu_buffer;
-
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ guard(raw_spinlock_irqsave)(&cpu_buffer->reader_lock);
arch_spin_lock(&cpu_buffer->lock);
rb_iter_reset(iter);
arch_spin_unlock(&cpu_buffer->lock);
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return iter;
}
EXPORT_SYMBOL_GPL(ring_buffer_read_start);
@@ -6002,6 +6068,39 @@ static void rb_clear_buffer_page(struct buffer_page *page)
page->read = 0;
}
+/*
+ * When the buffer is memory mapped to user space, each sub buffer
+ * has a unique id that is used by the meta data to tell the user
+ * where the current reader page is.
+ *
+ * For a normal allocated ring buffer, the id is saved in the buffer page
+ * id field, and updated via this function.
+ *
+ * But for a fixed memory mapped buffer, the id is already assigned for
+ * fixed memory ording in the memory layout and can not be used. Instead
+ * the index of where the page lies in the memory layout is used.
+ *
+ * For the normal pages, set the buffer page id with the passed in @id
+ * value and return that.
+ *
+ * For fixed memory mapped pages, get the page index in the memory layout
+ * and return that as the id.
+ */
+static int rb_page_id(struct ring_buffer_per_cpu *cpu_buffer,
+ struct buffer_page *bpage, int id)
+{
+ /*
+ * For boot buffers, the id is the index,
+ * otherwise, set the buffer page with this id
+ */
+ if (cpu_buffer->ring_meta)
+ id = rb_meta_subbuf_idx(cpu_buffer->ring_meta, bpage->page);
+ else
+ bpage->id = id;
+
+ return id;
+}
+
static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
{
struct trace_buffer_meta *meta = cpu_buffer->meta_page;
@@ -6010,7 +6109,9 @@ static void rb_update_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
return;
meta->reader.read = cpu_buffer->reader_page->read;
- meta->reader.id = cpu_buffer->reader_page->id;
+ meta->reader.id = rb_page_id(cpu_buffer, cpu_buffer->reader_page,
+ cpu_buffer->reader_page->id);
+
meta->reader.lost_events = cpu_buffer->lost_events;
meta->entries = local_read(&cpu_buffer->entries);
@@ -6080,21 +6181,16 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
/* Must have disabled the cpu buffer then done a synchronize_rcu */
static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ guard(raw_spinlock_irqsave)(&cpu_buffer->reader_lock);
if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
- goto out;
+ return;
arch_spin_lock(&cpu_buffer->lock);
rb_reset_cpu(cpu_buffer);
arch_spin_unlock(&cpu_buffer->lock);
-
- out:
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
/**
@@ -6282,37 +6378,33 @@ int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
!cpumask_test_cpu(cpu, buffer_b->cpumask))
- goto out;
+ return -EINVAL;
cpu_buffer_a = buffer_a->buffers[cpu];
cpu_buffer_b = buffer_b->buffers[cpu];
/* It's up to the callers to not try to swap mapped buffers */
- if (WARN_ON_ONCE(cpu_buffer_a->mapped || cpu_buffer_b->mapped)) {
- ret = -EBUSY;
- goto out;
- }
+ if (WARN_ON_ONCE(cpu_buffer_a->mapped || cpu_buffer_b->mapped))
+ return -EBUSY;
/* At least make sure the two buffers are somewhat the same */
if (cpu_buffer_a->nr_pages != cpu_buffer_b->nr_pages)
- goto out;
+ return -EINVAL;
if (buffer_a->subbuf_order != buffer_b->subbuf_order)
- goto out;
-
- ret = -EAGAIN;
+ return -EINVAL;
if (atomic_read(&buffer_a->record_disabled))
- goto out;
+ return -EAGAIN;
if (atomic_read(&buffer_b->record_disabled))
- goto out;
+ return -EAGAIN;
if (atomic_read(&cpu_buffer_a->record_disabled))
- goto out;
+ return -EAGAIN;
if (atomic_read(&cpu_buffer_b->record_disabled))
- goto out;
+ return -EAGAIN;
/*
* We can't do a synchronize_rcu here because this
@@ -6349,7 +6441,6 @@ int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
out_dec:
atomic_dec(&cpu_buffer_a->record_disabled);
atomic_dec(&cpu_buffer_b->record_disabled);
-out:
return ret;
}
EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
@@ -6508,38 +6599,37 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
struct buffer_data_page *bpage;
struct buffer_page *reader;
unsigned long missed_events;
- unsigned long flags;
unsigned int commit;
unsigned int read;
u64 save_timestamp;
- int ret = -1;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
- goto out;
+ return -1;
/*
* If len is not big enough to hold the page header, then
* we can not copy anything.
*/
if (len <= BUF_PAGE_HDR_SIZE)
- goto out;
+ return -1;
len -= BUF_PAGE_HDR_SIZE;
if (!data_page || !data_page->data)
- goto out;
+ return -1;
+
if (data_page->order != buffer->subbuf_order)
- goto out;
+ return -1;
bpage = data_page->data;
if (!bpage)
- goto out;
+ return -1;
- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ guard(raw_spinlock_irqsave)(&cpu_buffer->reader_lock);
reader = rb_get_reader_page(cpu_buffer);
if (!reader)
- goto out_unlock;
+ return -1;
event = rb_reader_event(cpu_buffer);
@@ -6573,7 +6663,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
if (full &&
(!read || (len < (commit - read)) ||
cpu_buffer->reader_page == cpu_buffer->commit_page))
- goto out_unlock;
+ return -1;
if (len > (commit - read))
len = (commit - read);
@@ -6582,7 +6672,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
size = rb_event_ts_length(event);
if (len < size)
- goto out_unlock;
+ return -1;
/* save the current timestamp, since the user will need it */
save_timestamp = cpu_buffer->read_stamp;
@@ -6640,7 +6730,6 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
if (reader->real_end)
local_set(&bpage->commit, reader->real_end);
}
- ret = read;
cpu_buffer->lost_events = 0;
@@ -6667,11 +6756,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
if (commit < buffer->subbuf_size)
memset(&bpage->data[commit], 0, buffer->subbuf_size - commit);
- out_unlock:
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
- out:
- return ret;
+ return read;
}
EXPORT_SYMBOL_GPL(ring_buffer_read_page);
@@ -6764,7 +6849,7 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
old_size = buffer->subbuf_size;
/* prevent another thread from changing buffer sizes */
- mutex_lock(&buffer->mutex);
+ guard(mutex)(&buffer->mutex);
atomic_inc(&buffer->record_disabled);
/* Make sure all commits have finished */
@@ -6869,7 +6954,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
}
atomic_dec(&buffer->record_disabled);
- mutex_unlock(&buffer->mutex);
return 0;
@@ -6878,7 +6962,6 @@ error:
buffer->subbuf_size = old_size;
atomic_dec(&buffer->record_disabled);
- mutex_unlock(&buffer->mutex);
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
@@ -6926,23 +7009,29 @@ static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
struct trace_buffer_meta *meta = cpu_buffer->meta_page;
unsigned int nr_subbufs = cpu_buffer->nr_pages + 1;
struct buffer_page *first_subbuf, *subbuf;
+ int cnt = 0;
int id = 0;
- subbuf_ids[id] = (unsigned long)cpu_buffer->reader_page->page;
- cpu_buffer->reader_page->id = id++;
+ id = rb_page_id(cpu_buffer, cpu_buffer->reader_page, id);
+ subbuf_ids[id++] = (unsigned long)cpu_buffer->reader_page->page;
+ cnt++;
first_subbuf = subbuf = rb_set_head_page(cpu_buffer);
do {
+ id = rb_page_id(cpu_buffer, subbuf, id);
+
if (WARN_ON(id >= nr_subbufs))
break;
subbuf_ids[id] = (unsigned long)subbuf->page;
- subbuf->id = id;
rb_inc_page(&subbuf);
id++;
+ cnt++;
} while (subbuf != first_subbuf);
+ WARN_ON(cnt != nr_subbufs);
+
/* install subbuf ID to kern VA translation */
cpu_buffer->subbuf_ids = subbuf_ids;
@@ -7034,7 +7123,7 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
{
unsigned long nr_subbufs, nr_pages, nr_vma_pages, pgoff = vma->vm_pgoff;
unsigned int subbuf_pages, subbuf_order;
- struct page **pages;
+ struct page **pages __free(kfree) = NULL;
int p = 0, s = 0;
int err;
@@ -7102,10 +7191,8 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
struct page *page;
int off = 0;
- if (WARN_ON_ONCE(s >= nr_subbufs)) {
- err = -EINVAL;
- goto out;
- }
+ if (WARN_ON_ONCE(s >= nr_subbufs))
+ return -EINVAL;
page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]);
@@ -7120,9 +7207,6 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
err = vm_insert_pages(vma, vma->vm_start, pages, &nr_pages);
-out:
- kfree(pages);
-
return err;
}
#else
@@ -7138,36 +7222,34 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long flags, *subbuf_ids;
- int err = 0;
+ int err;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -EINVAL;
cpu_buffer = buffer->buffers[cpu];
- mutex_lock(&cpu_buffer->mapping_lock);
+ guard(mutex)(&cpu_buffer->mapping_lock);
if (cpu_buffer->user_mapped) {
err = __rb_map_vma(cpu_buffer, vma);
if (!err)
err = __rb_inc_dec_mapped(cpu_buffer, true);
- mutex_unlock(&cpu_buffer->mapping_lock);
return err;
}
/* prevent another thread from changing buffer/sub-buffer sizes */
- mutex_lock(&buffer->mutex);
+ guard(mutex)(&buffer->mutex);
err = rb_alloc_meta_page(cpu_buffer);
if (err)
- goto unlock;
+ return err;
/* subbuf_ids include the reader while nr_pages does not */
subbuf_ids = kcalloc(cpu_buffer->nr_pages + 1, sizeof(*subbuf_ids), GFP_KERNEL);
if (!subbuf_ids) {
rb_free_meta_page(cpu_buffer);
- err = -ENOMEM;
- goto unlock;
+ return -ENOMEM;
}
atomic_inc(&cpu_buffer->resize_disabled);
@@ -7195,35 +7277,29 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
atomic_dec(&cpu_buffer->resize_disabled);
}
-unlock:
- mutex_unlock(&buffer->mutex);
- mutex_unlock(&cpu_buffer->mapping_lock);
-
- return err;
+ return 0;
}
int ring_buffer_unmap(struct trace_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long flags;
- int err = 0;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -EINVAL;
cpu_buffer = buffer->buffers[cpu];
- mutex_lock(&cpu_buffer->mapping_lock);
+ guard(mutex)(&cpu_buffer->mapping_lock);
if (!cpu_buffer->user_mapped) {
- err = -ENODEV;
- goto out;
+ return -ENODEV;
} else if (cpu_buffer->user_mapped > 1) {
__rb_inc_dec_mapped(cpu_buffer, false);
- goto out;
+ return 0;
}
- mutex_lock(&buffer->mutex);
+ guard(mutex)(&buffer->mutex);
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
/* This is the last user space mapping */
@@ -7238,12 +7314,7 @@ int ring_buffer_unmap(struct trace_buffer *buffer, int cpu)
rb_free_meta_page(cpu_buffer);
atomic_dec(&cpu_buffer->resize_disabled);
- mutex_unlock(&buffer->mutex);
-
-out:
- mutex_unlock(&cpu_buffer->mapping_lock);
-
- return err;
+ return 0;
}
int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu)
@@ -7284,8 +7355,8 @@ consume:
/* Check if any events were dropped */
missed_events = cpu_buffer->lost_events;
- if (cpu_buffer->reader_page != cpu_buffer->commit_page) {
- if (missed_events) {
+ if (missed_events) {
+ if (cpu_buffer->reader_page != cpu_buffer->commit_page) {
struct buffer_data_page *bpage = reader->page;
unsigned int commit;
/*
@@ -7306,13 +7377,23 @@ consume:
local_add(RB_MISSED_STORED, &bpage->commit);
}
local_add(RB_MISSED_EVENTS, &bpage->commit);
+ } else if (!WARN_ONCE(cpu_buffer->reader_page == cpu_buffer->tail_page,
+ "Reader on commit with %ld missed events",
+ missed_events)) {
+ /*
+ * There shouldn't be any missed events if the tail_page
+ * is on the reader page. But if the tail page is not on the
+ * reader page and the commit_page is, that would mean that
+ * there's a commit_overrun (an interrupt preempted an
+ * addition of an event and then filled the buffer
+ * with new events). In this case it's not an
+ * error, but it should still be reported.
+ *
+ * TODO: Add missed events to the page for user space to know.
+ */
+ pr_info("Ring buffer [%d] commit overrun lost %ld events at timestamp:%lld\n",
+ cpu, missed_events, cpu_buffer->reader_page->page->time_stamp);
}
- } else {
- /*
- * There really shouldn't be any missed events if the commit
- * is on the reader page.
- */
- WARN_ON_ONCE(missed_events);
}
cpu_buffer->lost_events = 0;
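
[Editor's note] Many of the ring_buffer.c conversions above replace explicit lock/unlock pairs and goto-out labels with scope-based cleanup from <linux/cleanup.h> (guard(mutex), guard(raw_spinlock_irqsave), guard(cpus_read_lock), and __free(kfree) for allocations). A conceptual sketch of why the error paths collapse into plain returns; the condition and work function are placeholders:

	static int example(struct mutex *m)
	{
		guard(mutex)(m);	/* mutex_lock(m) here ... */

		if (some_error_condition)
			return -EINVAL;	/* ... mutex_unlock(m) runs automatically */

		do_work();
		return 0;
	}				/* ... and on normal exit too */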
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index b39f36013ef2..5b4be87ba59d 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -1,19 +1,31 @@
# SPDX-License-Identifier: GPL-2.0-only
#
-config DA_MON_EVENTS
+config RV_MON_EVENTS
+ bool
+
+config RV_MON_MAINTENANCE_EVENTS
bool
config DA_MON_EVENTS_IMPLICIT
- select DA_MON_EVENTS
+ select RV_MON_EVENTS
+ select RV_MON_MAINTENANCE_EVENTS
bool
config DA_MON_EVENTS_ID
- select DA_MON_EVENTS
+ select RV_MON_EVENTS
+ select RV_MON_MAINTENANCE_EVENTS
+ bool
+
+config LTL_MON_EVENTS_ID
+ select RV_MON_EVENTS
+ bool
+
+config RV_LTL_MONITOR
bool
menuconfig RV
bool "Runtime Verification"
- depends on TRACING
+ select TRACING
help
Enable the kernel runtime verification infrastructure. RV is a
lightweight (yet rigorous) method that complements classical
@@ -25,15 +37,34 @@ menuconfig RV
For further information, see:
Documentation/trace/rv/runtime-verification.rst
+config RV_PER_TASK_MONITORS
+ int "Maximum number of per-task monitor"
+ depends on RV
+ range 1 8
+ default 2
+ help
+ This option configures the maximum number of per-task RV monitors that can run
+ simultaneously.
+
source "kernel/trace/rv/monitors/wip/Kconfig"
source "kernel/trace/rv/monitors/wwnr/Kconfig"
+
source "kernel/trace/rv/monitors/sched/Kconfig"
-source "kernel/trace/rv/monitors/tss/Kconfig"
source "kernel/trace/rv/monitors/sco/Kconfig"
source "kernel/trace/rv/monitors/snroc/Kconfig"
source "kernel/trace/rv/monitors/scpd/Kconfig"
source "kernel/trace/rv/monitors/snep/Kconfig"
-source "kernel/trace/rv/monitors/sncid/Kconfig"
+source "kernel/trace/rv/monitors/sts/Kconfig"
+source "kernel/trace/rv/monitors/nrp/Kconfig"
+source "kernel/trace/rv/monitors/sssw/Kconfig"
+source "kernel/trace/rv/monitors/opid/Kconfig"
+# Add new sched monitors here
+
+source "kernel/trace/rv/monitors/rtapp/Kconfig"
+source "kernel/trace/rv/monitors/pagefault/Kconfig"
+source "kernel/trace/rv/monitors/sleep/Kconfig"
+# Add new rtapp monitors here
+
# Add new monitors here
config RV_REACTORS
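
RV_PER_TASK_MONITORS sizes the per-task monitor slots. A sketch of where those slots live (the rv[] field in struct task_struct is stated here as an assumption about the RV headers, not quoted from this diff):

	/* sketch (assumption): storage reserved per task when CONFIG_RV is set */
	struct task_struct {
		/* ... */
	#ifdef CONFIG_RV
		union rv_task_monitor	rv[RV_PER_TASK_MONITORS];	/* one slot per attached per-task monitor */
	#endif
		/* ... */
	};

With the default of 2 the rtapp collection fits (its Kconfig below requires RV_PER_TASK_MONITORS >= 2), while the sched collection requires raising it to at least 3, as its Kconfig change further down shows.
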
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
index f9b2cd0483c3..750e4ad6fa0f 100644
--- a/kernel/trace/rv/Makefile
+++ b/kernel/trace/rv/Makefile
@@ -6,12 +6,17 @@ obj-$(CONFIG_RV) += rv.o
obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o
obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o
obj-$(CONFIG_RV_MON_SCHED) += monitors/sched/sched.o
-obj-$(CONFIG_RV_MON_TSS) += monitors/tss/tss.o
obj-$(CONFIG_RV_MON_SCO) += monitors/sco/sco.o
obj-$(CONFIG_RV_MON_SNROC) += monitors/snroc/snroc.o
obj-$(CONFIG_RV_MON_SCPD) += monitors/scpd/scpd.o
obj-$(CONFIG_RV_MON_SNEP) += monitors/snep/snep.o
-obj-$(CONFIG_RV_MON_SNCID) += monitors/sncid/sncid.o
+obj-$(CONFIG_RV_MON_RTAPP) += monitors/rtapp/rtapp.o
+obj-$(CONFIG_RV_MON_PAGEFAULT) += monitors/pagefault/pagefault.o
+obj-$(CONFIG_RV_MON_SLEEP) += monitors/sleep/sleep.o
+obj-$(CONFIG_RV_MON_STS) += monitors/sts/sts.o
+obj-$(CONFIG_RV_MON_NRP) += monitors/nrp/nrp.o
+obj-$(CONFIG_RV_MON_SSSW) += monitors/sssw/sssw.o
+obj-$(CONFIG_RV_MON_OPID) += monitors/opid/opid.o
# Add new monitors here
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
diff --git a/kernel/trace/rv/monitors/tss/Kconfig b/kernel/trace/rv/monitors/nrp/Kconfig
index 479f86f52e60..f5ec08f65535 100644
--- a/kernel/trace/rv/monitors/tss/Kconfig
+++ b/kernel/trace/rv/monitors/nrp/Kconfig
@@ -1,14 +1,16 @@
# SPDX-License-Identifier: GPL-2.0-only
#
-config RV_MON_TSS
+config RV_MON_NRP
depends on RV
depends on RV_MON_SCHED
- default y
- select DA_MON_EVENTS_IMPLICIT
- bool "tss monitor"
+ default y if !ARM64
+ select DA_MON_EVENTS_ID
+ bool "nrp monitor"
help
- Monitor to ensure sched_switch happens only in scheduling context.
+ Monitor to ensure preemption requires need resched.
This monitor is part of the sched monitors collection.
+	  This monitor is unstable on arm64; say N unless you are testing it.
+
For further information, see:
Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/nrp/nrp.c b/kernel/trace/rv/monitors/nrp/nrp.c
new file mode 100644
index 000000000000..5a83b7171432
--- /dev/null
+++ b/kernel/trace/rv/monitors/nrp/nrp.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "nrp"
+
+#include <trace/events/irq.h>
+#include <trace/events/sched.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "nrp.h"
+
+static struct rv_monitor rv_nrp;
+DECLARE_DA_MON_PER_TASK(nrp, unsigned char);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/trace/irq_vectors.h>
+
+static void handle_vector_irq_entry(void *data, int vector)
+{
+ da_handle_event_nrp(current, irq_entry_nrp);
+}
+
+static void attach_vector_irq(void)
+{
+ rv_attach_trace_probe("nrp", local_timer_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_IRQ_WORK))
+ rv_attach_trace_probe("nrp", irq_work_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ rv_attach_trace_probe("nrp", reschedule_entry, handle_vector_irq_entry);
+ rv_attach_trace_probe("nrp", call_function_entry, handle_vector_irq_entry);
+ rv_attach_trace_probe("nrp", call_function_single_entry, handle_vector_irq_entry);
+ }
+}
+
+static void detach_vector_irq(void)
+{
+ rv_detach_trace_probe("nrp", local_timer_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_IRQ_WORK))
+ rv_detach_trace_probe("nrp", irq_work_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ rv_detach_trace_probe("nrp", reschedule_entry, handle_vector_irq_entry);
+ rv_detach_trace_probe("nrp", call_function_entry, handle_vector_irq_entry);
+ rv_detach_trace_probe("nrp", call_function_single_entry, handle_vector_irq_entry);
+ }
+}
+
+#else
+/* We assume irq_entry tracepoints are sufficient on other architectures */
+static void attach_vector_irq(void) { }
+static void detach_vector_irq(void) { }
+#endif
+
+static void handle_irq_entry(void *data, int irq, struct irqaction *action)
+{
+ da_handle_event_nrp(current, irq_entry_nrp);
+}
+
+static void handle_sched_need_resched(void *data, struct task_struct *tsk,
+ int cpu, int tif)
+{
+ /*
+ * Although need_resched leads to both the rescheduling and preempt_irq
+	 * states, it is safer to always start the monitor in preempt_irq,
+	 * which may not mirror the system state but makes the monitor simpler.
+ */
+ if (tif == TIF_NEED_RESCHED)
+ da_handle_start_event_nrp(tsk, sched_need_resched_nrp);
+}
+
+static void handle_schedule_entry(void *data, bool preempt)
+{
+ if (preempt)
+ da_handle_event_nrp(current, schedule_entry_preempt_nrp);
+ else
+ da_handle_event_nrp(current, schedule_entry_nrp);
+}
+
+static int enable_nrp(void)
+{
+ int retval;
+
+ retval = da_monitor_init_nrp();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("nrp", irq_handler_entry, handle_irq_entry);
+ rv_attach_trace_probe("nrp", sched_set_need_resched_tp, handle_sched_need_resched);
+ rv_attach_trace_probe("nrp", sched_entry_tp, handle_schedule_entry);
+ attach_vector_irq();
+
+ return 0;
+}
+
+static void disable_nrp(void)
+{
+ rv_nrp.enabled = 0;
+
+ rv_detach_trace_probe("nrp", irq_handler_entry, handle_irq_entry);
+ rv_detach_trace_probe("nrp", sched_set_need_resched_tp, handle_sched_need_resched);
+ rv_detach_trace_probe("nrp", sched_entry_tp, handle_schedule_entry);
+ detach_vector_irq();
+
+ da_monitor_destroy_nrp();
+}
+
+static struct rv_monitor rv_nrp = {
+ .name = "nrp",
+ .description = "need resched preempts.",
+ .enable = enable_nrp,
+ .disable = disable_nrp,
+ .reset = da_monitor_reset_all_nrp,
+ .enabled = 0,
+};
+
+static int __init register_nrp(void)
+{
+ return rv_register_monitor(&rv_nrp, &rv_sched);
+}
+
+static void __exit unregister_nrp(void)
+{
+ rv_unregister_monitor(&rv_nrp);
+}
+
+module_init(register_nrp);
+module_exit(unregister_nrp);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("nrp: need resched preempts.");
diff --git a/kernel/trace/rv/monitors/nrp/nrp.h b/kernel/trace/rv/monitors/nrp/nrp.h
new file mode 100644
index 000000000000..c9f12207cbf6
--- /dev/null
+++ b/kernel/trace/rv/monitors/nrp/nrp.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of nrp automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_nrp {
+ preempt_irq_nrp = 0,
+ any_thread_running_nrp,
+ nested_preempt_nrp,
+ rescheduling_nrp,
+ state_max_nrp
+};
+
+#define INVALID_STATE state_max_nrp
+
+enum events_nrp {
+ irq_entry_nrp = 0,
+ sched_need_resched_nrp,
+ schedule_entry_nrp,
+ schedule_entry_preempt_nrp,
+ event_max_nrp
+};
+
+struct automaton_nrp {
+ char *state_names[state_max_nrp];
+ char *event_names[event_max_nrp];
+ unsigned char function[state_max_nrp][event_max_nrp];
+ unsigned char initial_state;
+ bool final_states[state_max_nrp];
+};
+
+static const struct automaton_nrp automaton_nrp = {
+ .state_names = {
+ "preempt_irq",
+ "any_thread_running",
+ "nested_preempt",
+ "rescheduling"
+ },
+ .event_names = {
+ "irq_entry",
+ "sched_need_resched",
+ "schedule_entry",
+ "schedule_entry_preempt"
+ },
+ .function = {
+ {
+ preempt_irq_nrp,
+ preempt_irq_nrp,
+ nested_preempt_nrp,
+ nested_preempt_nrp
+ },
+ {
+ any_thread_running_nrp,
+ rescheduling_nrp,
+ any_thread_running_nrp,
+ INVALID_STATE
+ },
+ {
+ nested_preempt_nrp,
+ preempt_irq_nrp,
+ any_thread_running_nrp,
+ any_thread_running_nrp
+ },
+ {
+ preempt_irq_nrp,
+ rescheduling_nrp,
+ any_thread_running_nrp,
+ any_thread_running_nrp
+ },
+ },
+ .initial_state = preempt_irq_nrp,
+ .final_states = { 0, 1, 0, 0 },
+};
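
The table reads as function[current_state][event] -> next_state, with state_max_nrp doubling as INVALID_STATE to flag forbidden transitions. A hedged sketch of one automaton step (the real stepping and reaction logic lives in rv/da_monitor.h):

	/* sketch only: advance the automaton, reporting a violation on INVALID_STATE */
	static inline bool nrp_step(unsigned char *state, enum events_nrp event)
	{
		unsigned char next = automaton_nrp.function[*state][event];

		if (next == INVALID_STATE)
			return false;	/* event not allowed in this state */
		*state = next;
		return true;
	}
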
diff --git a/kernel/trace/rv/monitors/nrp/nrp_trace.h b/kernel/trace/rv/monitors/nrp/nrp_trace.h
new file mode 100644
index 000000000000..2e13497de3b6
--- /dev/null
+++ b/kernel/trace/rv/monitors/nrp/nrp_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_NRP
+DEFINE_EVENT(event_da_monitor_id, event_nrp,
+ TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(id, state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor_id, error_nrp,
+ TP_PROTO(int id, char *state, char *event),
+ TP_ARGS(id, state, event));
+#endif /* CONFIG_RV_MON_NRP */
diff --git a/kernel/trace/rv/monitors/opid/Kconfig b/kernel/trace/rv/monitors/opid/Kconfig
new file mode 100644
index 000000000000..561d32da572b
--- /dev/null
+++ b/kernel/trace/rv/monitors/opid/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_OPID
+ depends on RV
+ depends on TRACE_IRQFLAGS
+ depends on TRACE_PREEMPT_TOGGLE
+ depends on RV_MON_SCHED
+ default y if PREEMPT_RT
+ select DA_MON_EVENTS_IMPLICIT
+ bool "opid monitor"
+ help
+ Monitor to ensure operations like wakeup and need resched occur with
+ interrupts and preemption disabled or during IRQs, where preemption
+ may not be disabled explicitly.
+
+	  This monitor is unstable on !PREEMPT_RT; say N unless you are testing it.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/opid/opid.c b/kernel/trace/rv/monitors/opid/opid.c
new file mode 100644
index 000000000000..50d64e7fb8c4
--- /dev/null
+++ b/kernel/trace/rv/monitors/opid/opid.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "opid"
+
+#include <trace/events/sched.h>
+#include <trace/events/irq.h>
+#include <trace/events/preemptirq.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "opid.h"
+
+static struct rv_monitor rv_opid;
+DECLARE_DA_MON_PER_CPU(opid, unsigned char);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/trace/irq_vectors.h>
+
+static void handle_vector_irq_entry(void *data, int vector)
+{
+ da_handle_event_opid(irq_entry_opid);
+}
+
+static void attach_vector_irq(void)
+{
+ rv_attach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_IRQ_WORK))
+ rv_attach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ rv_attach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry);
+ rv_attach_trace_probe("opid", call_function_entry, handle_vector_irq_entry);
+ rv_attach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry);
+ }
+}
+
+static void detach_vector_irq(void)
+{
+ rv_detach_trace_probe("opid", local_timer_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_IRQ_WORK))
+ rv_detach_trace_probe("opid", irq_work_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ rv_detach_trace_probe("opid", reschedule_entry, handle_vector_irq_entry);
+ rv_detach_trace_probe("opid", call_function_entry, handle_vector_irq_entry);
+ rv_detach_trace_probe("opid", call_function_single_entry, handle_vector_irq_entry);
+ }
+}
+
+#else
+/* We assume irq_entry tracepoints are sufficient on other architectures */
+static void attach_vector_irq(void) { }
+static void detach_vector_irq(void) { }
+#endif
+
+static void handle_irq_disable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_event_opid(irq_disable_opid);
+}
+
+static void handle_irq_enable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_event_opid(irq_enable_opid);
+}
+
+static void handle_irq_entry(void *data, int irq, struct irqaction *action)
+{
+ da_handle_event_opid(irq_entry_opid);
+}
+
+static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_event_opid(preempt_disable_opid);
+}
+
+static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_event_opid(preempt_enable_opid);
+}
+
+static void handle_sched_need_resched(void *data, struct task_struct *tsk, int cpu, int tif)
+{
+	/* The monitor's initial state is not in_irq */
+ if (this_cpu_read(hardirq_context))
+ da_handle_event_opid(sched_need_resched_opid);
+ else
+ da_handle_start_event_opid(sched_need_resched_opid);
+}
+
+static void handle_sched_waking(void *data, struct task_struct *p)
+{
+	/* The monitor's initial state is not in_irq */
+ if (this_cpu_read(hardirq_context))
+ da_handle_event_opid(sched_waking_opid);
+ else
+ da_handle_start_event_opid(sched_waking_opid);
+}
+
+static int enable_opid(void)
+{
+ int retval;
+
+ retval = da_monitor_init_opid();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("opid", irq_disable, handle_irq_disable);
+ rv_attach_trace_probe("opid", irq_enable, handle_irq_enable);
+ rv_attach_trace_probe("opid", irq_handler_entry, handle_irq_entry);
+ rv_attach_trace_probe("opid", preempt_disable, handle_preempt_disable);
+ rv_attach_trace_probe("opid", preempt_enable, handle_preempt_enable);
+ rv_attach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched);
+ rv_attach_trace_probe("opid", sched_waking, handle_sched_waking);
+ attach_vector_irq();
+
+ return 0;
+}
+
+static void disable_opid(void)
+{
+ rv_opid.enabled = 0;
+
+ rv_detach_trace_probe("opid", irq_disable, handle_irq_disable);
+ rv_detach_trace_probe("opid", irq_enable, handle_irq_enable);
+ rv_detach_trace_probe("opid", irq_handler_entry, handle_irq_entry);
+ rv_detach_trace_probe("opid", preempt_disable, handle_preempt_disable);
+ rv_detach_trace_probe("opid", preempt_enable, handle_preempt_enable);
+ rv_detach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched);
+ rv_detach_trace_probe("opid", sched_waking, handle_sched_waking);
+ detach_vector_irq();
+
+ da_monitor_destroy_opid();
+}
+
+/*
+ * This is the monitor register section.
+ */
+static struct rv_monitor rv_opid = {
+ .name = "opid",
+ .description = "operations with preemption and irq disabled.",
+ .enable = enable_opid,
+ .disable = disable_opid,
+ .reset = da_monitor_reset_all_opid,
+ .enabled = 0,
+};
+
+static int __init register_opid(void)
+{
+ return rv_register_monitor(&rv_opid, &rv_sched);
+}
+
+static void __exit unregister_opid(void)
+{
+ rv_unregister_monitor(&rv_opid);
+}
+
+module_init(register_opid);
+module_exit(unregister_opid);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("opid: operations with preemption and irq disabled.");
diff --git a/kernel/trace/rv/monitors/opid/opid.h b/kernel/trace/rv/monitors/opid/opid.h
new file mode 100644
index 000000000000..b4b8c2ff7f64
--- /dev/null
+++ b/kernel/trace/rv/monitors/opid/opid.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of opid automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_opid {
+ disabled_opid = 0,
+ enabled_opid,
+ in_irq_opid,
+ irq_disabled_opid,
+ preempt_disabled_opid,
+ state_max_opid
+};
+
+#define INVALID_STATE state_max_opid
+
+enum events_opid {
+ irq_disable_opid = 0,
+ irq_enable_opid,
+ irq_entry_opid,
+ preempt_disable_opid,
+ preempt_enable_opid,
+ sched_need_resched_opid,
+ sched_waking_opid,
+ event_max_opid
+};
+
+struct automaton_opid {
+ char *state_names[state_max_opid];
+ char *event_names[event_max_opid];
+ unsigned char function[state_max_opid][event_max_opid];
+ unsigned char initial_state;
+ bool final_states[state_max_opid];
+};
+
+static const struct automaton_opid automaton_opid = {
+ .state_names = {
+ "disabled",
+ "enabled",
+ "in_irq",
+ "irq_disabled",
+ "preempt_disabled"
+ },
+ .event_names = {
+ "irq_disable",
+ "irq_enable",
+ "irq_entry",
+ "preempt_disable",
+ "preempt_enable",
+ "sched_need_resched",
+ "sched_waking"
+ },
+ .function = {
+ {
+ INVALID_STATE,
+ preempt_disabled_opid,
+ disabled_opid,
+ INVALID_STATE,
+ irq_disabled_opid,
+ disabled_opid,
+ disabled_opid
+ },
+ {
+ irq_disabled_opid,
+ INVALID_STATE,
+ INVALID_STATE,
+ preempt_disabled_opid,
+ enabled_opid,
+ INVALID_STATE,
+ INVALID_STATE
+ },
+ {
+ INVALID_STATE,
+ enabled_opid,
+ in_irq_opid,
+ INVALID_STATE,
+ INVALID_STATE,
+ in_irq_opid,
+ in_irq_opid
+ },
+ {
+ INVALID_STATE,
+ enabled_opid,
+ in_irq_opid,
+ disabled_opid,
+ INVALID_STATE,
+ irq_disabled_opid,
+ INVALID_STATE
+ },
+ {
+ disabled_opid,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ enabled_opid,
+ INVALID_STATE,
+ INVALID_STATE
+ },
+ },
+ .initial_state = disabled_opid,
+ .final_states = { 0, 1, 0, 0, 0 },
+};
diff --git a/kernel/trace/rv/monitors/sncid/sncid_trace.h b/kernel/trace/rv/monitors/opid/opid_trace.h
index 3ce42a57671d..3df6ff955c30 100644
--- a/kernel/trace/rv/monitors/sncid/sncid_trace.h
+++ b/kernel/trace/rv/monitors/opid/opid_trace.h
@@ -4,12 +4,12 @@
* Snippet to be included in rv_trace.h
*/
-#ifdef CONFIG_RV_MON_SNCID
-DEFINE_EVENT(event_da_monitor, event_sncid,
+#ifdef CONFIG_RV_MON_OPID
+DEFINE_EVENT(event_da_monitor, event_opid,
TP_PROTO(char *state, char *event, char *next_state, bool final_state),
TP_ARGS(state, event, next_state, final_state));
-DEFINE_EVENT(error_da_monitor, error_sncid,
+DEFINE_EVENT(error_da_monitor, error_opid,
TP_PROTO(char *state, char *event),
TP_ARGS(state, event));
-#endif /* CONFIG_RV_MON_SNCID */
+#endif /* CONFIG_RV_MON_OPID */
diff --git a/kernel/trace/rv/monitors/pagefault/Kconfig b/kernel/trace/rv/monitors/pagefault/Kconfig
new file mode 100644
index 000000000000..5e16625f1653
--- /dev/null
+++ b/kernel/trace/rv/monitors/pagefault/Kconfig
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_PAGEFAULT
+ depends on RV
+ select RV_LTL_MONITOR
+ depends on RV_MON_RTAPP
+ depends on X86 || RISCV
+ default y
+ select LTL_MON_EVENTS_ID
+ bool "pagefault monitor"
+ help
+	  Monitor that real-time tasks do not raise page faults, which cause
+	  undesirable latency.
+
+	  If you are developing a real-time system and are not entirely sure
+	  whether the applications are designed correctly for real-time, you
+	  want to say Y here.
+
+ This monitor does not affect execution speed while it is not running,
+	  therefore it is safe to enable this in a production kernel.
diff --git a/kernel/trace/rv/monitors/pagefault/pagefault.c b/kernel/trace/rv/monitors/pagefault/pagefault.c
new file mode 100644
index 000000000000..9fe6123b2200
--- /dev/null
+++ b/kernel/trace/rv/monitors/pagefault/pagefault.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rv.h>
+#include <linux/sched/deadline.h>
+#include <linux/sched/rt.h>
+#include <linux/tracepoint.h>
+#include <rv/instrumentation.h>
+
+#define MODULE_NAME "pagefault"
+
+#include <rv_trace.h>
+#include <trace/events/exceptions.h>
+#include <monitors/rtapp/rtapp.h>
+
+#include "pagefault.h"
+#include <rv/ltl_monitor.h>
+
+static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon)
+{
+ /*
+ * This includes "actual" real-time tasks and also PI-boosted
+	 * tasks. A task being PI-boosted means it is blocking an "actual"
+	 * real-time task, so it should also obey the monitor's rule;
+	 * otherwise the "actual" real-time task may be delayed.
+ */
+ ltl_atom_set(mon, LTL_RT, rt_or_dl_task(task));
+}
+
+static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bool task_creation)
+{
+ if (task_creation)
+ ltl_atom_set(mon, LTL_PAGEFAULT, false);
+}
+
+static void handle_page_fault(void *data, unsigned long address, struct pt_regs *regs,
+ unsigned long error_code)
+{
+ ltl_atom_pulse(current, LTL_PAGEFAULT, true);
+}
+
+static int enable_pagefault(void)
+{
+ int retval;
+
+ retval = ltl_monitor_init();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("rtapp_pagefault", page_fault_kernel, handle_page_fault);
+ rv_attach_trace_probe("rtapp_pagefault", page_fault_user, handle_page_fault);
+
+ return 0;
+}
+
+static void disable_pagefault(void)
+{
+ rv_detach_trace_probe("rtapp_pagefault", page_fault_kernel, handle_page_fault);
+ rv_detach_trace_probe("rtapp_pagefault", page_fault_user, handle_page_fault);
+
+ ltl_monitor_destroy();
+}
+
+static struct rv_monitor rv_pagefault = {
+ .name = "pagefault",
+ .description = "Monitor that RT tasks do not raise page faults",
+ .enable = enable_pagefault,
+ .disable = disable_pagefault,
+};
+
+static int __init register_pagefault(void)
+{
+ return rv_register_monitor(&rv_pagefault, &rv_rtapp);
+}
+
+static void __exit unregister_pagefault(void)
+{
+ rv_unregister_monitor(&rv_pagefault);
+}
+
+module_init(register_pagefault);
+module_exit(unregister_pagefault);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nam Cao <namcao@linutronix.de>");
+MODULE_DESCRIPTION("pagefault: Monitor that RT tasks do not raise page faults");
diff --git a/kernel/trace/rv/monitors/pagefault/pagefault.h b/kernel/trace/rv/monitors/pagefault/pagefault.h
new file mode 100644
index 000000000000..c580ec194009
--- /dev/null
+++ b/kernel/trace/rv/monitors/pagefault/pagefault.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * C implementation of Buchi automaton, automatically generated by
+ * tools/verification/rvgen from the linear temporal logic specification.
+ * For further information, see kernel documentation:
+ * Documentation/trace/rv/linear_temporal_logic.rst
+ */
+
+#include <linux/rv.h>
+
+#define MONITOR_NAME pagefault
+
+enum ltl_atom {
+ LTL_PAGEFAULT,
+ LTL_RT,
+ LTL_NUM_ATOM
+};
+static_assert(LTL_NUM_ATOM <= RV_MAX_LTL_ATOM);
+
+static const char *ltl_atom_str(enum ltl_atom atom)
+{
+ static const char *const names[] = {
+ "pa",
+ "rt",
+ };
+
+ return names[atom];
+}
+
+enum ltl_buchi_state {
+ S0,
+ RV_NUM_BA_STATES
+};
+static_assert(RV_NUM_BA_STATES <= RV_MAX_BA_STATES);
+
+static void ltl_start(struct task_struct *task, struct ltl_monitor *mon)
+{
+ bool pagefault = test_bit(LTL_PAGEFAULT, mon->atoms);
+ bool val3 = !pagefault;
+ bool rt = test_bit(LTL_RT, mon->atoms);
+ bool val1 = !rt;
+ bool val4 = val1 || val3;
+
+ if (val4)
+ __set_bit(S0, mon->states);
+}
+
+static void
+ltl_possible_next_states(struct ltl_monitor *mon, unsigned int state, unsigned long *next)
+{
+ bool pagefault = test_bit(LTL_PAGEFAULT, mon->atoms);
+ bool val3 = !pagefault;
+ bool rt = test_bit(LTL_RT, mon->atoms);
+ bool val1 = !rt;
+ bool val4 = val1 || val3;
+
+ switch (state) {
+ case S0:
+ if (val4)
+ __set_bit(S0, next);
+ break;
+ }
+}
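
The single Buchi state is easiest to read back as a temporal formula: S0 persists exactly while val4 = (!rt || !pagefault) holds, so the generated automaton corresponds to the specification (reconstructed here from the code, not quoted from the spec file):

	\square \, (\mathrm{RT} \rightarrow \lnot \mathrm{PAGEFAULT})

that is, at every step a real-time (or PI-boosted) task must not be taking a page fault.
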
diff --git a/kernel/trace/rv/monitors/pagefault/pagefault_trace.h b/kernel/trace/rv/monitors/pagefault/pagefault_trace.h
new file mode 100644
index 000000000000..fe1f82597b1a
--- /dev/null
+++ b/kernel/trace/rv/monitors/pagefault/pagefault_trace.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_PAGEFAULT
+DEFINE_EVENT(event_ltl_monitor_id, event_pagefault,
+ TP_PROTO(struct task_struct *task, char *states, char *atoms, char *next),
+ TP_ARGS(task, states, atoms, next));
+DEFINE_EVENT(error_ltl_monitor_id, error_pagefault,
+ TP_PROTO(struct task_struct *task),
+ TP_ARGS(task));
+#endif /* CONFIG_RV_MON_PAGEFAULT */
diff --git a/kernel/trace/rv/monitors/rtapp/Kconfig b/kernel/trace/rv/monitors/rtapp/Kconfig
new file mode 100644
index 000000000000..1ce9370a9ba8
--- /dev/null
+++ b/kernel/trace/rv/monitors/rtapp/Kconfig
@@ -0,0 +1,11 @@
+config RV_MON_RTAPP
+ depends on RV
+ depends on RV_PER_TASK_MONITORS >= 2
+ bool "rtapp monitor"
+ help
+ Collection of monitors to check for common problems with real-time
+	  applications that may cause unexpected latency.
+
+	  If you are developing a real-time system and are not entirely sure
+	  whether the applications are designed correctly for real-time, you
+	  want to say Y here.
diff --git a/kernel/trace/rv/monitors/rtapp/rtapp.c b/kernel/trace/rv/monitors/rtapp/rtapp.c
new file mode 100644
index 000000000000..fd75fc927d65
--- /dev/null
+++ b/kernel/trace/rv/monitors/rtapp/rtapp.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+
+#define MODULE_NAME "rtapp"
+
+#include "rtapp.h"
+
+struct rv_monitor rv_rtapp;
+
+struct rv_monitor rv_rtapp = {
+ .name = "rtapp",
+ .description = "Collection of monitors for detecting problems with real-time applications",
+};
+
+static int __init register_rtapp(void)
+{
+ return rv_register_monitor(&rv_rtapp, NULL);
+}
+
+static void __exit unregister_rtapp(void)
+{
+ rv_unregister_monitor(&rv_rtapp);
+}
+
+module_init(register_rtapp);
+module_exit(unregister_rtapp);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nam Cao <namcao@linutronix.de>");
+MODULE_DESCRIPTION("Collection of monitors for detecting problems with real-time applications");
diff --git a/kernel/trace/rv/monitors/rtapp/rtapp.h b/kernel/trace/rv/monitors/rtapp/rtapp.h
new file mode 100644
index 000000000000..4c200d67c7f6
--- /dev/null
+++ b/kernel/trace/rv/monitors/rtapp/rtapp.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+extern struct rv_monitor rv_rtapp;
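
rtapp deliberately defines no enable/disable/reset callbacks: it is a pure container monitor. Children attach to it by passing &rv_rtapp as the parent, exactly as the other files in this series do:

	/* as done in register_pagefault() and register_sleep() elsewhere in this diff */
	rv_register_monitor(&rv_pagefault, &rv_rtapp);
	rv_register_monitor(&rv_sleep, &rv_rtapp);
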
diff --git a/kernel/trace/rv/monitors/sched/Kconfig b/kernel/trace/rv/monitors/sched/Kconfig
index ae3eb410abd7..aa16456da864 100644
--- a/kernel/trace/rv/monitors/sched/Kconfig
+++ b/kernel/trace/rv/monitors/sched/Kconfig
@@ -2,6 +2,7 @@
#
config RV_MON_SCHED
depends on RV
+ depends on RV_PER_TASK_MONITORS >= 3
bool "sched monitor"
help
Collection of monitors to check the scheduler behaves according to specifications.
diff --git a/kernel/trace/rv/monitors/sched/sched.c b/kernel/trace/rv/monitors/sched/sched.c
index 905e03c3c934..d04db4b543f9 100644
--- a/kernel/trace/rv/monitors/sched/sched.c
+++ b/kernel/trace/rv/monitors/sched/sched.c
@@ -21,8 +21,7 @@ struct rv_monitor rv_sched = {
static int __init register_sched(void)
{
- rv_register_monitor(&rv_sched, NULL);
- return 0;
+ return rv_register_monitor(&rv_sched, NULL);
}
static void __exit unregister_sched(void)
diff --git a/kernel/trace/rv/monitors/sco/sco.c b/kernel/trace/rv/monitors/sco/sco.c
index 4cff59220bfc..04c36405e2e3 100644
--- a/kernel/trace/rv/monitors/sco/sco.c
+++ b/kernel/trace/rv/monitors/sco/sco.c
@@ -24,12 +24,12 @@ static void handle_sched_set_state(void *data, struct task_struct *tsk, int stat
da_handle_start_event_sco(sched_set_state_sco);
}
-static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
+static void handle_schedule_entry(void *data, bool preempt)
{
da_handle_event_sco(schedule_entry_sco);
}
-static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
+static void handle_schedule_exit(void *data, bool is_switch)
{
da_handle_start_event_sco(schedule_exit_sco);
}
@@ -71,8 +71,7 @@ static struct rv_monitor rv_sco = {
static int __init register_sco(void)
{
- rv_register_monitor(&rv_sco, &rv_sched);
- return 0;
+ return rv_register_monitor(&rv_sco, &rv_sched);
}
static void __exit unregister_sco(void)
diff --git a/kernel/trace/rv/monitors/scpd/Kconfig b/kernel/trace/rv/monitors/scpd/Kconfig
index b9114fbf680f..682d0416188b 100644
--- a/kernel/trace/rv/monitors/scpd/Kconfig
+++ b/kernel/trace/rv/monitors/scpd/Kconfig
@@ -2,7 +2,7 @@
#
config RV_MON_SCPD
depends on RV
- depends on PREEMPT_TRACER
+ depends on TRACE_PREEMPT_TOGGLE
depends on RV_MON_SCHED
default y
select DA_MON_EVENTS_IMPLICIT
diff --git a/kernel/trace/rv/monitors/scpd/scpd.c b/kernel/trace/rv/monitors/scpd/scpd.c
index cbdd6a5f8d7f..1e351ba52fee 100644
--- a/kernel/trace/rv/monitors/scpd/scpd.c
+++ b/kernel/trace/rv/monitors/scpd/scpd.c
@@ -30,12 +30,12 @@ static void handle_preempt_enable(void *data, unsigned long ip, unsigned long pa
da_handle_start_event_scpd(preempt_enable_scpd);
}
-static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
+static void handle_schedule_entry(void *data, bool preempt)
{
da_handle_event_scpd(schedule_entry_scpd);
}
-static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
+static void handle_schedule_exit(void *data, bool is_switch)
{
da_handle_event_scpd(schedule_exit_scpd);
}
@@ -79,8 +79,7 @@ static struct rv_monitor rv_scpd = {
static int __init register_scpd(void)
{
- rv_register_monitor(&rv_scpd, &rv_sched);
- return 0;
+ return rv_register_monitor(&rv_scpd, &rv_sched);
}
static void __exit unregister_scpd(void)
diff --git a/kernel/trace/rv/monitors/sleep/Kconfig b/kernel/trace/rv/monitors/sleep/Kconfig
new file mode 100644
index 000000000000..6b7a122e7b47
--- /dev/null
+++ b/kernel/trace/rv/monitors/sleep/Kconfig
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_SLEEP
+ depends on RV
+ select RV_LTL_MONITOR
+ depends on HAVE_SYSCALL_TRACEPOINTS
+ depends on RV_MON_RTAPP
+ select TRACE_IRQFLAGS
+ default y
+ select LTL_MON_EVENTS_ID
+ bool "sleep monitor"
+ help
+ Monitor that real-time tasks do not sleep in a manner that may
+ cause undesirable latency.
+
+	  If you are developing a real-time system and are not entirely sure
+	  whether the applications are designed correctly for real-time, you
+	  want to say Y here.
+
+	  Enabling this monitor may have a performance impact (because it
+	  selects TRACE_IRQFLAGS). Therefore, you should probably say N for a
+	  production kernel.
diff --git a/kernel/trace/rv/monitors/sleep/sleep.c b/kernel/trace/rv/monitors/sleep/sleep.c
new file mode 100644
index 000000000000..eea447b06907
--- /dev/null
+++ b/kernel/trace/rv/monitors/sleep/sleep.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/init.h>
+#include <linux/irqflags.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rv.h>
+#include <linux/sched/deadline.h>
+#include <linux/sched/rt.h>
+#include <rv/instrumentation.h>
+
+#define MODULE_NAME "sleep"
+
+#include <trace/events/syscalls.h>
+#include <trace/events/sched.h>
+#include <trace/events/lock.h>
+#include <uapi/linux/futex.h>
+#include <rv_trace.h>
+#include <monitors/rtapp/rtapp.h>
+
+#include "sleep.h"
+#include <rv/ltl_monitor.h>
+
+static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon)
+{
+ /*
+ * This includes "actual" real-time tasks and also PI-boosted
+	 * tasks. A task being PI-boosted means it is blocking an "actual"
+	 * real-time task, so it should also obey the monitor's rule;
+	 * otherwise the "actual" real-time task may be delayed.
+ */
+ ltl_atom_set(mon, LTL_RT, rt_or_dl_task(task));
+}
+
+static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bool task_creation)
+{
+ ltl_atom_set(mon, LTL_SLEEP, false);
+ ltl_atom_set(mon, LTL_WAKE, false);
+ ltl_atom_set(mon, LTL_ABORT_SLEEP, false);
+ ltl_atom_set(mon, LTL_WOKEN_BY_HARDIRQ, false);
+ ltl_atom_set(mon, LTL_WOKEN_BY_NMI, false);
+ ltl_atom_set(mon, LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO, false);
+
+ if (task_creation) {
+ ltl_atom_set(mon, LTL_KTHREAD_SHOULD_STOP, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_MONOTONIC, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_TAI, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
+ ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false);
+ ltl_atom_set(mon, LTL_FUTEX_WAIT, false);
+ ltl_atom_set(mon, LTL_FUTEX_LOCK_PI, false);
+ ltl_atom_set(mon, LTL_BLOCK_ON_RT_MUTEX, false);
+ }
+
+ if (task->flags & PF_KTHREAD) {
+ ltl_atom_set(mon, LTL_KERNEL_THREAD, true);
+
+		/* kernel threads do not make syscalls */
+ ltl_atom_set(mon, LTL_FUTEX_WAIT, false);
+ ltl_atom_set(mon, LTL_FUTEX_LOCK_PI, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_MONOTONIC, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_TAI, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
+ ltl_atom_set(mon, LTL_CLOCK_NANOSLEEP, false);
+
+ if (strstarts(task->comm, "migration/"))
+ ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, true);
+ else
+ ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, false);
+
+ if (strstarts(task->comm, "rcu"))
+ ltl_atom_set(mon, LTL_TASK_IS_RCU, true);
+ else
+ ltl_atom_set(mon, LTL_TASK_IS_RCU, false);
+ } else {
+ ltl_atom_set(mon, LTL_KTHREAD_SHOULD_STOP, false);
+ ltl_atom_set(mon, LTL_KERNEL_THREAD, false);
+ ltl_atom_set(mon, LTL_TASK_IS_RCU, false);
+ ltl_atom_set(mon, LTL_TASK_IS_MIGRATION, false);
+ }
+}
+
+static void handle_sched_set_state(void *data, struct task_struct *task, int state)
+{
+ if (state & TASK_INTERRUPTIBLE)
+ ltl_atom_pulse(task, LTL_SLEEP, true);
+ else if (state == TASK_RUNNING)
+ ltl_atom_pulse(task, LTL_ABORT_SLEEP, true);
+}
+
+static void handle_sched_wakeup(void *data, struct task_struct *task)
+{
+ ltl_atom_pulse(task, LTL_WAKE, true);
+}
+
+static void handle_sched_waking(void *data, struct task_struct *task)
+{
+ if (this_cpu_read(hardirq_context)) {
+ ltl_atom_pulse(task, LTL_WOKEN_BY_HARDIRQ, true);
+ } else if (in_task()) {
+ if (current->prio <= task->prio)
+ ltl_atom_pulse(task, LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO, true);
+ } else if (in_nmi()) {
+ ltl_atom_pulse(task, LTL_WOKEN_BY_NMI, true);
+ }
+}
+
+static void handle_contention_begin(void *data, void *lock, unsigned int flags)
+{
+ if (flags & LCB_F_RT)
+ ltl_atom_update(current, LTL_BLOCK_ON_RT_MUTEX, true);
+}
+
+static void handle_contention_end(void *data, void *lock, int ret)
+{
+ ltl_atom_update(current, LTL_BLOCK_ON_RT_MUTEX, false);
+}
+
+static void handle_sys_enter(void *data, struct pt_regs *regs, long id)
+{
+ struct ltl_monitor *mon;
+ unsigned long args[6];
+ int op, cmd;
+
+ mon = ltl_get_monitor(current);
+
+ switch (id) {
+ case __NR_clock_nanosleep:
+#ifdef __NR_clock_nanosleep_time64
+ case __NR_clock_nanosleep_time64:
+#endif
+ syscall_get_arguments(current, regs, args);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_MONOTONIC, args[0] == CLOCK_MONOTONIC);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_TAI, args[0] == CLOCK_TAI);
+ ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, args[1] == TIMER_ABSTIME);
+ ltl_atom_update(current, LTL_CLOCK_NANOSLEEP, true);
+ break;
+
+ case __NR_futex:
+#ifdef __NR_futex_time64
+ case __NR_futex_time64:
+#endif
+ syscall_get_arguments(current, regs, args);
+ op = args[1];
+ cmd = op & FUTEX_CMD_MASK;
+
+ switch (cmd) {
+ case FUTEX_LOCK_PI:
+ case FUTEX_LOCK_PI2:
+ ltl_atom_update(current, LTL_FUTEX_LOCK_PI, true);
+ break;
+ case FUTEX_WAIT:
+ case FUTEX_WAIT_BITSET:
+ case FUTEX_WAIT_REQUEUE_PI:
+ ltl_atom_update(current, LTL_FUTEX_WAIT, true);
+ break;
+ }
+ break;
+ }
+}
+
+static void handle_sys_exit(void *data, struct pt_regs *regs, long ret)
+{
+ struct ltl_monitor *mon = ltl_get_monitor(current);
+
+ ltl_atom_set(mon, LTL_FUTEX_LOCK_PI, false);
+ ltl_atom_set(mon, LTL_FUTEX_WAIT, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_MONOTONIC, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_CLOCK_TAI, false);
+ ltl_atom_set(mon, LTL_NANOSLEEP_TIMER_ABSTIME, false);
+ ltl_atom_update(current, LTL_CLOCK_NANOSLEEP, false);
+}
+
+static void handle_kthread_stop(void *data, struct task_struct *task)
+{
+ /* FIXME: this could race with other tracepoint handlers */
+ ltl_atom_update(task, LTL_KTHREAD_SHOULD_STOP, true);
+}
+
+static int enable_sleep(void)
+{
+ int retval;
+
+ retval = ltl_monitor_init();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("rtapp_sleep", sched_waking, handle_sched_waking);
+ rv_attach_trace_probe("rtapp_sleep", sched_wakeup, handle_sched_wakeup);
+ rv_attach_trace_probe("rtapp_sleep", sched_set_state_tp, handle_sched_set_state);
+ rv_attach_trace_probe("rtapp_sleep", contention_begin, handle_contention_begin);
+ rv_attach_trace_probe("rtapp_sleep", contention_end, handle_contention_end);
+ rv_attach_trace_probe("rtapp_sleep", sched_kthread_stop, handle_kthread_stop);
+ rv_attach_trace_probe("rtapp_sleep", sys_enter, handle_sys_enter);
+ rv_attach_trace_probe("rtapp_sleep", sys_exit, handle_sys_exit);
+ return 0;
+}
+
+static void disable_sleep(void)
+{
+ rv_detach_trace_probe("rtapp_sleep", sched_waking, handle_sched_waking);
+ rv_detach_trace_probe("rtapp_sleep", sched_wakeup, handle_sched_wakeup);
+ rv_detach_trace_probe("rtapp_sleep", sched_set_state_tp, handle_sched_set_state);
+ rv_detach_trace_probe("rtapp_sleep", contention_begin, handle_contention_begin);
+ rv_detach_trace_probe("rtapp_sleep", contention_end, handle_contention_end);
+ rv_detach_trace_probe("rtapp_sleep", sched_kthread_stop, handle_kthread_stop);
+ rv_detach_trace_probe("rtapp_sleep", sys_enter, handle_sys_enter);
+ rv_detach_trace_probe("rtapp_sleep", sys_exit, handle_sys_exit);
+
+ ltl_monitor_destroy();
+}
+
+static struct rv_monitor rv_sleep = {
+ .name = "sleep",
+ .description = "Monitor that RT tasks do not undesirably sleep",
+ .enable = enable_sleep,
+ .disable = disable_sleep,
+};
+
+static int __init register_sleep(void)
+{
+ return rv_register_monitor(&rv_sleep, &rv_rtapp);
+}
+
+static void __exit unregister_sleep(void)
+{
+ rv_unregister_monitor(&rv_sleep);
+}
+
+module_init(register_sleep);
+module_exit(unregister_sleep);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Nam Cao <namcao@linutronix.de>");
+MODULE_DESCRIPTION("sleep: Monitor that RT tasks do not undesirably sleep");
diff --git a/kernel/trace/rv/monitors/sleep/sleep.h b/kernel/trace/rv/monitors/sleep/sleep.h
new file mode 100644
index 000000000000..2ab46fd218d2
--- /dev/null
+++ b/kernel/trace/rv/monitors/sleep/sleep.h
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * C implementation of Buchi automaton, automatically generated by
+ * tools/verification/rvgen from the linear temporal logic specification.
+ * For further information, see kernel documentation:
+ * Documentation/trace/rv/linear_temporal_logic.rst
+ */
+
+#include <linux/rv.h>
+
+#define MONITOR_NAME sleep
+
+enum ltl_atom {
+ LTL_ABORT_SLEEP,
+ LTL_BLOCK_ON_RT_MUTEX,
+ LTL_CLOCK_NANOSLEEP,
+ LTL_FUTEX_LOCK_PI,
+ LTL_FUTEX_WAIT,
+ LTL_KERNEL_THREAD,
+ LTL_KTHREAD_SHOULD_STOP,
+ LTL_NANOSLEEP_CLOCK_MONOTONIC,
+ LTL_NANOSLEEP_CLOCK_TAI,
+ LTL_NANOSLEEP_TIMER_ABSTIME,
+ LTL_RT,
+ LTL_SLEEP,
+ LTL_TASK_IS_MIGRATION,
+ LTL_TASK_IS_RCU,
+ LTL_WAKE,
+ LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
+ LTL_WOKEN_BY_HARDIRQ,
+ LTL_WOKEN_BY_NMI,
+ LTL_NUM_ATOM
+};
+static_assert(LTL_NUM_ATOM <= RV_MAX_LTL_ATOM);
+
+static const char *ltl_atom_str(enum ltl_atom atom)
+{
+ static const char *const names[] = {
+ "ab_sl",
+ "bl_on_rt_mu",
+ "cl_na",
+ "fu_lo_pi",
+ "fu_wa",
+ "ker_th",
+ "kth_sh_st",
+ "na_cl_mo",
+ "na_cl_ta",
+ "na_ti_ab",
+ "rt",
+ "sl",
+ "ta_mi",
+ "ta_rc",
+ "wak",
+ "wo_eq_hi_pr",
+ "wo_ha",
+ "wo_nm",
+ };
+
+ return names[atom];
+}
+
+enum ltl_buchi_state {
+ S0,
+ S1,
+ S2,
+ S3,
+ S4,
+ S5,
+ S6,
+ S7,
+ RV_NUM_BA_STATES
+};
+static_assert(RV_NUM_BA_STATES <= RV_MAX_BA_STATES);
+
+static void ltl_start(struct task_struct *task, struct ltl_monitor *mon)
+{
+ bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms);
+ bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms);
+ bool val40 = task_is_rcu || task_is_migration;
+ bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms);
+ bool val41 = futex_lock_pi || val40;
+ bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms);
+ bool val5 = block_on_rt_mutex || val41;
+ bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon->atoms);
+ bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms);
+ bool val32 = abort_sleep || kthread_should_stop;
+ bool woken_by_nmi = test_bit(LTL_WOKEN_BY_NMI, mon->atoms);
+ bool val33 = woken_by_nmi || val32;
+ bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms);
+ bool val34 = woken_by_hardirq || val33;
+ bool woken_by_equal_or_higher_prio = test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
+ mon->atoms);
+ bool val14 = woken_by_equal_or_higher_prio || val34;
+ bool wake = test_bit(LTL_WAKE, mon->atoms);
+ bool val13 = !wake;
+ bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms);
+ bool nanosleep_clock_tai = test_bit(LTL_NANOSLEEP_CLOCK_TAI, mon->atoms);
+ bool nanosleep_clock_monotonic = test_bit(LTL_NANOSLEEP_CLOCK_MONOTONIC, mon->atoms);
+ bool val24 = nanosleep_clock_monotonic || nanosleep_clock_tai;
+ bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME, mon->atoms);
+ bool val25 = nanosleep_timer_abstime && val24;
+ bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms);
+ bool val18 = clock_nanosleep && val25;
+ bool futex_wait = test_bit(LTL_FUTEX_WAIT, mon->atoms);
+ bool val9 = futex_wait || val18;
+ bool val11 = val9 || kernel_thread;
+ bool sleep = test_bit(LTL_SLEEP, mon->atoms);
+ bool val2 = !sleep;
+ bool rt = test_bit(LTL_RT, mon->atoms);
+ bool val1 = !rt;
+ bool val3 = val1 || val2;
+
+ if (val3)
+ __set_bit(S0, mon->states);
+ if (val11 && val13)
+ __set_bit(S1, mon->states);
+ if (val11 && val14)
+ __set_bit(S4, mon->states);
+ if (val5)
+ __set_bit(S5, mon->states);
+}
+
+static void
+ltl_possible_next_states(struct ltl_monitor *mon, unsigned int state, unsigned long *next)
+{
+ bool task_is_migration = test_bit(LTL_TASK_IS_MIGRATION, mon->atoms);
+ bool task_is_rcu = test_bit(LTL_TASK_IS_RCU, mon->atoms);
+ bool val40 = task_is_rcu || task_is_migration;
+ bool futex_lock_pi = test_bit(LTL_FUTEX_LOCK_PI, mon->atoms);
+ bool val41 = futex_lock_pi || val40;
+ bool block_on_rt_mutex = test_bit(LTL_BLOCK_ON_RT_MUTEX, mon->atoms);
+ bool val5 = block_on_rt_mutex || val41;
+ bool kthread_should_stop = test_bit(LTL_KTHREAD_SHOULD_STOP, mon->atoms);
+ bool abort_sleep = test_bit(LTL_ABORT_SLEEP, mon->atoms);
+ bool val32 = abort_sleep || kthread_should_stop;
+ bool woken_by_nmi = test_bit(LTL_WOKEN_BY_NMI, mon->atoms);
+ bool val33 = woken_by_nmi || val32;
+ bool woken_by_hardirq = test_bit(LTL_WOKEN_BY_HARDIRQ, mon->atoms);
+ bool val34 = woken_by_hardirq || val33;
+ bool woken_by_equal_or_higher_prio = test_bit(LTL_WOKEN_BY_EQUAL_OR_HIGHER_PRIO,
+ mon->atoms);
+ bool val14 = woken_by_equal_or_higher_prio || val34;
+ bool wake = test_bit(LTL_WAKE, mon->atoms);
+ bool val13 = !wake;
+ bool kernel_thread = test_bit(LTL_KERNEL_THREAD, mon->atoms);
+ bool nanosleep_clock_tai = test_bit(LTL_NANOSLEEP_CLOCK_TAI, mon->atoms);
+ bool nanosleep_clock_monotonic = test_bit(LTL_NANOSLEEP_CLOCK_MONOTONIC, mon->atoms);
+ bool val24 = nanosleep_clock_monotonic || nanosleep_clock_tai;
+ bool nanosleep_timer_abstime = test_bit(LTL_NANOSLEEP_TIMER_ABSTIME, mon->atoms);
+ bool val25 = nanosleep_timer_abstime && val24;
+ bool clock_nanosleep = test_bit(LTL_CLOCK_NANOSLEEP, mon->atoms);
+ bool val18 = clock_nanosleep && val25;
+ bool futex_wait = test_bit(LTL_FUTEX_WAIT, mon->atoms);
+ bool val9 = futex_wait || val18;
+ bool val11 = val9 || kernel_thread;
+ bool sleep = test_bit(LTL_SLEEP, mon->atoms);
+ bool val2 = !sleep;
+ bool rt = test_bit(LTL_RT, mon->atoms);
+ bool val1 = !rt;
+ bool val3 = val1 || val2;
+
+ switch (state) {
+ case S0:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S4, next);
+ if (val5)
+ __set_bit(S5, next);
+ break;
+ case S1:
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val13 && val3)
+ __set_bit(S2, next);
+ if (val14 && val3)
+ __set_bit(S3, next);
+ if (val11 && val14)
+ __set_bit(S4, next);
+ if (val13 && val5)
+ __set_bit(S6, next);
+ if (val14 && val5)
+ __set_bit(S7, next);
+ break;
+ case S2:
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val13 && val3)
+ __set_bit(S2, next);
+ if (val14 && val3)
+ __set_bit(S3, next);
+ if (val11 && val14)
+ __set_bit(S4, next);
+ if (val13 && val5)
+ __set_bit(S6, next);
+ if (val14 && val5)
+ __set_bit(S7, next);
+ break;
+ case S3:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S4, next);
+ if (val5)
+ __set_bit(S5, next);
+ break;
+ case S4:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S4, next);
+ if (val5)
+ __set_bit(S5, next);
+ break;
+ case S5:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S4, next);
+ if (val5)
+ __set_bit(S5, next);
+ break;
+ case S6:
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val13 && val3)
+ __set_bit(S2, next);
+ if (val14 && val3)
+ __set_bit(S3, next);
+ if (val11 && val14)
+ __set_bit(S4, next);
+ if (val13 && val5)
+ __set_bit(S6, next);
+ if (val14 && val5)
+ __set_bit(S7, next);
+ break;
+ case S7:
+ if (val3)
+ __set_bit(S0, next);
+ if (val11 && val13)
+ __set_bit(S1, next);
+ if (val11 && val14)
+ __set_bit(S4, next);
+ if (val5)
+ __set_bit(S5, next);
+ break;
+ }
+}
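
Reading ltl_start back: a run is accepted while val3 = (!rt || !sleep) holds, or while a sleep goes through an allowed mechanism (val11: a futex wait, an absolute-time clock_nanosleep on MONOTONIC/TAI, or being a kernel thread; val5: blocking on an RT mutex, futex_lock_pi, or being an rcu/migration thread) until an allowed wakeup (val14). As a rough reconstruction of the intent, not the authoritative specification:

	\square\big((\mathrm{RT} \land \mathrm{SLEEP}) \rightarrow
	    (\mathrm{allowed\_sleep} \;\mathbf{U}\; \mathrm{allowed\_wake})\big)
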
diff --git a/kernel/trace/rv/monitors/sleep/sleep_trace.h b/kernel/trace/rv/monitors/sleep/sleep_trace.h
new file mode 100644
index 000000000000..22eaf31da987
--- /dev/null
+++ b/kernel/trace/rv/monitors/sleep/sleep_trace.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_SLEEP
+DEFINE_EVENT(event_ltl_monitor_id, event_sleep,
+ TP_PROTO(struct task_struct *task, char *states, char *atoms, char *next),
+ TP_ARGS(task, states, atoms, next));
+DEFINE_EVENT(error_ltl_monitor_id, error_sleep,
+ TP_PROTO(struct task_struct *task),
+ TP_ARGS(task));
+#endif /* CONFIG_RV_MON_SLEEP */
diff --git a/kernel/trace/rv/monitors/sncid/sncid.c b/kernel/trace/rv/monitors/sncid/sncid.c
deleted file mode 100644
index f5037cd6214c..000000000000
--- a/kernel/trace/rv/monitors/sncid/sncid.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/ftrace.h>
-#include <linux/tracepoint.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rv.h>
-#include <rv/instrumentation.h>
-#include <rv/da_monitor.h>
-
-#define MODULE_NAME "sncid"
-
-#include <trace/events/sched.h>
-#include <trace/events/preemptirq.h>
-#include <rv_trace.h>
-#include <monitors/sched/sched.h>
-
-#include "sncid.h"
-
-static struct rv_monitor rv_sncid;
-DECLARE_DA_MON_PER_CPU(sncid, unsigned char);
-
-static void handle_irq_disable(void *data, unsigned long ip, unsigned long parent_ip)
-{
- da_handle_event_sncid(irq_disable_sncid);
-}
-
-static void handle_irq_enable(void *data, unsigned long ip, unsigned long parent_ip)
-{
- da_handle_start_event_sncid(irq_enable_sncid);
-}
-
-static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
-{
- da_handle_start_event_sncid(schedule_entry_sncid);
-}
-
-static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
-{
- da_handle_start_event_sncid(schedule_exit_sncid);
-}
-
-static int enable_sncid(void)
-{
- int retval;
-
- retval = da_monitor_init_sncid();
- if (retval)
- return retval;
-
- rv_attach_trace_probe("sncid", irq_disable, handle_irq_disable);
- rv_attach_trace_probe("sncid", irq_enable, handle_irq_enable);
- rv_attach_trace_probe("sncid", sched_entry_tp, handle_schedule_entry);
- rv_attach_trace_probe("sncid", sched_exit_tp, handle_schedule_exit);
-
- return 0;
-}
-
-static void disable_sncid(void)
-{
- rv_sncid.enabled = 0;
-
- rv_detach_trace_probe("sncid", irq_disable, handle_irq_disable);
- rv_detach_trace_probe("sncid", irq_enable, handle_irq_enable);
- rv_detach_trace_probe("sncid", sched_entry_tp, handle_schedule_entry);
- rv_detach_trace_probe("sncid", sched_exit_tp, handle_schedule_exit);
-
- da_monitor_destroy_sncid();
-}
-
-static struct rv_monitor rv_sncid = {
- .name = "sncid",
- .description = "schedule not called with interrupt disabled.",
- .enable = enable_sncid,
- .disable = disable_sncid,
- .reset = da_monitor_reset_all_sncid,
- .enabled = 0,
-};
-
-static int __init register_sncid(void)
-{
- rv_register_monitor(&rv_sncid, &rv_sched);
- return 0;
-}
-
-static void __exit unregister_sncid(void)
-{
- rv_unregister_monitor(&rv_sncid);
-}
-
-module_init(register_sncid);
-module_exit(unregister_sncid);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
-MODULE_DESCRIPTION("sncid: schedule not called with interrupt disabled.");
diff --git a/kernel/trace/rv/monitors/sncid/sncid.h b/kernel/trace/rv/monitors/sncid/sncid.h
deleted file mode 100644
index 21304725142b..000000000000
--- a/kernel/trace/rv/monitors/sncid/sncid.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Automatically generated C representation of sncid automaton
- * For further information about this format, see kernel documentation:
- * Documentation/trace/rv/deterministic_automata.rst
- */
-
-enum states_sncid {
- can_sched_sncid = 0,
- cant_sched_sncid,
- state_max_sncid
-};
-
-#define INVALID_STATE state_max_sncid
-
-enum events_sncid {
- irq_disable_sncid = 0,
- irq_enable_sncid,
- schedule_entry_sncid,
- schedule_exit_sncid,
- event_max_sncid
-};
-
-struct automaton_sncid {
- char *state_names[state_max_sncid];
- char *event_names[event_max_sncid];
- unsigned char function[state_max_sncid][event_max_sncid];
- unsigned char initial_state;
- bool final_states[state_max_sncid];
-};
-
-static const struct automaton_sncid automaton_sncid = {
- .state_names = {
- "can_sched",
- "cant_sched"
- },
- .event_names = {
- "irq_disable",
- "irq_enable",
- "schedule_entry",
- "schedule_exit"
- },
- .function = {
- { cant_sched_sncid, INVALID_STATE, can_sched_sncid, can_sched_sncid },
- { INVALID_STATE, can_sched_sncid, INVALID_STATE, INVALID_STATE },
- },
- .initial_state = can_sched_sncid,
- .final_states = { 1, 0 },
-};
diff --git a/kernel/trace/rv/monitors/snep/Kconfig b/kernel/trace/rv/monitors/snep/Kconfig
index 77527f971232..7dd54f434ff7 100644
--- a/kernel/trace/rv/monitors/snep/Kconfig
+++ b/kernel/trace/rv/monitors/snep/Kconfig
@@ -2,7 +2,7 @@
#
config RV_MON_SNEP
depends on RV
- depends on PREEMPT_TRACER
+ depends on TRACE_PREEMPT_TOGGLE
depends on RV_MON_SCHED
default y
select DA_MON_EVENTS_IMPLICIT
diff --git a/kernel/trace/rv/monitors/snep/snep.c b/kernel/trace/rv/monitors/snep/snep.c
index 0076ba6d7ea4..558950f524a5 100644
--- a/kernel/trace/rv/monitors/snep/snep.c
+++ b/kernel/trace/rv/monitors/snep/snep.c
@@ -30,12 +30,12 @@ static void handle_preempt_enable(void *data, unsigned long ip, unsigned long pa
da_handle_start_event_snep(preempt_enable_snep);
}
-static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
+static void handle_schedule_entry(void *data, bool preempt)
{
da_handle_event_snep(schedule_entry_snep);
}
-static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
+static void handle_schedule_exit(void *data, bool is_switch)
{
da_handle_start_event_snep(schedule_exit_snep);
}
@@ -79,8 +79,7 @@ static struct rv_monitor rv_snep = {
static int __init register_snep(void)
{
- rv_register_monitor(&rv_snep, &rv_sched);
- return 0;
+ return rv_register_monitor(&rv_snep, &rv_sched);
}
static void __exit unregister_snep(void)
diff --git a/kernel/trace/rv/monitors/snep/snep.h b/kernel/trace/rv/monitors/snep/snep.h
index 6d16b9ad931e..4cd9abb77b7b 100644
--- a/kernel/trace/rv/monitors/snep/snep.h
+++ b/kernel/trace/rv/monitors/snep/snep.h
@@ -41,8 +41,18 @@ static const struct automaton_snep automaton_snep = {
"schedule_exit"
},
.function = {
- { non_scheduling_context_snep, non_scheduling_context_snep, scheduling_contex_snep, INVALID_STATE },
- { INVALID_STATE, INVALID_STATE, INVALID_STATE, non_scheduling_context_snep },
+ {
+ non_scheduling_context_snep,
+ non_scheduling_context_snep,
+ scheduling_contex_snep,
+ INVALID_STATE
+ },
+ {
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ non_scheduling_context_snep
+ },
},
.initial_state = non_scheduling_context_snep,
.final_states = { 1, 0 },
diff --git a/kernel/trace/rv/monitors/snroc/snroc.c b/kernel/trace/rv/monitors/snroc/snroc.c
index bb1f60d55296..540e686e699f 100644
--- a/kernel/trace/rv/monitors/snroc/snroc.c
+++ b/kernel/trace/rv/monitors/snroc/snroc.c
@@ -68,8 +68,7 @@ static struct rv_monitor rv_snroc = {
static int __init register_snroc(void)
{
- rv_register_monitor(&rv_snroc, &rv_sched);
- return 0;
+ return rv_register_monitor(&rv_snroc, &rv_sched);
}
static void __exit unregister_snroc(void)
diff --git a/kernel/trace/rv/monitors/sncid/Kconfig b/kernel/trace/rv/monitors/sssw/Kconfig
index 76bcfef4fd10..23b7eeb38bbf 100644
--- a/kernel/trace/rv/monitors/sncid/Kconfig
+++ b/kernel/trace/rv/monitors/sssw/Kconfig
@@ -1,14 +1,14 @@
# SPDX-License-Identifier: GPL-2.0-only
#
-config RV_MON_SNCID
+config RV_MON_SSSW
depends on RV
- depends on IRQSOFF_TRACER
depends on RV_MON_SCHED
default y
- select DA_MON_EVENTS_IMPLICIT
- bool "sncid monitor"
+ select DA_MON_EVENTS_ID
+ bool "sssw monitor"
help
- Monitor to ensure schedule is not called with interrupt disabled.
+	  Monitor to ensure sched_set_state to sleepable leads to sleeping and
+	  that sleeping tasks require a wakeup.
This monitor is part of the sched monitors collection.
For further information, see:
diff --git a/kernel/trace/rv/monitors/sssw/sssw.c b/kernel/trace/rv/monitors/sssw/sssw.c
new file mode 100644
index 000000000000..84b8d890d9d4
--- /dev/null
+++ b/kernel/trace/rv/monitors/sssw/sssw.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "sssw"
+
+#include <trace/events/sched.h>
+#include <trace/events/signal.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "sssw.h"
+
+static struct rv_monitor rv_sssw;
+DECLARE_DA_MON_PER_TASK(sssw, unsigned char);
+
+static void handle_sched_set_state(void *data, struct task_struct *tsk, int state)
+{
+ if (state == TASK_RUNNING)
+ da_handle_start_event_sssw(tsk, sched_set_state_runnable_sssw);
+ else
+ da_handle_event_sssw(tsk, sched_set_state_sleepable_sssw);
+}
+
+static void handle_sched_switch(void *data, bool preempt,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned int prev_state)
+{
+ if (preempt)
+ da_handle_event_sssw(prev, sched_switch_preempt_sssw);
+ else if (prev_state == TASK_RUNNING)
+ da_handle_event_sssw(prev, sched_switch_yield_sssw);
+ else if (prev_state == TASK_RTLOCK_WAIT)
+ /* special case of sleeping task with racy conditions */
+ da_handle_event_sssw(prev, sched_switch_blocking_sssw);
+ else
+ da_handle_event_sssw(prev, sched_switch_suspend_sssw);
+ da_handle_event_sssw(next, sched_switch_in_sssw);
+}
+
+static void handle_sched_wakeup(void *data, struct task_struct *p)
+{
+ /*
+	 * A wakeup can also lead to signal_wakeup although the task is
+	 * actually runnable. The monitor can safely start with this event.
+ */
+ da_handle_start_event_sssw(p, sched_wakeup_sssw);
+}
+
+static void handle_signal_deliver(void *data, int sig,
+ struct kernel_siginfo *info,
+ struct k_sigaction *ka)
+{
+ da_handle_event_sssw(current, signal_deliver_sssw);
+}
+
+static int enable_sssw(void)
+{
+ int retval;
+
+ retval = da_monitor_init_sssw();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("sssw", sched_set_state_tp, handle_sched_set_state);
+ rv_attach_trace_probe("sssw", sched_switch, handle_sched_switch);
+ rv_attach_trace_probe("sssw", sched_wakeup, handle_sched_wakeup);
+ rv_attach_trace_probe("sssw", signal_deliver, handle_signal_deliver);
+
+ return 0;
+}
+
+static void disable_sssw(void)
+{
+ rv_sssw.enabled = 0;
+
+ rv_detach_trace_probe("sssw", sched_set_state_tp, handle_sched_set_state);
+ rv_detach_trace_probe("sssw", sched_switch, handle_sched_switch);
+ rv_detach_trace_probe("sssw", sched_wakeup, handle_sched_wakeup);
+ rv_detach_trace_probe("sssw", signal_deliver, handle_signal_deliver);
+
+ da_monitor_destroy_sssw();
+}
+
+static struct rv_monitor rv_sssw = {
+ .name = "sssw",
+ .description = "set state sleep and wakeup.",
+ .enable = enable_sssw,
+ .disable = disable_sssw,
+ .reset = da_monitor_reset_all_sssw,
+ .enabled = 0,
+};
+
+static int __init register_sssw(void)
+{
+ return rv_register_monitor(&rv_sssw, &rv_sched);
+}
+
+static void __exit unregister_sssw(void)
+{
+ rv_unregister_monitor(&rv_sssw);
+}
+
+module_init(register_sssw);
+module_exit(unregister_sssw);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("sssw: set state sleep and wakeup.");
diff --git a/kernel/trace/rv/monitors/sssw/sssw.h b/kernel/trace/rv/monitors/sssw/sssw.h
new file mode 100644
index 000000000000..243d54050c94
--- /dev/null
+++ b/kernel/trace/rv/monitors/sssw/sssw.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of sssw automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_sssw {
+ runnable_sssw = 0,
+ signal_wakeup_sssw,
+ sleepable_sssw,
+ sleeping_sssw,
+ state_max_sssw
+};
+
+#define INVALID_STATE state_max_sssw
+
+enum events_sssw {
+ sched_set_state_runnable_sssw = 0,
+ sched_set_state_sleepable_sssw,
+ sched_switch_blocking_sssw,
+ sched_switch_in_sssw,
+ sched_switch_preempt_sssw,
+ sched_switch_suspend_sssw,
+ sched_switch_yield_sssw,
+ sched_wakeup_sssw,
+ signal_deliver_sssw,
+ event_max_sssw
+};
+
+struct automaton_sssw {
+ char *state_names[state_max_sssw];
+ char *event_names[event_max_sssw];
+ unsigned char function[state_max_sssw][event_max_sssw];
+ unsigned char initial_state;
+ bool final_states[state_max_sssw];
+};
+
+static const struct automaton_sssw automaton_sssw = {
+ .state_names = {
+ "runnable",
+ "signal_wakeup",
+ "sleepable",
+ "sleeping"
+ },
+ .event_names = {
+ "sched_set_state_runnable",
+ "sched_set_state_sleepable",
+ "sched_switch_blocking",
+ "sched_switch_in",
+ "sched_switch_preempt",
+ "sched_switch_suspend",
+ "sched_switch_yield",
+ "sched_wakeup",
+ "signal_deliver"
+ },
+ .function = {
+ {
+ runnable_sssw,
+ sleepable_sssw,
+ sleeping_sssw,
+ runnable_sssw,
+ runnable_sssw,
+ INVALID_STATE,
+ runnable_sssw,
+ runnable_sssw,
+ runnable_sssw
+ },
+ {
+ INVALID_STATE,
+ sleepable_sssw,
+ INVALID_STATE,
+ signal_wakeup_sssw,
+ signal_wakeup_sssw,
+ INVALID_STATE,
+ signal_wakeup_sssw,
+ signal_wakeup_sssw,
+ runnable_sssw
+ },
+ {
+ runnable_sssw,
+ sleepable_sssw,
+ sleeping_sssw,
+ sleepable_sssw,
+ sleepable_sssw,
+ sleeping_sssw,
+ signal_wakeup_sssw,
+ runnable_sssw,
+ sleepable_sssw
+ },
+ {
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ runnable_sssw,
+ INVALID_STATE
+ },
+ },
+ .initial_state = runnable_sssw,
+ .final_states = { 1, 0, 0, 0 },
+};
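
Reading the transition matrix above: rows follow enum states_sssw and columns
follow enum events_sssw, so a lookup is a plain two-dimensional array access.
A tiny helper makes the convention explicit (derived directly from the table;
the function name is only for illustration):

/* Look up one transition in automaton_sssw; INVALID_STATE = violation. */
static inline unsigned char sssw_next_state(unsigned char state,
					    unsigned char event)
{
	return automaton_sssw.function[state][event];
}

/*
 * Example: sssw_next_state(sleepable_sssw, sched_switch_suspend_sssw)
 * returns sleeping_sssw, while any lookup that lands on INVALID_STATE
 * means the event is not allowed in the current state.
 */
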
diff --git a/kernel/trace/rv/monitors/sssw/sssw_trace.h b/kernel/trace/rv/monitors/sssw/sssw_trace.h
new file mode 100644
index 000000000000..6c03cfc6960b
--- /dev/null
+++ b/kernel/trace/rv/monitors/sssw/sssw_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_SSSW
+DEFINE_EVENT(event_da_monitor_id, event_sssw,
+ TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(id, state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor_id, error_sssw,
+ TP_PROTO(int id, char *state, char *event),
+ TP_ARGS(id, state, event));
+#endif /* CONFIG_RV_MON_SSSW */
diff --git a/kernel/trace/rv/monitors/sts/Kconfig b/kernel/trace/rv/monitors/sts/Kconfig
new file mode 100644
index 000000000000..7d1ff0f6fc91
--- /dev/null
+++ b/kernel/trace/rv/monitors/sts/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+config RV_MON_STS
+ depends on RV
+ depends on TRACE_IRQFLAGS
+ depends on RV_MON_SCHED
+ default y
+ select DA_MON_EVENTS_IMPLICIT
+ bool "sts monitor"
+ help
+	  Monitor to ensure these relationships between the scheduler and task switches:
+ * the scheduler is called and returns with interrupts disabled
+ * each call to the scheduler has up to one switch
+ * switches only happen inside the scheduler
+ * each call to the scheduler disables interrupts to switch
+ This monitor is part of the sched monitors collection.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_sched.rst
diff --git a/kernel/trace/rv/monitors/sts/sts.c b/kernel/trace/rv/monitors/sts/sts.c
new file mode 100644
index 000000000000..c4a9cd67c1d2
--- /dev/null
+++ b/kernel/trace/rv/monitors/sts/sts.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "sts"
+
+#include <trace/events/sched.h>
+#include <trace/events/irq.h>
+#include <trace/events/preemptirq.h>
+#include <rv_trace.h>
+#include <monitors/sched/sched.h>
+
+#include "sts.h"
+
+static struct rv_monitor rv_sts;
+DECLARE_DA_MON_PER_CPU(sts, unsigned char);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/trace/irq_vectors.h>
+
+static void handle_vector_irq_entry(void *data, int vector)
+{
+ da_handle_event_sts(irq_entry_sts);
+}
+
+static void attach_vector_irq(void)
+{
+ rv_attach_trace_probe("sts", local_timer_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_IRQ_WORK))
+ rv_attach_trace_probe("sts", irq_work_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ rv_attach_trace_probe("sts", reschedule_entry, handle_vector_irq_entry);
+ rv_attach_trace_probe("sts", call_function_entry, handle_vector_irq_entry);
+ rv_attach_trace_probe("sts", call_function_single_entry, handle_vector_irq_entry);
+ }
+}
+
+static void detach_vector_irq(void)
+{
+ rv_detach_trace_probe("sts", local_timer_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_IRQ_WORK))
+ rv_detach_trace_probe("sts", irq_work_entry, handle_vector_irq_entry);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ rv_detach_trace_probe("sts", reschedule_entry, handle_vector_irq_entry);
+ rv_detach_trace_probe("sts", call_function_entry, handle_vector_irq_entry);
+ rv_detach_trace_probe("sts", call_function_single_entry, handle_vector_irq_entry);
+ }
+}
+
+#else
+/* We assume irq_entry tracepoints are sufficient on other architectures */
+static void attach_vector_irq(void) { }
+static void detach_vector_irq(void) { }
+#endif
+
+static void handle_irq_disable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_event_sts(irq_disable_sts);
+}
+
+static void handle_irq_enable(void *data, unsigned long ip, unsigned long parent_ip)
+{
+ da_handle_event_sts(irq_enable_sts);
+}
+
+static void handle_irq_entry(void *data, int irq, struct irqaction *action)
+{
+ da_handle_event_sts(irq_entry_sts);
+}
+
+static void handle_sched_switch(void *data, bool preempt,
+ struct task_struct *prev,
+ struct task_struct *next,
+ unsigned int prev_state)
+{
+ da_handle_event_sts(sched_switch_sts);
+}
+
+static void handle_schedule_entry(void *data, bool preempt)
+{
+ da_handle_event_sts(schedule_entry_sts);
+}
+
+static void handle_schedule_exit(void *data, bool is_switch)
+{
+ da_handle_start_event_sts(schedule_exit_sts);
+}
+
+static int enable_sts(void)
+{
+ int retval;
+
+ retval = da_monitor_init_sts();
+ if (retval)
+ return retval;
+
+ rv_attach_trace_probe("sts", irq_disable, handle_irq_disable);
+ rv_attach_trace_probe("sts", irq_enable, handle_irq_enable);
+ rv_attach_trace_probe("sts", irq_handler_entry, handle_irq_entry);
+ rv_attach_trace_probe("sts", sched_switch, handle_sched_switch);
+ rv_attach_trace_probe("sts", sched_entry_tp, handle_schedule_entry);
+ rv_attach_trace_probe("sts", sched_exit_tp, handle_schedule_exit);
+ attach_vector_irq();
+
+ return 0;
+}
+
+static void disable_sts(void)
+{
+ rv_sts.enabled = 0;
+
+ rv_detach_trace_probe("sts", irq_disable, handle_irq_disable);
+ rv_detach_trace_probe("sts", irq_enable, handle_irq_enable);
+ rv_detach_trace_probe("sts", irq_handler_entry, handle_irq_entry);
+ rv_detach_trace_probe("sts", sched_switch, handle_sched_switch);
+ rv_detach_trace_probe("sts", sched_entry_tp, handle_schedule_entry);
+ rv_detach_trace_probe("sts", sched_exit_tp, handle_schedule_exit);
+ detach_vector_irq();
+
+ da_monitor_destroy_sts();
+}
+
+/*
+ * This is the monitor register section.
+ */
+static struct rv_monitor rv_sts = {
+ .name = "sts",
+ .description = "schedule implies task switch.",
+ .enable = enable_sts,
+ .disable = disable_sts,
+ .reset = da_monitor_reset_all_sts,
+ .enabled = 0,
+};
+
+static int __init register_sts(void)
+{
+ return rv_register_monitor(&rv_sts, &rv_sched);
+}
+
+static void __exit unregister_sts(void)
+{
+ rv_unregister_monitor(&rv_sts);
+}
+
+module_init(register_sts);
+module_exit(unregister_sts);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
+MODULE_DESCRIPTION("sts: schedule implies task switch.");
diff --git a/kernel/trace/rv/monitors/sts/sts.h b/kernel/trace/rv/monitors/sts/sts.h
new file mode 100644
index 000000000000..3368b6599a00
--- /dev/null
+++ b/kernel/trace/rv/monitors/sts/sts.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Automatically generated C representation of sts automaton
+ * For further information about this format, see kernel documentation:
+ * Documentation/trace/rv/deterministic_automata.rst
+ */
+
+enum states_sts {
+ can_sched_sts = 0,
+ cant_sched_sts,
+ disable_to_switch_sts,
+ enable_to_exit_sts,
+ in_irq_sts,
+ scheduling_sts,
+ switching_sts,
+ state_max_sts
+};
+
+#define INVALID_STATE state_max_sts
+
+enum events_sts {
+ irq_disable_sts = 0,
+ irq_enable_sts,
+ irq_entry_sts,
+ sched_switch_sts,
+ schedule_entry_sts,
+ schedule_exit_sts,
+ event_max_sts
+};
+
+struct automaton_sts {
+ char *state_names[state_max_sts];
+ char *event_names[event_max_sts];
+ unsigned char function[state_max_sts][event_max_sts];
+ unsigned char initial_state;
+ bool final_states[state_max_sts];
+};
+
+static const struct automaton_sts automaton_sts = {
+ .state_names = {
+ "can_sched",
+ "cant_sched",
+ "disable_to_switch",
+ "enable_to_exit",
+ "in_irq",
+ "scheduling",
+ "switching"
+ },
+ .event_names = {
+ "irq_disable",
+ "irq_enable",
+ "irq_entry",
+ "sched_switch",
+ "schedule_entry",
+ "schedule_exit"
+ },
+ .function = {
+ {
+ cant_sched_sts,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ scheduling_sts,
+ INVALID_STATE
+ },
+ {
+ INVALID_STATE,
+ can_sched_sts,
+ cant_sched_sts,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE
+ },
+ {
+ INVALID_STATE,
+ enable_to_exit_sts,
+ in_irq_sts,
+ switching_sts,
+ INVALID_STATE,
+ INVALID_STATE
+ },
+ {
+ enable_to_exit_sts,
+ enable_to_exit_sts,
+ enable_to_exit_sts,
+ INVALID_STATE,
+ INVALID_STATE,
+ can_sched_sts
+ },
+ {
+ INVALID_STATE,
+ scheduling_sts,
+ in_irq_sts,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE
+ },
+ {
+ disable_to_switch_sts,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE
+ },
+ {
+ INVALID_STATE,
+ enable_to_exit_sts,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE,
+ INVALID_STATE
+ },
+ },
+ .initial_state = can_sched_sts,
+ .final_states = { 1, 0, 0, 0, 0, 0, 0 },
+};
diff --git a/kernel/trace/rv/monitors/tss/tss_trace.h b/kernel/trace/rv/monitors/sts/sts_trace.h
index 4619dbb50cc0..d78beb58d5b3 100644
--- a/kernel/trace/rv/monitors/tss/tss_trace.h
+++ b/kernel/trace/rv/monitors/sts/sts_trace.h
@@ -4,12 +4,12 @@
* Snippet to be included in rv_trace.h
*/
-#ifdef CONFIG_RV_MON_TSS
-DEFINE_EVENT(event_da_monitor, event_tss,
+#ifdef CONFIG_RV_MON_STS
+DEFINE_EVENT(event_da_monitor, event_sts,
TP_PROTO(char *state, char *event, char *next_state, bool final_state),
TP_ARGS(state, event, next_state, final_state));
-DEFINE_EVENT(error_da_monitor, error_tss,
+DEFINE_EVENT(error_da_monitor, error_sts,
TP_PROTO(char *state, char *event),
TP_ARGS(state, event));
-#endif /* CONFIG_RV_MON_TSS */
+#endif /* CONFIG_RV_MON_STS */
diff --git a/kernel/trace/rv/monitors/tss/tss.c b/kernel/trace/rv/monitors/tss/tss.c
deleted file mode 100644
index 542787e6524f..000000000000
--- a/kernel/trace/rv/monitors/tss/tss.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/ftrace.h>
-#include <linux/tracepoint.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rv.h>
-#include <rv/instrumentation.h>
-#include <rv/da_monitor.h>
-
-#define MODULE_NAME "tss"
-
-#include <trace/events/sched.h>
-#include <rv_trace.h>
-#include <monitors/sched/sched.h>
-
-#include "tss.h"
-
-static struct rv_monitor rv_tss;
-DECLARE_DA_MON_PER_CPU(tss, unsigned char);
-
-static void handle_sched_switch(void *data, bool preempt,
- struct task_struct *prev,
- struct task_struct *next,
- unsigned int prev_state)
-{
- da_handle_event_tss(sched_switch_tss);
-}
-
-static void handle_schedule_entry(void *data, bool preempt, unsigned long ip)
-{
- da_handle_event_tss(schedule_entry_tss);
-}
-
-static void handle_schedule_exit(void *data, bool is_switch, unsigned long ip)
-{
- da_handle_start_event_tss(schedule_exit_tss);
-}
-
-static int enable_tss(void)
-{
- int retval;
-
- retval = da_monitor_init_tss();
- if (retval)
- return retval;
-
- rv_attach_trace_probe("tss", sched_switch, handle_sched_switch);
- rv_attach_trace_probe("tss", sched_entry_tp, handle_schedule_entry);
- rv_attach_trace_probe("tss", sched_exit_tp, handle_schedule_exit);
-
- return 0;
-}
-
-static void disable_tss(void)
-{
- rv_tss.enabled = 0;
-
- rv_detach_trace_probe("tss", sched_switch, handle_sched_switch);
- rv_detach_trace_probe("tss", sched_entry_tp, handle_schedule_entry);
- rv_detach_trace_probe("tss", sched_exit_tp, handle_schedule_exit);
-
- da_monitor_destroy_tss();
-}
-
-static struct rv_monitor rv_tss = {
- .name = "tss",
- .description = "task switch while scheduling.",
- .enable = enable_tss,
- .disable = disable_tss,
- .reset = da_monitor_reset_all_tss,
- .enabled = 0,
-};
-
-static int __init register_tss(void)
-{
- rv_register_monitor(&rv_tss, &rv_sched);
- return 0;
-}
-
-static void __exit unregister_tss(void)
-{
- rv_unregister_monitor(&rv_tss);
-}
-
-module_init(register_tss);
-module_exit(unregister_tss);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Gabriele Monaco <gmonaco@redhat.com>");
-MODULE_DESCRIPTION("tss: task switch while scheduling.");
diff --git a/kernel/trace/rv/monitors/tss/tss.h b/kernel/trace/rv/monitors/tss/tss.h
deleted file mode 100644
index f0a36fda1b87..000000000000
--- a/kernel/trace/rv/monitors/tss/tss.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Automatically generated C representation of tss automaton
- * For further information about this format, see kernel documentation:
- * Documentation/trace/rv/deterministic_automata.rst
- */
-
-enum states_tss {
- thread_tss = 0,
- sched_tss,
- state_max_tss
-};
-
-#define INVALID_STATE state_max_tss
-
-enum events_tss {
- sched_switch_tss = 0,
- schedule_entry_tss,
- schedule_exit_tss,
- event_max_tss
-};
-
-struct automaton_tss {
- char *state_names[state_max_tss];
- char *event_names[event_max_tss];
- unsigned char function[state_max_tss][event_max_tss];
- unsigned char initial_state;
- bool final_states[state_max_tss];
-};
-
-static const struct automaton_tss automaton_tss = {
- .state_names = {
- "thread",
- "sched"
- },
- .event_names = {
- "sched_switch",
- "schedule_entry",
- "schedule_exit"
- },
- .function = {
- { INVALID_STATE, sched_tss, INVALID_STATE },
- { sched_tss, INVALID_STATE, thread_tss },
- },
- .initial_state = thread_tss,
- .final_states = { 1, 0 },
-};
diff --git a/kernel/trace/rv/monitors/wip/Kconfig b/kernel/trace/rv/monitors/wip/Kconfig
index e464b9294865..87a26195792b 100644
--- a/kernel/trace/rv/monitors/wip/Kconfig
+++ b/kernel/trace/rv/monitors/wip/Kconfig
@@ -2,7 +2,7 @@
#
config RV_MON_WIP
depends on RV
- depends on PREEMPT_TRACER
+ depends on TRACE_PREEMPT_TOGGLE
select DA_MON_EVENTS_IMPLICIT
bool "wip monitor"
help
diff --git a/kernel/trace/rv/monitors/wip/wip.c b/kernel/trace/rv/monitors/wip/wip.c
index ed758fec8608..4b4e99615a11 100644
--- a/kernel/trace/rv/monitors/wip/wip.c
+++ b/kernel/trace/rv/monitors/wip/wip.c
@@ -71,8 +71,7 @@ static struct rv_monitor rv_wip = {
static int __init register_wip(void)
{
- rv_register_monitor(&rv_wip, NULL);
- return 0;
+ return rv_register_monitor(&rv_wip, NULL);
}
static void __exit unregister_wip(void)
diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.c b/kernel/trace/rv/monitors/wwnr/wwnr.c
index 172f31c4b0f3..4145bea2729e 100644
--- a/kernel/trace/rv/monitors/wwnr/wwnr.c
+++ b/kernel/trace/rv/monitors/wwnr/wwnr.c
@@ -70,8 +70,7 @@ static struct rv_monitor rv_wwnr = {
static int __init register_wwnr(void)
{
- rv_register_monitor(&rv_wwnr, NULL);
- return 0;
+ return rv_register_monitor(&rv_wwnr, NULL);
}
static void __exit unregister_wwnr(void)
diff --git a/kernel/trace/rv/reactor_panic.c b/kernel/trace/rv/reactor_panic.c
index 0186ff4cbd0b..74c6bcc2c749 100644
--- a/kernel/trace/rv/reactor_panic.c
+++ b/kernel/trace/rv/reactor_panic.c
@@ -13,9 +13,13 @@
#include <linux/init.h>
#include <linux/rv.h>
-static void rv_panic_reaction(char *msg)
+__printf(1, 2) static void rv_panic_reaction(const char *msg, ...)
{
- panic(msg);
+ va_list args;
+
+ va_start(args, msg);
+ vpanic(msg, args);
+ va_end(args);
}
static struct rv_reactor rv_panic = {
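
With the reactors converted to printf-style above, a reaction now receives a
format string plus arguments instead of a preformatted buffer, and vpanic()/
vprintk_deferred() consume the va_list directly. The actual call sites live in
the monitor glue and are not part of this patch; the snippet below is only a
hypothetical sketch of how a monitor error path might invoke the new signature:

/* Hypothetical call site; mon->react now takes printf-style arguments. */
static void example_react(struct rv_monitor *mon, const char *state,
			  const char *event)
{
	if (mon->react)
		mon->react("rv: monitor %s: event %s not expected in state %s\n",
			   mon->name, event, state);
}
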
diff --git a/kernel/trace/rv/reactor_printk.c b/kernel/trace/rv/reactor_printk.c
index 178759dbf89f..2dae2916c05f 100644
--- a/kernel/trace/rv/reactor_printk.c
+++ b/kernel/trace/rv/reactor_printk.c
@@ -12,9 +12,13 @@
#include <linux/init.h>
#include <linux/rv.h>
-static void rv_printk_reaction(char *msg)
+__printf(1, 2) static void rv_printk_reaction(const char *msg, ...)
{
- printk_deferred(msg);
+ va_list args;
+
+ va_start(args, msg);
+ vprintk_deferred(msg, args);
+ va_end(args);
}
static struct rv_reactor rv_printk = {
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index e4077500a91d..bd7d56dbf6c2 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -143,7 +143,7 @@
#include <linux/init.h>
#include <linux/slab.h>
-#ifdef CONFIG_DA_MON_EVENTS
+#ifdef CONFIG_RV_MON_EVENTS
#define CREATE_TRACE_POINTS
#include <rv_trace.h>
#endif
@@ -165,7 +165,7 @@ struct dentry *get_monitors_root(void)
LIST_HEAD(rv_monitors_list);
static int task_monitor_count;
-static bool task_monitor_slots[RV_PER_TASK_MONITORS];
+static bool task_monitor_slots[CONFIG_RV_PER_TASK_MONITORS];
int rv_get_task_monitor_slot(void)
{
@@ -173,12 +173,12 @@ int rv_get_task_monitor_slot(void)
lockdep_assert_held(&rv_interface_lock);
- if (task_monitor_count == RV_PER_TASK_MONITORS)
+ if (task_monitor_count == CONFIG_RV_PER_TASK_MONITORS)
return -EBUSY;
task_monitor_count++;
- for (i = 0; i < RV_PER_TASK_MONITORS; i++) {
+ for (i = 0; i < CONFIG_RV_PER_TASK_MONITORS; i++) {
if (task_monitor_slots[i] == false) {
task_monitor_slots[i] = true;
return i;
@@ -194,7 +194,7 @@ void rv_put_task_monitor_slot(int slot)
{
lockdep_assert_held(&rv_interface_lock);
- if (slot < 0 || slot >= RV_PER_TASK_MONITORS) {
+ if (slot < 0 || slot >= CONFIG_RV_PER_TASK_MONITORS) {
WARN_ONCE(1, "RV releasing an invalid slot!: %d\n", slot);
return;
}
@@ -210,9 +210,9 @@ void rv_put_task_monitor_slot(int slot)
* Monitors with a parent are nested,
* Monitors without a parent could be standalone or containers.
*/
-bool rv_is_nested_monitor(struct rv_monitor_def *mdef)
+bool rv_is_nested_monitor(struct rv_monitor *mon)
{
- return mdef->parent != NULL;
+ return mon->parent != NULL;
}
/*
@@ -223,16 +223,16 @@ bool rv_is_nested_monitor(struct rv_monitor_def *mdef)
* for enable()/disable(). Use this condition to find empty containers.
* Keep both conditions in case we have some non-compliant containers.
*/
-bool rv_is_container_monitor(struct rv_monitor_def *mdef)
+bool rv_is_container_monitor(struct rv_monitor *mon)
{
- struct rv_monitor_def *next;
+ struct rv_monitor *next;
- if (list_is_last(&mdef->list, &rv_monitors_list))
+ if (list_is_last(&mon->list, &rv_monitors_list))
return false;
- next = list_next_entry(mdef, list);
+ next = list_next_entry(mon, list);
- return next->parent == mdef->monitor || !mdef->monitor->enable;
+ return next->parent == mon || !mon->enable;
}
/*
@@ -241,10 +241,10 @@ bool rv_is_container_monitor(struct rv_monitor_def *mdef)
static ssize_t monitor_enable_read_data(struct file *filp, char __user *user_buf, size_t count,
loff_t *ppos)
{
- struct rv_monitor_def *mdef = filp->private_data;
+ struct rv_monitor *mon = filp->private_data;
const char *buff;
- buff = mdef->monitor->enabled ? "1\n" : "0\n";
+ buff = mon->enabled ? "1\n" : "0\n";
return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff)+1);
}
@@ -252,14 +252,14 @@ static ssize_t monitor_enable_read_data(struct file *filp, char __user *user_buf
/*
 * __rv_disable_monitor - disable an enabled monitor
*/
-static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync)
+static int __rv_disable_monitor(struct rv_monitor *mon, bool sync)
{
lockdep_assert_held(&rv_interface_lock);
- if (mdef->monitor->enabled) {
- mdef->monitor->enabled = 0;
- if (mdef->monitor->disable)
- mdef->monitor->disable();
+ if (mon->enabled) {
+ mon->enabled = 0;
+ if (mon->disable)
+ mon->disable();
/*
* Wait for the execution of all events to finish.
@@ -273,90 +273,90 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync)
return 0;
}
-static void rv_disable_single(struct rv_monitor_def *mdef)
+static void rv_disable_single(struct rv_monitor *mon)
{
- __rv_disable_monitor(mdef, true);
+ __rv_disable_monitor(mon, true);
}
-static int rv_enable_single(struct rv_monitor_def *mdef)
+static int rv_enable_single(struct rv_monitor *mon)
{
int retval;
lockdep_assert_held(&rv_interface_lock);
- if (mdef->monitor->enabled)
+ if (mon->enabled)
return 0;
- retval = mdef->monitor->enable();
+ retval = mon->enable();
if (!retval)
- mdef->monitor->enabled = 1;
+ mon->enabled = 1;
return retval;
}
-static void rv_disable_container(struct rv_monitor_def *mdef)
+static void rv_disable_container(struct rv_monitor *mon)
{
- struct rv_monitor_def *p = mdef;
+ struct rv_monitor *p = mon;
int enabled = 0;
list_for_each_entry_continue(p, &rv_monitors_list, list) {
- if (p->parent != mdef->monitor)
+ if (p->parent != mon)
break;
enabled += __rv_disable_monitor(p, false);
}
if (enabled)
tracepoint_synchronize_unregister();
- mdef->monitor->enabled = 0;
+ mon->enabled = 0;
}
-static int rv_enable_container(struct rv_monitor_def *mdef)
+static int rv_enable_container(struct rv_monitor *mon)
{
- struct rv_monitor_def *p = mdef;
+ struct rv_monitor *p = mon;
int retval = 0;
list_for_each_entry_continue(p, &rv_monitors_list, list) {
- if (retval || p->parent != mdef->monitor)
+ if (retval || p->parent != mon)
break;
retval = rv_enable_single(p);
}
if (retval)
- rv_disable_container(mdef);
+ rv_disable_container(mon);
else
- mdef->monitor->enabled = 1;
+ mon->enabled = 1;
return retval;
}
/**
* rv_disable_monitor - disable a given runtime monitor
- * @mdef: Pointer to the monitor definition structure.
+ * @mon: Pointer to the monitor structure.
*
* Returns 0 on success.
*/
-int rv_disable_monitor(struct rv_monitor_def *mdef)
+int rv_disable_monitor(struct rv_monitor *mon)
{
- if (rv_is_container_monitor(mdef))
- rv_disable_container(mdef);
+ if (rv_is_container_monitor(mon))
+ rv_disable_container(mon);
else
- rv_disable_single(mdef);
+ rv_disable_single(mon);
return 0;
}
/**
* rv_enable_monitor - enable a given runtime monitor
- * @mdef: Pointer to the monitor definition structure.
+ * @mon: Pointer to the monitor structure.
*
* Returns 0 on success, error otherwise.
*/
-int rv_enable_monitor(struct rv_monitor_def *mdef)
+int rv_enable_monitor(struct rv_monitor *mon)
{
int retval;
- if (rv_is_container_monitor(mdef))
- retval = rv_enable_container(mdef);
+ if (rv_is_container_monitor(mon))
+ retval = rv_enable_container(mon);
else
- retval = rv_enable_single(mdef);
+ retval = rv_enable_single(mon);
return retval;
}
@@ -367,7 +367,7 @@ int rv_enable_monitor(struct rv_monitor_def *mdef)
static ssize_t monitor_enable_write_data(struct file *filp, const char __user *user_buf,
size_t count, loff_t *ppos)
{
- struct rv_monitor_def *mdef = filp->private_data;
+ struct rv_monitor *mon = filp->private_data;
int retval;
bool val;
@@ -378,9 +378,9 @@ static ssize_t monitor_enable_write_data(struct file *filp, const char __user *u
mutex_lock(&rv_interface_lock);
if (val)
- retval = rv_enable_monitor(mdef);
+ retval = rv_enable_monitor(mon);
else
- retval = rv_disable_monitor(mdef);
+ retval = rv_disable_monitor(mon);
mutex_unlock(&rv_interface_lock);
@@ -399,12 +399,12 @@ static const struct file_operations interface_enable_fops = {
static ssize_t monitor_desc_read_data(struct file *filp, char __user *user_buf, size_t count,
loff_t *ppos)
{
- struct rv_monitor_def *mdef = filp->private_data;
+ struct rv_monitor *mon = filp->private_data;
char buff[256];
memset(buff, 0, sizeof(buff));
- snprintf(buff, sizeof(buff), "%s\n", mdef->monitor->description);
+ snprintf(buff, sizeof(buff), "%s\n", mon->description);
return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff) + 1);
}
@@ -419,37 +419,37 @@ static const struct file_operations interface_desc_fops = {
* the monitor dir, where the specific options of the monitor
* are exposed.
*/
-static int create_monitor_dir(struct rv_monitor_def *mdef, struct rv_monitor_def *parent)
+static int create_monitor_dir(struct rv_monitor *mon, struct rv_monitor *parent)
{
struct dentry *root = parent ? parent->root_d : get_monitors_root();
- const char *name = mdef->monitor->name;
+ const char *name = mon->name;
struct dentry *tmp;
int retval;
- mdef->root_d = rv_create_dir(name, root);
- if (!mdef->root_d)
+ mon->root_d = rv_create_dir(name, root);
+ if (!mon->root_d)
return -ENOMEM;
- tmp = rv_create_file("enable", RV_MODE_WRITE, mdef->root_d, mdef, &interface_enable_fops);
+ tmp = rv_create_file("enable", RV_MODE_WRITE, mon->root_d, mon, &interface_enable_fops);
if (!tmp) {
retval = -ENOMEM;
goto out_remove_root;
}
- tmp = rv_create_file("desc", RV_MODE_READ, mdef->root_d, mdef, &interface_desc_fops);
+ tmp = rv_create_file("desc", RV_MODE_READ, mon->root_d, mon, &interface_desc_fops);
if (!tmp) {
retval = -ENOMEM;
goto out_remove_root;
}
- retval = reactor_populate_monitor(mdef);
+ retval = reactor_populate_monitor(mon);
if (retval)
goto out_remove_root;
return 0;
out_remove_root:
- rv_remove(mdef->root_d);
+ rv_remove(mon->root_d);
return retval;
}
@@ -458,13 +458,12 @@ out_remove_root:
*/
static int monitors_show(struct seq_file *m, void *p)
{
- struct rv_monitor_def *mon_def = p;
+ struct rv_monitor *mon = container_of(p, struct rv_monitor, list);
- if (mon_def->parent)
- seq_printf(m, "%s:%s\n", mon_def->parent->name,
- mon_def->monitor->name);
+ if (mon->parent)
+ seq_printf(m, "%s:%s\n", mon->parent->name, mon->name);
else
- seq_printf(m, "%s\n", mon_def->monitor->name);
+ seq_printf(m, "%s\n", mon->name);
return 0;
}
@@ -496,13 +495,13 @@ static void *available_monitors_next(struct seq_file *m, void *p, loff_t *pos)
*/
static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos)
{
- struct rv_monitor_def *m_def = p;
+ struct rv_monitor *mon = p;
(*pos)++;
- list_for_each_entry_continue(m_def, &rv_monitors_list, list) {
- if (m_def->monitor->enabled)
- return m_def;
+ list_for_each_entry_continue(mon, &rv_monitors_list, list) {
+ if (mon->enabled)
+ return mon;
}
return NULL;
@@ -510,7 +509,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos)
static void *enabled_monitors_start(struct seq_file *m, loff_t *pos)
{
- struct rv_monitor_def *m_def;
+ struct rv_monitor *mon;
loff_t l;
mutex_lock(&rv_interface_lock);
@@ -518,15 +517,15 @@ static void *enabled_monitors_start(struct seq_file *m, loff_t *pos)
if (list_empty(&rv_monitors_list))
return NULL;
- m_def = list_entry(&rv_monitors_list, struct rv_monitor_def, list);
+ mon = list_entry(&rv_monitors_list, struct rv_monitor, list);
for (l = 0; l <= *pos; ) {
- m_def = enabled_monitors_next(m, m_def, &l);
- if (!m_def)
+ mon = enabled_monitors_next(m, mon, &l);
+ if (!mon)
break;
}
- return m_def;
+ return mon;
}
/*
@@ -566,13 +565,13 @@ static const struct file_operations available_monitors_ops = {
*/
static void disable_all_monitors(void)
{
- struct rv_monitor_def *mdef;
+ struct rv_monitor *mon;
int enabled = 0;
mutex_lock(&rv_interface_lock);
- list_for_each_entry(mdef, &rv_monitors_list, list)
- enabled += __rv_disable_monitor(mdef, false);
+ list_for_each_entry(mon, &rv_monitors_list, list)
+ enabled += __rv_disable_monitor(mon, false);
if (enabled) {
/*
@@ -598,7 +597,7 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user
size_t count, loff_t *ppos)
{
char buff[MAX_RV_MONITOR_NAME_SIZE + 2];
- struct rv_monitor_def *mdef;
+ struct rv_monitor *mon;
int retval = -EINVAL;
bool enable = true;
char *ptr, *tmp;
@@ -633,17 +632,17 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user
if (tmp)
ptr = tmp+1;
- list_for_each_entry(mdef, &rv_monitors_list, list) {
- if (strcmp(ptr, mdef->monitor->name) != 0)
+ list_for_each_entry(mon, &rv_monitors_list, list) {
+ if (strcmp(ptr, mon->name) != 0)
continue;
/*
* Monitor found!
*/
if (enable)
- retval = rv_enable_monitor(mdef);
+ retval = rv_enable_monitor(mon);
else
- retval = rv_disable_monitor(mdef);
+ retval = rv_disable_monitor(mon);
if (!retval)
retval = count;
@@ -702,11 +701,11 @@ static void turn_monitoring_off(void)
static void reset_all_monitors(void)
{
- struct rv_monitor_def *mdef;
+ struct rv_monitor *mon;
- list_for_each_entry(mdef, &rv_monitors_list, list) {
- if (mdef->monitor->enabled && mdef->monitor->reset)
- mdef->monitor->reset();
+ list_for_each_entry(mon, &rv_monitors_list, list) {
+ if (mon->enabled && mon->reset)
+ mon->reset();
}
}
@@ -768,10 +767,9 @@ static const struct file_operations monitoring_on_fops = {
.read = monitoring_on_read_data,
};
-static void destroy_monitor_dir(struct rv_monitor_def *mdef)
+static void destroy_monitor_dir(struct rv_monitor *mon)
{
- reactor_cleanup_monitor(mdef);
- rv_remove(mdef->root_d);
+ rv_remove(mon->root_d);
}
/**
@@ -783,7 +781,7 @@ static void destroy_monitor_dir(struct rv_monitor_def *mdef)
*/
int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent)
{
- struct rv_monitor_def *r, *p = NULL;
+ struct rv_monitor *r;
int retval = 0;
if (strlen(monitor->name) >= MAX_RV_MONITOR_NAME_SIZE) {
@@ -795,49 +793,31 @@ int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent)
mutex_lock(&rv_interface_lock);
list_for_each_entry(r, &rv_monitors_list, list) {
- if (strcmp(monitor->name, r->monitor->name) == 0) {
+ if (strcmp(monitor->name, r->name) == 0) {
pr_info("Monitor %s is already registered\n", monitor->name);
retval = -EEXIST;
goto out_unlock;
}
}
- if (parent) {
- list_for_each_entry(r, &rv_monitors_list, list) {
- if (strcmp(parent->name, r->monitor->name) == 0) {
- p = r;
- break;
- }
- }
- }
-
- if (p && rv_is_nested_monitor(p)) {
+ if (parent && rv_is_nested_monitor(parent)) {
pr_info("Parent monitor %s is already nested, cannot nest further\n",
parent->name);
retval = -EINVAL;
goto out_unlock;
}
- r = kzalloc(sizeof(struct rv_monitor_def), GFP_KERNEL);
- if (!r) {
- retval = -ENOMEM;
- goto out_unlock;
- }
-
- r->monitor = monitor;
- r->parent = parent;
+ monitor->parent = parent;
- retval = create_monitor_dir(r, p);
- if (retval) {
- kfree(r);
- goto out_unlock;
- }
+ retval = create_monitor_dir(monitor, parent);
+ if (retval)
+		goto out_unlock;
/* keep children close to the parent for easier visualisation */
- if (p)
- list_add(&r->list, &p->list);
+ if (parent)
+ list_add(&monitor->list, &parent->list);
else
- list_add_tail(&r->list, &rv_monitors_list);
+ list_add_tail(&monitor->list, &rv_monitors_list);
out_unlock:
mutex_unlock(&rv_interface_lock);
@@ -852,17 +832,11 @@ out_unlock:
*/
int rv_unregister_monitor(struct rv_monitor *monitor)
{
- struct rv_monitor_def *ptr, *next;
-
mutex_lock(&rv_interface_lock);
- list_for_each_entry_safe(ptr, next, &rv_monitors_list, list) {
- if (strcmp(monitor->name, ptr->monitor->name) == 0) {
- rv_disable_monitor(ptr);
- list_del(&ptr->list);
- destroy_monitor_dir(ptr);
- }
- }
+ rv_disable_monitor(monitor);
+ list_del(&monitor->list);
+ destroy_monitor_dir(monitor);
mutex_unlock(&rv_interface_lock);
return 0;
diff --git a/kernel/trace/rv/rv.h b/kernel/trace/rv/rv.h
index 98fca0a1adbc..1485a70c1bf4 100644
--- a/kernel/trace/rv/rv.h
+++ b/kernel/trace/rv/rv.h
@@ -23,48 +23,21 @@ struct rv_interface {
extern struct mutex rv_interface_lock;
extern struct list_head rv_monitors_list;
-#ifdef CONFIG_RV_REACTORS
-struct rv_reactor_def {
- struct list_head list;
- struct rv_reactor *reactor;
- /* protected by the monitor interface lock */
- int counter;
-};
-#endif
-
-struct rv_monitor_def {
- struct list_head list;
- struct rv_monitor *monitor;
- struct rv_monitor *parent;
- struct dentry *root_d;
-#ifdef CONFIG_RV_REACTORS
- struct rv_reactor_def *rdef;
- bool reacting;
-#endif
- bool task_monitor;
-};
-
struct dentry *get_monitors_root(void);
-int rv_disable_monitor(struct rv_monitor_def *mdef);
-int rv_enable_monitor(struct rv_monitor_def *mdef);
-bool rv_is_container_monitor(struct rv_monitor_def *mdef);
-bool rv_is_nested_monitor(struct rv_monitor_def *mdef);
+int rv_disable_monitor(struct rv_monitor *mon);
+int rv_enable_monitor(struct rv_monitor *mon);
+bool rv_is_container_monitor(struct rv_monitor *mon);
+bool rv_is_nested_monitor(struct rv_monitor *mon);
#ifdef CONFIG_RV_REACTORS
-int reactor_populate_monitor(struct rv_monitor_def *mdef);
-void reactor_cleanup_monitor(struct rv_monitor_def *mdef);
+int reactor_populate_monitor(struct rv_monitor *mon);
int init_rv_reactors(struct dentry *root_dir);
#else
-static inline int reactor_populate_monitor(struct rv_monitor_def *mdef)
+static inline int reactor_populate_monitor(struct rv_monitor *mon)
{
return 0;
}
-static inline void reactor_cleanup_monitor(struct rv_monitor_def *mdef)
-{
- return;
-}
-
static inline int init_rv_reactors(struct dentry *root_dir)
{
return 0;
diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c
index 9501ca886d83..d32859fec238 100644
--- a/kernel/trace/rv/rv_reactors.c
+++ b/kernel/trace/rv/rv_reactors.c
@@ -70,12 +70,12 @@
*/
static LIST_HEAD(rv_reactors_list);
-static struct rv_reactor_def *get_reactor_rdef_by_name(char *name)
+static struct rv_reactor *get_reactor_rdef_by_name(char *name)
{
- struct rv_reactor_def *r;
+ struct rv_reactor *r;
list_for_each_entry(r, &rv_reactors_list, list) {
- if (strcmp(name, r->reactor->name) == 0)
+ if (strcmp(name, r->name) == 0)
return r;
}
return NULL;
@@ -86,9 +86,9 @@ static struct rv_reactor_def *get_reactor_rdef_by_name(char *name)
*/
static int reactors_show(struct seq_file *m, void *p)
{
- struct rv_reactor_def *rea_def = p;
+ struct rv_reactor *reactor = container_of(p, struct rv_reactor, list);
- seq_printf(m, "%s\n", rea_def->reactor->name);
+ seq_printf(m, "%s\n", reactor->name);
return 0;
}
@@ -138,13 +138,13 @@ static const struct file_operations available_reactors_ops = {
*/
static int monitor_reactor_show(struct seq_file *m, void *p)
{
- struct rv_monitor_def *mdef = m->private;
- struct rv_reactor_def *rdef = p;
+ struct rv_monitor *mon = m->private;
+ struct rv_reactor *reactor = container_of(p, struct rv_reactor, list);
- if (mdef->rdef == rdef)
- seq_printf(m, "[%s]\n", rdef->reactor->name);
+ if (mon->reactor == reactor)
+ seq_printf(m, "[%s]\n", reactor->name);
else
- seq_printf(m, "%s\n", rdef->reactor->name);
+ seq_printf(m, "%s\n", reactor->name);
return 0;
}
@@ -158,43 +158,37 @@ static const struct seq_operations monitor_reactors_seq_ops = {
.show = monitor_reactor_show
};
-static void monitor_swap_reactors_single(struct rv_monitor_def *mdef,
- struct rv_reactor_def *rdef,
- bool reacting, bool nested)
+static void monitor_swap_reactors_single(struct rv_monitor *mon,
+ struct rv_reactor *reactor,
+ bool nested)
{
bool monitor_enabled;
/* nothing to do */
- if (mdef->rdef == rdef)
+ if (mon->reactor == reactor)
return;
- monitor_enabled = mdef->monitor->enabled;
+ monitor_enabled = mon->enabled;
if (monitor_enabled)
- rv_disable_monitor(mdef);
+ rv_disable_monitor(mon);
- /* swap reactor's usage */
- mdef->rdef->counter--;
- rdef->counter++;
-
- mdef->rdef = rdef;
- mdef->reacting = reacting;
- mdef->monitor->react = rdef->reactor->react;
+ mon->reactor = reactor;
+ mon->react = reactor->react;
/* enable only once if iterating through a container */
if (monitor_enabled && !nested)
- rv_enable_monitor(mdef);
+ rv_enable_monitor(mon);
}
-static void monitor_swap_reactors(struct rv_monitor_def *mdef,
- struct rv_reactor_def *rdef, bool reacting)
+static void monitor_swap_reactors(struct rv_monitor *mon, struct rv_reactor *reactor)
{
- struct rv_monitor_def *p = mdef;
+ struct rv_monitor *p = mon;
- if (rv_is_container_monitor(mdef))
+ if (rv_is_container_monitor(mon))
list_for_each_entry_continue(p, &rv_monitors_list, list) {
- if (p->parent != mdef->monitor)
+ if (p->parent != mon)
break;
- monitor_swap_reactors_single(p, rdef, reacting, true);
+ monitor_swap_reactors_single(p, reactor, true);
}
/*
 * This call disables and re-enables the monitor if it was active.
@@ -202,7 +196,7 @@ static void monitor_swap_reactors(struct rv_monitor_def *mdef,
 * All nested monitors are also enabled even if they were off; we may
 * refine this logic in the future.
*/
- monitor_swap_reactors_single(mdef, rdef, reacting, false);
+ monitor_swap_reactors_single(mon, reactor, false);
}
static ssize_t
@@ -210,11 +204,10 @@ monitor_reactors_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
char buff[MAX_RV_REACTOR_NAME_SIZE + 2];
- struct rv_monitor_def *mdef;
- struct rv_reactor_def *rdef;
+ struct rv_monitor *mon;
+ struct rv_reactor *reactor;
struct seq_file *seq_f;
int retval = -EINVAL;
- bool enable;
char *ptr;
int len;
@@ -237,22 +230,17 @@ monitor_reactors_write(struct file *file, const char __user *user_buf,
* See monitor_reactors_open()
*/
seq_f = file->private_data;
- mdef = seq_f->private;
+ mon = seq_f->private;
mutex_lock(&rv_interface_lock);
retval = -EINVAL;
- list_for_each_entry(rdef, &rv_reactors_list, list) {
- if (strcmp(ptr, rdef->reactor->name) != 0)
+ list_for_each_entry(reactor, &rv_reactors_list, list) {
+ if (strcmp(ptr, reactor->name) != 0)
continue;
- if (rdef == get_reactor_rdef_by_name("nop"))
- enable = false;
- else
- enable = true;
-
- monitor_swap_reactors(mdef, rdef, enable);
+ monitor_swap_reactors(mon, reactor);
retval = count;
break;
@@ -268,7 +256,7 @@ monitor_reactors_write(struct file *file, const char __user *user_buf,
*/
static int monitor_reactors_open(struct inode *inode, struct file *file)
{
- struct rv_monitor_def *mdef = inode->i_private;
+ struct rv_monitor *mon = inode->i_private;
struct seq_file *seq_f;
int ret;
@@ -284,7 +272,7 @@ static int monitor_reactors_open(struct inode *inode, struct file *file)
/*
* Copy the create file "private" data to the seq_file private data.
*/
- seq_f->private = mdef;
+ seq_f->private = mon;
return 0;
};
@@ -299,23 +287,16 @@ static const struct file_operations monitor_reactors_ops = {
static int __rv_register_reactor(struct rv_reactor *reactor)
{
- struct rv_reactor_def *r;
+ struct rv_reactor *r;
list_for_each_entry(r, &rv_reactors_list, list) {
- if (strcmp(reactor->name, r->reactor->name) == 0) {
+ if (strcmp(reactor->name, r->name) == 0) {
pr_info("Reactor %s is already registered\n", reactor->name);
return -EINVAL;
}
}
- r = kzalloc(sizeof(struct rv_reactor_def), GFP_KERNEL);
- if (!r)
- return -ENOMEM;
-
- r->reactor = reactor;
- r->counter = 0;
-
- list_add_tail(&r->list, &rv_reactors_list);
+ list_add_tail(&reactor->list, &rv_reactors_list);
return 0;
}
@@ -350,30 +331,10 @@ int rv_register_reactor(struct rv_reactor *reactor)
*/
int rv_unregister_reactor(struct rv_reactor *reactor)
{
- struct rv_reactor_def *ptr, *next;
- int ret = 0;
-
mutex_lock(&rv_interface_lock);
-
- list_for_each_entry_safe(ptr, next, &rv_reactors_list, list) {
- if (strcmp(reactor->name, ptr->reactor->name) == 0) {
-
- if (!ptr->counter) {
- list_del(&ptr->list);
- } else {
- printk(KERN_WARNING
- "rv: the rv_reactor %s is in use by %d monitor(s)\n",
- ptr->reactor->name, ptr->counter);
- printk(KERN_WARNING "rv: the rv_reactor %s cannot be removed\n",
- ptr->reactor->name);
- ret = -EBUSY;
- break;
- }
- }
- }
-
+ list_del(&reactor->list);
mutex_unlock(&rv_interface_lock);
- return ret;
+ return 0;
}
/*
@@ -454,43 +415,30 @@ static const struct file_operations reacting_on_fops = {
/**
* reactor_populate_monitor - creates per monitor reactors file
- * @mdef: monitor's definition.
+ * @mon: The monitor.
*
* Returns 0 if successful, error otherwise.
*/
-int reactor_populate_monitor(struct rv_monitor_def *mdef)
+int reactor_populate_monitor(struct rv_monitor *mon)
{
struct dentry *tmp;
- tmp = rv_create_file("reactors", RV_MODE_WRITE, mdef->root_d, mdef, &monitor_reactors_ops);
+ tmp = rv_create_file("reactors", RV_MODE_WRITE, mon->root_d, mon, &monitor_reactors_ops);
if (!tmp)
return -ENOMEM;
/*
* Configure as the rv_nop reactor.
*/
- mdef->rdef = get_reactor_rdef_by_name("nop");
- mdef->rdef->counter++;
- mdef->reacting = false;
+ mon->reactor = get_reactor_rdef_by_name("nop");
return 0;
}
-/**
- * reactor_cleanup_monitor - cleanup a monitor reference
- * @mdef: monitor's definition.
- */
-void reactor_cleanup_monitor(struct rv_monitor_def *mdef)
-{
- lockdep_assert_held(&rv_interface_lock);
- mdef->rdef->counter--;
- WARN_ON_ONCE(mdef->rdef->counter < 0);
-}
-
/*
* Nop reactor register
*/
-static void rv_nop_reaction(char *msg)
+__printf(1, 2) static void rv_nop_reaction(const char *msg, ...)
{
}
diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h
index 422b75f58891..4a6faddac614 100644
--- a/kernel/trace/rv/rv_trace.h
+++ b/kernel/trace/rv/rv_trace.h
@@ -16,24 +16,24 @@ DECLARE_EVENT_CLASS(event_da_monitor,
TP_ARGS(state, event, next_state, final_state),
TP_STRUCT__entry(
- __array( char, state, MAX_DA_NAME_LEN )
- __array( char, event, MAX_DA_NAME_LEN )
- __array( char, next_state, MAX_DA_NAME_LEN )
- __field( bool, final_state )
+ __string( state, state )
+ __string( event, event )
+ __string( next_state, next_state )
+ __field( bool, final_state )
),
TP_fast_assign(
- memcpy(__entry->state, state, MAX_DA_NAME_LEN);
- memcpy(__entry->event, event, MAX_DA_NAME_LEN);
- memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN);
- __entry->final_state = final_state;
+ __assign_str(state);
+ __assign_str(event);
+ __assign_str(next_state);
+ __entry->final_state = final_state;
),
- TP_printk("%s x %s -> %s %s",
- __entry->state,
- __entry->event,
- __entry->next_state,
- __entry->final_state ? "(final)" : "")
+ TP_printk("%s x %s -> %s%s",
+ __get_str(state),
+ __get_str(event),
+ __get_str(next_state),
+ __entry->final_state ? " (final)" : "")
);
DECLARE_EVENT_CLASS(error_da_monitor,
@@ -43,26 +43,26 @@ DECLARE_EVENT_CLASS(error_da_monitor,
TP_ARGS(state, event),
TP_STRUCT__entry(
- __array( char, state, MAX_DA_NAME_LEN )
- __array( char, event, MAX_DA_NAME_LEN )
+ __string( state, state )
+ __string( event, event )
),
TP_fast_assign(
- memcpy(__entry->state, state, MAX_DA_NAME_LEN);
- memcpy(__entry->event, event, MAX_DA_NAME_LEN);
+ __assign_str(state);
+ __assign_str(event);
),
TP_printk("event %s not expected in the state %s",
- __entry->event,
- __entry->state)
+ __get_str(event),
+ __get_str(state))
);
#include <monitors/wip/wip_trace.h>
-#include <monitors/tss/tss_trace.h>
#include <monitors/sco/sco_trace.h>
#include <monitors/scpd/scpd_trace.h>
#include <monitors/snep/snep_trace.h>
-#include <monitors/sncid/sncid_trace.h>
+#include <monitors/sts/sts_trace.h>
+#include <monitors/opid/opid_trace.h>
// Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here
#endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */
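
The conversion above replaces fixed MAX_DA_NAME_LEN char arrays with __string()
dynamic fields, so each record stores only strlen() + 1 bytes. For reference,
this is the general shape of the pattern in a minimal, generic event (not part
of this patch; the single-argument __assign_str() form matches current kernels):

TRACE_EVENT(example_str,

	TP_PROTO(const char *name),

	TP_ARGS(name),

	TP_STRUCT__entry(
		__string(	name,	name	)	/* reserves strlen(name) + 1 */
	),

	TP_fast_assign(
		__assign_str(name);	/* copies the string captured at __string() */
	),

	TP_printk("%s", __get_str(name))
);
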
@@ -75,27 +75,27 @@ DECLARE_EVENT_CLASS(event_da_monitor_id,
TP_ARGS(id, state, event, next_state, final_state),
TP_STRUCT__entry(
- __field( int, id )
- __array( char, state, MAX_DA_NAME_LEN )
- __array( char, event, MAX_DA_NAME_LEN )
- __array( char, next_state, MAX_DA_NAME_LEN )
- __field( bool, final_state )
+ __field( int, id )
+ __string( state, state )
+ __string( event, event )
+ __string( next_state, next_state )
+ __field( bool, final_state )
),
TP_fast_assign(
- memcpy(__entry->state, state, MAX_DA_NAME_LEN);
- memcpy(__entry->event, event, MAX_DA_NAME_LEN);
- memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN);
- __entry->id = id;
- __entry->final_state = final_state;
+ __assign_str(state);
+ __assign_str(event);
+ __assign_str(next_state);
+ __entry->id = id;
+ __entry->final_state = final_state;
),
- TP_printk("%d: %s x %s -> %s %s",
+ TP_printk("%d: %s x %s -> %s%s",
__entry->id,
- __entry->state,
- __entry->event,
- __entry->next_state,
- __entry->final_state ? "(final)" : "")
+ __get_str(state),
+ __get_str(event),
+ __get_str(next_state),
+ __entry->final_state ? " (final)" : "")
);
DECLARE_EVENT_CLASS(error_da_monitor_id,
@@ -105,32 +105,108 @@ DECLARE_EVENT_CLASS(error_da_monitor_id,
TP_ARGS(id, state, event),
TP_STRUCT__entry(
- __field( int, id )
- __array( char, state, MAX_DA_NAME_LEN )
- __array( char, event, MAX_DA_NAME_LEN )
+ __field( int, id )
+ __string( state, state )
+ __string( event, event )
),
TP_fast_assign(
- memcpy(__entry->state, state, MAX_DA_NAME_LEN);
- memcpy(__entry->event, event, MAX_DA_NAME_LEN);
- __entry->id = id;
+ __assign_str(state);
+ __assign_str(event);
+ __entry->id = id;
),
TP_printk("%d: event %s not expected in the state %s",
__entry->id,
- __entry->event,
- __entry->state)
+ __get_str(event),
+ __get_str(state))
);
#include <monitors/wwnr/wwnr_trace.h>
#include <monitors/snroc/snroc_trace.h>
+#include <monitors/nrp/nrp_trace.h>
+#include <monitors/sssw/sssw_trace.h>
// Add new monitors based on CONFIG_DA_MON_EVENTS_ID here
#endif /* CONFIG_DA_MON_EVENTS_ID */
+#ifdef CONFIG_LTL_MON_EVENTS_ID
+DECLARE_EVENT_CLASS(event_ltl_monitor_id,
+
+ TP_PROTO(struct task_struct *task, char *states, char *atoms, char *next),
+
+ TP_ARGS(task, states, atoms, next),
+
+ TP_STRUCT__entry(
+ __string(comm, task->comm)
+ __field(pid_t, pid)
+ __string(states, states)
+ __string(atoms, atoms)
+ __string(next, next)
+ ),
+
+ TP_fast_assign(
+ __assign_str(comm);
+ __entry->pid = task->pid;
+ __assign_str(states);
+ __assign_str(atoms);
+ __assign_str(next);
+ ),
+
+ TP_printk("%s[%d]: (%s) x (%s) -> (%s)", __get_str(comm), __entry->pid,
+ __get_str(states), __get_str(atoms), __get_str(next))
+);
+
+DECLARE_EVENT_CLASS(error_ltl_monitor_id,
+
+ TP_PROTO(struct task_struct *task),
+
+ TP_ARGS(task),
+
+ TP_STRUCT__entry(
+ __string(comm, task->comm)
+ __field(pid_t, pid)
+ ),
+
+ TP_fast_assign(
+ __assign_str(comm);
+ __entry->pid = task->pid;
+ ),
+
+ TP_printk("%s[%d]: violation detected", __get_str(comm), __entry->pid)
+);
+#include <monitors/pagefault/pagefault_trace.h>
+#include <monitors/sleep/sleep_trace.h>
+// Add new monitors based on CONFIG_LTL_MON_EVENTS_ID here
+#endif /* CONFIG_LTL_MON_EVENTS_ID */
+
+#ifdef CONFIG_RV_MON_MAINTENANCE_EVENTS
+/* Tracepoint useful for monitor development, currently only used in DA */
+TRACE_EVENT(rv_retries_error,
+
+ TP_PROTO(char *name, char *event),
+
+ TP_ARGS(name, event),
+
+ TP_STRUCT__entry(
+ __string( name, name )
+ __string( event, event )
+ ),
+
+ TP_fast_assign(
+ __assign_str(name);
+ __assign_str(event);
+ ),
+
+ TP_printk(__stringify(MAX_DA_RETRY_RACING_EVENTS)
+ " retries reached for event %s, resetting monitor %s",
+ __get_str(event), __get_str(name))
+);
+#endif /* CONFIG_RV_MON_MAINTENANCE_EVENTS */
#endif /* _TRACE_RV_H */
-/* This part ust be outside protection */
+/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE rv_trace
#include <trace/define_trace.h>
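
The trailer fixed above follows the usual self-including trace header layout;
the added #undef TRACE_INCLUDE_FILE matters when more than one trace header is
expanded in the same compilation unit. Schematically, the convention looks like
this generic skeleton (my_subsys/my_trace are placeholders):

/* my_trace.h - generic skeleton of a trace header */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM my_subsys

#if !defined(_TRACE_MY_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MY_H

#include <linux/tracepoint.h>

/* TRACE_EVENT()/DECLARE_EVENT_CLASS() definitions go here */

#endif /* _TRACE_MY_H */

/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE my_trace
#include <trace/define_trace.h>
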
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5b8db27fb6ef..7996f26c3f46 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -51,6 +51,7 @@
#include <linux/workqueue.h>
#include <linux/sort.h>
#include <linux/io.h> /* vmap_page_range() */
+#include <linux/fs_context.h>
#include <asm/setup.h> /* COMMAND_LINE_SIZE */
@@ -120,6 +121,7 @@ static int tracing_disabled = 1;
cpumask_var_t __read_mostly tracing_buffer_mask;
+#define MAX_TRACER_SIZE 100
/*
* ftrace_dump_on_oops - variable to dump ftrace buffer on oops
*
@@ -142,7 +144,40 @@ cpumask_var_t __read_mostly tracing_buffer_mask;
char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
/* When set, tracing will stop when a WARN*() is hit */
-int __disable_trace_on_warning;
+static int __disable_trace_on_warning;
+
+int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
+static const struct ctl_table trace_sysctl_table[] = {
+ {
+ .procname = "ftrace_dump_on_oops",
+ .data = &ftrace_dump_on_oops,
+ .maxlen = MAX_TRACER_SIZE,
+ .mode = 0644,
+ .proc_handler = proc_dostring,
+ },
+ {
+ .procname = "traceoff_on_warning",
+ .data = &__disable_trace_on_warning,
+ .maxlen = sizeof(__disable_trace_on_warning),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tracepoint_printk",
+ .data = &tracepoint_printk,
+ .maxlen = sizeof(tracepoint_printk),
+ .mode = 0644,
+ .proc_handler = tracepoint_printk_sysctl,
+ },
+};
+
+static int __init init_trace_sysctls(void)
+{
+ register_sysctl_init("kernel", trace_sysctl_table);
+ return 0;
+}
+subsys_initcall(init_trace_sysctls);
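
The table above keeps these knobs reachable at their historical paths under
/proc/sys/kernel/ while moving their registration next to the tracing code. A
minimal userspace sketch exercising one of them (plain C, path exactly as
registered above):

/* Userspace sketch: arm traceoff_on_warning via the sysctl added above. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/traceoff_on_warning", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("1\n", f);	/* tracing stops at the next WARN*() */
	fclose(f);
	return 0;
}
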
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
@@ -493,7 +528,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export);
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
- TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
+ TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK | \
+ TRACE_ITER_COPY_MARKER)
/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
@@ -501,7 +537,8 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_export);
/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
- (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
+ (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
+ TRACE_ITER_COPY_MARKER)
/*
* The global_trace is the descriptor that holds the top-level tracing
@@ -513,6 +550,9 @@ static struct trace_array global_trace = {
static struct trace_array *printk_trace = &global_trace;
+/* List of trace_arrays interested in the top level trace_marker */
+static LIST_HEAD(marker_copies);
+
static __always_inline bool printk_binsafe(struct trace_array *tr)
{
/*
@@ -534,6 +574,28 @@ static void update_printk_trace(struct trace_array *tr)
tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
}
+/* Returns true if the status of tr changed */
+static bool update_marker_trace(struct trace_array *tr, int enabled)
+{
+ lockdep_assert_held(&event_mutex);
+
+ if (enabled) {
+ if (!list_empty(&tr->marker_list))
+ return false;
+
+ list_add_rcu(&tr->marker_list, &marker_copies);
+ tr->trace_flags |= TRACE_ITER_COPY_MARKER;
+ return true;
+ }
+
+ if (list_empty(&tr->marker_list))
+ return false;
+
+ list_del_init(&tr->marker_list);
+ tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
+ return true;
+}
+
void trace_set_ring_buffer_expanded(struct trace_array *tr)
{
if (!tr)
@@ -1583,6 +1645,39 @@ void tracer_tracing_off(struct trace_array *tr)
}
/**
+ * tracer_tracing_disable() - temporarily disable writes to the buffer
+ * @tr: The trace array to disable its buffer for
+ *
+ * Expects tracer_tracing_enable() to re-enable tracing.
+ * The difference between this and tracer_tracing_off() is that this
+ * is a counter and can nest, whereas tracer_tracing_off() can
+ * be called multiple times and a single tracer_tracing_on() will
+ * enable it.
+ */
+void tracer_tracing_disable(struct trace_array *tr)
+{
+ if (WARN_ON_ONCE(!tr->array_buffer.buffer))
+ return;
+
+ ring_buffer_record_disable(tr->array_buffer.buffer);
+}
+
+/**
+ * tracer_tracing_enable() - counterpart of tracer_tracing_disable()
+ * @tr: The trace array that had tracer_tracing_disable() called on it
+ *
+ * This is called after tracer_tracing_disable() has been called on @tr,
+ * when it's safe to re-enable tracing.
+ */
+void tracer_tracing_enable(struct trace_array *tr)
+{
+ if (WARN_ON_ONCE(!tr->array_buffer.buffer))
+ return;
+
+ ring_buffer_record_enable(tr->array_buffer.buffer);
+}
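
Because both helpers map onto the ring buffer's record-disable counter, the
pair nests, unlike the boolean tracer_tracing_off()/tracer_tracing_on(). A
short usage sketch (tr is any valid trace_array; the comments track the
counter):

static void example_nested_quiesce(struct trace_array *tr)
{
	tracer_tracing_disable(tr);	/* counter 0 -> 1: writes blocked */
	tracer_tracing_disable(tr);	/* counter 1 -> 2: still blocked */

	/* ... inspect the buffer without new writes racing in ... */

	tracer_tracing_enable(tr);	/* counter 2 -> 1: still blocked */
	tracer_tracing_enable(tr);	/* counter 1 -> 0: writes resume */
}
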
+
+/**
* tracing_off - turn off tracing buffers
*
* This function stops the tracing buffers from recording data.
@@ -4640,21 +4735,15 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
for_each_tracing_cpu(cpu) {
iter->buffer_iter[cpu] =
- ring_buffer_read_prepare(iter->array_buffer->buffer,
- cpu, GFP_KERNEL);
- }
- ring_buffer_read_prepare_sync();
- for_each_tracing_cpu(cpu) {
- ring_buffer_read_start(iter->buffer_iter[cpu]);
+ ring_buffer_read_start(iter->array_buffer->buffer,
+ cpu, GFP_KERNEL);
tracing_iter_reset(iter, cpu);
}
} else {
cpu = iter->cpu_file;
iter->buffer_iter[cpu] =
- ring_buffer_read_prepare(iter->array_buffer->buffer,
- cpu, GFP_KERNEL);
- ring_buffer_read_prepare_sync();
- ring_buffer_read_start(iter->buffer_iter[cpu]);
+ ring_buffer_read_start(iter->array_buffer->buffer,
+ cpu, GFP_KERNEL);
tracing_iter_reset(iter, cpu);
}
@@ -5048,7 +5137,6 @@ int tracing_set_cpumask(struct trace_array *tr,
*/
if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
- atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
@@ -5056,7 +5144,6 @@ int tracing_set_cpumask(struct trace_array *tr,
}
if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
- atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_MAX_TRACE
ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
@@ -5189,7 +5276,8 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
if ((mask == TRACE_ITER_RECORD_TGID) ||
(mask == TRACE_ITER_RECORD_CMD) ||
- (mask == TRACE_ITER_TRACE_PRINTK))
+ (mask == TRACE_ITER_TRACE_PRINTK) ||
+ (mask == TRACE_ITER_COPY_MARKER))
lockdep_assert_held(&event_mutex);
/* do nothing if flag is already set */
@@ -5220,6 +5308,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
}
}
+ if (mask == TRACE_ITER_COPY_MARKER)
+ update_marker_trace(tr, enabled);
+
if (enabled)
tr->trace_flags |= mask;
else
@@ -6004,6 +6095,7 @@ struct trace_mod_entry {
};
struct trace_scratch {
+ unsigned int clock_id;
unsigned long text_addr;
unsigned long nr_entries;
struct trace_mod_entry entries[];
@@ -6032,6 +6124,7 @@ unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
struct trace_module_delta *module_delta;
struct trace_scratch *tscratch;
struct trace_mod_entry *entry;
+ unsigned long raddr;
int idx = 0, nr_entries;
/* If we don't have last boot delta, return the address */
@@ -6045,7 +6138,9 @@ unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
module_delta = READ_ONCE(tr->module_delta);
if (!module_delta || !tscratch->nr_entries ||
tscratch->entries[0].mod_addr > addr) {
- return addr + tr->text_delta;
+ raddr = addr + tr->text_delta;
+ return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
+ is_kernel_rodata(raddr) ? raddr : addr;
}
/* Note that entries must be sorted. */
@@ -6116,6 +6211,7 @@ static void update_last_data(struct trace_array *tr)
if (tr->scratch) {
struct trace_scratch *tscratch = tr->scratch;
+ tscratch->clock_id = tr->clock_id;
memset(tscratch->entries, 0,
flex_array_size(tscratch, entries, tscratch->nr_entries));
tscratch->nr_entries = 0;
@@ -6610,6 +6706,22 @@ static int tracing_wait_pipe(struct file *filp)
return 1;
}
+static bool update_last_data_if_empty(struct trace_array *tr)
+{
+ if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
+ return false;
+
+ if (!ring_buffer_empty(tr->array_buffer.buffer))
+ return false;
+
+ /*
+ * If the buffer contains the last boot data and all per-cpu
+ * buffers are empty, reset it from the kernel side.
+ */
+ update_last_data(tr);
+ return true;
+}
+
/*
* Consumer reader.
*/
@@ -6641,6 +6753,9 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
}
waitagain:
+ if (update_last_data_if_empty(iter->tr))
+ return 0;
+
sret = tracing_wait_pipe(filp);
if (sret <= 0)
return sret;
@@ -6824,7 +6939,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
ret = trace_seq_to_buffer(&iter->seq,
page_address(spd.pages[i]),
min((size_t)trace_seq_used(&iter->seq),
- PAGE_SIZE));
+ (size_t)PAGE_SIZE));
if (ret < 0) {
__free_page(spd.pages[i]);
break;
@@ -7100,11 +7215,9 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
#define TRACE_MARKER_MAX_SIZE 4096
-static ssize_t
-tracing_mark_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *fpos)
+static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
+ size_t cnt, unsigned long ip)
{
- struct trace_array *tr = filp->private_data;
struct ring_buffer_event *event;
enum event_trigger_type tt = ETT_NONE;
struct trace_buffer *buffer;
@@ -7118,18 +7231,6 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
#define FAULTED_STR "<faulted>"
#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
- if (tracing_disabled)
- return -EINVAL;
-
- if (!(tr->trace_flags & TRACE_ITER_MARKERS))
- return -EINVAL;
-
- if ((ssize_t)cnt < 0)
- return -EINVAL;
-
- if (cnt > TRACE_MARKER_MAX_SIZE)
- cnt = TRACE_MARKER_MAX_SIZE;
-
meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
again:
size = cnt + meta_size;
@@ -7162,7 +7263,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
}
entry = ring_buffer_event_data(event);
- entry->ip = _THIS_IP_;
+ entry->ip = ip;
len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
if (len) {
@@ -7195,18 +7296,12 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
}
static ssize_t
-tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
+tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
struct trace_array *tr = filp->private_data;
- struct ring_buffer_event *event;
- struct trace_buffer *buffer;
- struct raw_data_entry *entry;
- ssize_t written;
- int size;
- int len;
-
-#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
+ ssize_t written = -ENODEV;
+ unsigned long ip;
if (tracing_disabled)
return -EINVAL;
@@ -7214,10 +7309,42 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
if (!(tr->trace_flags & TRACE_ITER_MARKERS))
return -EINVAL;
- /* The marker must at least have a tag id */
- if (cnt < sizeof(unsigned int))
+ if ((ssize_t)cnt < 0)
return -EINVAL;
+ if (cnt > TRACE_MARKER_MAX_SIZE)
+ cnt = TRACE_MARKER_MAX_SIZE;
+
+	/* The selftests expect the recorded IP to be in this function */
+ ip = _THIS_IP_;
+
+ /* The global trace_marker can go to multiple instances */
+ if (tr == &global_trace) {
+ guard(rcu)();
+ list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
+ written = write_marker_to_buffer(tr, ubuf, cnt, ip);
+ if (written < 0)
+ break;
+ }
+ } else {
+ written = write_marker_to_buffer(tr, ubuf, cnt, ip);
+ }
+
+ return written;
+}
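
For reference, a minimal user-space exercise of this path; the tracefs path is the conventional mount point and the program is purely illustrative, not part of the patch:

	/* Hypothetical test program: one write() becomes one marker event. */
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char msg[] = "hello from trace_marker\n";
		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

		if (fd < 0)
			return 1;
		/* With copy_trace_marker set on an instance, this lands there too. */
		if (write(fd, msg, strlen(msg)) != (ssize_t)strlen(msg)) {
			close(fd);
			return 1;
		}
		close(fd);
		return 0;
	}
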
+
+static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
+ const char __user *ubuf, size_t cnt)
+{
+ struct ring_buffer_event *event;
+ struct trace_buffer *buffer;
+ struct raw_data_entry *entry;
+ ssize_t written;
+ int size;
+ int len;
+
+#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
+
size = sizeof(*entry) + cnt;
if (cnt < FAULT_SIZE_ID)
size += FAULT_SIZE_ID - cnt;
@@ -7248,6 +7375,40 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
return written;
}
+static ssize_t
+tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *fpos)
+{
+ struct trace_array *tr = filp->private_data;
+ ssize_t written = -ENODEV;
+
+ if (tracing_disabled)
+ return -EINVAL;
+
+ if (!(tr->trace_flags & TRACE_ITER_MARKERS))
+ return -EINVAL;
+
+ /* The marker must at least have a tag id */
+ if (cnt < sizeof(unsigned int))
+ return -EINVAL;
+
+ /* The global trace_marker_raw can go to multiple instances */
+ if (tr == &global_trace) {
+ guard(rcu)();
+ list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
+ written = write_raw_marker_to_buffer(tr, ubuf, cnt);
+ if (written < 0)
+ break;
+ }
+ } else {
+ written = write_raw_marker_to_buffer(tr, ubuf, cnt);
+ }
+
+ return written;
+}
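
The raw variant can be exercised the same way; note the check above that rejects payloads shorter than an int tag. A sketch (file path assumed, struct layout illustrative only):

	/* Hypothetical raw-marker write: the payload must begin with an int id. */
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		struct { unsigned int id; char data[8]; } payload = { 42, "rawdata" };
		int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

		if (fd < 0)
			return 1;
		/* Anything shorter than sizeof(unsigned int) gets -EINVAL. */
		if (write(fd, &payload, sizeof(payload)) < 0) {
			close(fd);
			return 1;
		}
		close(fd);
		return 0;
	}
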
+
static int tracing_clock_show(struct seq_file *m, void *v)
{
struct trace_array *tr = m->private;
@@ -7292,6 +7453,12 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr)
tracing_reset_online_cpus(&tr->max_buffer);
#endif
+ if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
+ struct trace_scratch *tscratch = tr->scratch;
+
+ tscratch->clock_id = i;
+ }
+
mutex_unlock(&trace_types_lock);
return 0;
@@ -8167,6 +8334,9 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if (ret < 0) {
if (trace_empty(iter) && !iter->closed) {
+ if (update_last_data_if_empty(iter->tr))
+ return 0;
+
if ((filp->f_flags & O_NONBLOCK))
return -EAGAIN;
@@ -8508,10 +8678,6 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
return -ENODEV;
- /* Currently the boot mapped buffer is not supported for mmap */
- if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
- return -ENODEV;
-
ret = get_snapshot_map(iter->tr);
if (ret)
return ret;
@@ -9517,6 +9683,15 @@ static void setup_trace_scratch(struct trace_array *tr,
/* Scan modules to make text delta for modules. */
module_for_each_mod(make_mod_delta, tr);
+
+	/* Set the trace_clock to match the previous boot. */
+ if (tscratch->clock_id != tr->clock_id) {
+ if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
+ tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
+			pr_info("the previous trace_clock info is not valid.\n");
+ goto reset;
+ }
+ }
return;
reset:
/* Invalid trace modules */
@@ -9741,6 +9916,7 @@ trace_array_create_systems(const char *name, const char *systems,
INIT_LIST_HEAD(&tr->events);
INIT_LIST_HEAD(&tr->hist_vars);
INIT_LIST_HEAD(&tr->err_log);
+ INIT_LIST_HEAD(&tr->marker_list);
#ifdef CONFIG_MODULES
INIT_LIST_HEAD(&tr->mod_events);
@@ -9900,6 +10076,9 @@ static int __remove_instance(struct trace_array *tr)
if (printk_trace == tr)
update_printk_trace(&global_trace);
+ if (update_marker_trace(tr, 0))
+ synchronize_rcu();
+
tracing_set_nop(tr);
clear_ftrace_function_probes(tr);
event_trace_del_tracer(tr);
@@ -10075,6 +10254,8 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
{
struct vfsmount *mnt;
struct file_system_type *type;
+ struct fs_context *fc;
+ int ret;
/*
* To maintain backward compatibility for tools that mount
@@ -10084,12 +10265,20 @@ static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
type = get_fs_type("tracefs");
if (!type)
return NULL;
- mnt = vfs_submount(mntpt, type, "tracefs", NULL);
+
+ fc = fs_context_for_submount(type, mntpt);
put_filesystem(type);
- if (IS_ERR(mnt))
- return NULL;
- mntget(mnt);
+ if (IS_ERR(fc))
+ return ERR_CAST(fc);
+
+ ret = vfs_parse_fs_string(fc, "source",
+ "tracefs", strlen("tracefs"));
+ if (!ret)
+ mnt = fc_mount(fc);
+ else
+ mnt = ERR_PTR(ret);
+ put_fs_context(fc);
return mnt;
}
@@ -10448,7 +10637,7 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m
static struct trace_iterator iter;
unsigned int old_userobj;
unsigned long flags;
- int cnt = 0, cpu;
+ int cnt = 0;
/*
* Always turn off tracing when we dump.
@@ -10465,9 +10654,8 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m
/* Simulate the iterator */
trace_init_iter(&iter, tr);
- for_each_tracing_cpu(cpu) {
- atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
- }
+	/* While dumping, do not allow the buffer to be enabled */
+ tracer_tracing_disable(tr);
old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
@@ -10526,9 +10714,7 @@ static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_m
tr->trace_flags |= old_userobj;
- for_each_tracing_cpu(cpu) {
- atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
- }
+ tracer_tracing_enable(tr);
local_irq_restore(flags);
}
@@ -10968,6 +11154,7 @@ __init static int tracer_alloc_buffers(void)
INIT_LIST_HEAD(&global_trace.events);
INIT_LIST_HEAD(&global_trace.hist_vars);
INIT_LIST_HEAD(&global_trace.err_log);
+ list_add(&global_trace.marker_list, &marker_copies);
list_add(&global_trace.list, &ftrace_trace_arrays);
apply_trace_boot_options();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 79be1995db44..bd084953a98b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -183,8 +183,7 @@ struct trace_array;
* the trace, etc.)
*/
struct trace_array_cpu {
- atomic_t disabled;
- void *buffer_page; /* ring buffer spare */
+ local_t disabled;
unsigned long entries;
unsigned long saved_latency;
@@ -404,6 +403,7 @@ struct trace_array {
struct trace_options *topts;
struct list_head systems;
struct list_head events;
+ struct list_head marker_list;
struct trace_event_file *trace_marker_file;
cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
/* one per_cpu trace_pipe can be opened by only one user */
@@ -665,12 +665,29 @@ bool tracing_is_disabled(void);
bool tracer_tracing_is_on(struct trace_array *tr);
void tracer_tracing_on(struct trace_array *tr);
void tracer_tracing_off(struct trace_array *tr);
+void tracer_tracing_disable(struct trace_array *tr);
+void tracer_tracing_enable(struct trace_array *tr);
struct dentry *trace_create_file(const char *name,
umode_t mode,
struct dentry *parent,
void *data,
const struct file_operations *fops);
+
+/**
+ * tracer_tracing_is_on_cpu - show the real state of the ring buffer for a cpu
+ * @tr:  The trace array to check
+ * @cpu: The cpu buffer to check if enabled
+ *
+ * Returns true if the given per CPU buffer is recording, false otherwise.
+ */
+static inline bool tracer_tracing_is_on_cpu(struct trace_array *tr, int cpu)
+{
+ if (tr->array_buffer.buffer)
+ return ring_buffer_record_is_on_cpu(tr->array_buffer.buffer, cpu);
+ return false;
+}
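
This helper replaces reads of the old per-CPU disabled counter; a sketch of the caller-side pattern (the function name is made up, and callers are expected to run with preemption or interrupts disabled, as the branch tracer below does):

	/* Sketch: ask the ring buffer itself instead of a shadow counter. */
	static bool example_can_record(struct trace_array *tr)
	{
		return tracer_tracing_is_on_cpu(tr, raw_smp_processor_id());
	}
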
+
int tracing_init_dentry(void);
struct ring_buffer_event;
@@ -1368,6 +1385,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
C(MARKERS, "markers"), \
C(EVENT_FORK, "event-fork"), \
C(TRACE_PRINTK, "trace_printk_dest"), \
+ C(COPY_MARKER, "copy_trace_marker"),\
C(PAUSE_ON_TRACE, "pause-on-trace"), \
C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \
FUNCTION_FLAGS \
@@ -1772,6 +1790,9 @@ extern int event_enable_register_trigger(char *glob,
extern void event_enable_unregister_trigger(char *glob,
struct event_trigger_data *test,
struct trace_event_file *file);
+extern struct event_trigger_data *
+trigger_data_alloc(struct event_command *cmd_ops, char *cmd, char *param,
+ void *private_data);
extern void trigger_data_free(struct event_trigger_data *data);
extern int event_trigger_init(struct event_trigger_data *data);
extern int trace_event_trigger_enable_disable(struct trace_event_file *file,
@@ -1798,11 +1819,6 @@ extern bool event_trigger_check_remove(const char *glob);
extern bool event_trigger_empty_param(const char *param);
extern int event_trigger_separate_filter(char *param_and_filter, char **param,
char **filter, bool param_required);
-extern struct event_trigger_data *
-event_trigger_alloc(struct event_command *cmd_ops,
- char *cmd,
- char *param,
- void *private_data);
extern int event_trigger_parse_num(char *trigger,
struct event_trigger_data *trigger_data);
extern int event_trigger_set_filter(struct event_command *cmd_ops,
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 6d08a5523ce0..6809b370e991 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -32,7 +32,6 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect)
{
struct trace_array *tr = branch_tracer;
struct trace_buffer *buffer;
- struct trace_array_cpu *data;
struct ring_buffer_event *event;
struct trace_branch *entry;
unsigned long flags;
@@ -54,8 +53,7 @@ probe_likely_condition(struct ftrace_likely_data *f, int val, int expect)
raw_local_irq_save(flags);
current->trace_recursion |= TRACE_BRANCH_BIT;
- data = this_cpu_ptr(tr->array_buffer.data);
- if (atomic_read(&data->disabled))
+ if (!tracer_tracing_is_on_cpu(tr, raw_smp_processor_id()))
goto out;
trace_ctx = tracing_gen_ctx_flags(flags);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 4ef4df6623a8..de294ae2c5c5 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -97,11 +97,11 @@ FTRACE_ENTRY_PACKED(fgraph_retaddr_entry, fgraph_retaddr_ent_entry,
F_STRUCT(
__field_struct( struct fgraph_retaddr_ent, graph_ent )
__field_packed( unsigned long, graph_ent, func )
- __field_packed( int, graph_ent, depth )
+ __field_packed( unsigned int, graph_ent, depth )
__field_packed( unsigned long, graph_ent, retaddr )
),
- F_printk("--> %ps (%d) <- %ps", (void *)__entry->func, __entry->depth,
+ F_printk("--> %ps (%u) <- %ps", (void *)__entry->func, __entry->depth,
(void *)__entry->retaddr)
);
@@ -124,13 +124,13 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
__field_struct( struct ftrace_graph_ret, ret )
__field_packed( unsigned long, ret, func )
__field_packed( unsigned long, ret, retval )
- __field_packed( int, ret, depth )
+ __field_packed( unsigned int, ret, depth )
__field_packed( unsigned int, ret, overrun )
__field(unsigned long long, calltime )
__field(unsigned long long, rettime )
),
- F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d retval: %lx",
+ F_printk("<-- %ps (%u) (start: %llx end: %llx) over: %u retval: %lx",
(void *)__entry->func, __entry->depth,
__entry->calltime, __entry->rettime,
__entry->depth, __entry->retval)
@@ -146,13 +146,13 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ret, ret )
__field_packed( unsigned long, ret, func )
- __field_packed( int, ret, depth )
+ __field_packed( unsigned int, ret, depth )
__field_packed( unsigned int, ret, overrun )
__field(unsigned long long, calltime )
__field(unsigned long long, rettime )
),
- F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
+ F_printk("<-- %ps (%u) (start: %llx end: %llx) over: %u",
(void *)__entry->func, __entry->depth,
__entry->calltime, __entry->rettime,
__entry->depth)
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index 916555f0de81..a1d402124836 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -9,14 +9,15 @@
* Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov tz.stoyanov@gmail.com>
*
*/
+#include <linux/cleanup.h>
+#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/ftrace.h>
#include "trace_dynevent.h"
#include "trace_probe.h"
-#include "trace_probe_tmpl.h"
#include "trace_probe_kernel.h"
+#include "trace_probe_tmpl.h"
#define EPROBE_EVENT_SYSTEM "eprobes"
@@ -343,10 +344,15 @@ get_event_field(struct fetch_insn *code, void *rec)
val = *(unsigned int *)addr;
break;
default:
- if (field->is_signed)
- val = *(long *)addr;
- else
- val = *(unsigned long *)addr;
+ if (field->size == sizeof(long)) {
+ if (field->is_signed)
+ val = *(long *)addr;
+ else
+ val = *(unsigned long *)addr;
+ break;
+ }
+	/* This is an array, so return the address itself */
+ val = (unsigned long)addr;
break;
}
return val;
@@ -797,18 +803,20 @@ find_and_get_event(const char *system, const char *event_name)
static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i)
{
- struct traceprobe_parse_context ctx = {
- .event = ep->event,
- .flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT,
- };
+ struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL;
int ret;
- ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], &ctx);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->event = ep->event;
+ ctx->flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT;
+
+ ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], ctx);
/* Handle symbols "@" */
if (!ret)
ret = traceprobe_update_arg(&ep->tp.args[i]);
- traceprobe_finish_parse(&ctx);
return ret;
}
@@ -869,10 +877,10 @@ static int __trace_eprobe_create(int argc, const char *argv[])
const char *event = NULL, *group = EPROBE_EVENT_SYSTEM;
const char *sys_event = NULL, *sys_name = NULL;
struct trace_event_call *event_call;
+ char *buf1 __free(kfree) = NULL;
+ char *buf2 __free(kfree) = NULL;
+ char *gbuf __free(kfree) = NULL;
struct trace_eprobe *ep = NULL;
- char buf1[MAX_EVENT_NAME_LEN];
- char buf2[MAX_EVENT_NAME_LEN];
- char gbuf[MAX_EVENT_NAME_LEN];
int ret = 0, filter_idx = 0;
int i, filter_cnt;
@@ -883,6 +891,9 @@ static int __trace_eprobe_create(int argc, const char *argv[])
event = strchr(&argv[0][1], ':');
if (event) {
+ gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+ if (!gbuf)
+ goto mem_error;
event++;
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
@@ -892,6 +903,11 @@ static int __trace_eprobe_create(int argc, const char *argv[])
trace_probe_log_set_index(1);
sys_event = argv[1];
+
+ buf2 = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+ if (!buf2)
+ goto mem_error;
+
ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0);
if (ret || !sys_event || !sys_name) {
trace_probe_log_err(0, NO_EVENT_INFO);
@@ -899,7 +915,9 @@ static int __trace_eprobe_create(int argc, const char *argv[])
}
if (!event) {
- strscpy(buf1, sys_event, MAX_EVENT_NAME_LEN);
+ buf1 = kstrdup(sys_event, GFP_KERNEL);
+ if (!buf1)
+ goto mem_error;
event = buf1;
}
@@ -972,6 +990,9 @@ static int __trace_eprobe_create(int argc, const char *argv[])
trace_probe_log_clear();
return ret;
+mem_error:
+ ret = -ENOMEM;
+ goto error;
parse_error:
ret = -EINVAL;
error:
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 069e92856bda..d01e5c910ce1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -400,6 +400,20 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca
return true;
}
+static void handle_dereference_arg(const char *arg_str, u64 string_flags, int len,
+ u64 *dereference_flags, int arg,
+ struct trace_event_call *call)
+{
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(arg_str, len, call))
+ *dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(arg_str, len, call))
+ *dereference_flags &= ~(1ULL << arg);
+ else
+ pr_warn("TRACE EVENT ERROR: Bad dereference argument: '%.*s'\n",
+ len, arg_str);
+}
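
Each format argument owns one bit in both masks, so the helper is driven per argument; a sketch with made-up values (arg 3 is a validated string):

	/* Sketch: bit 3 of dereference_flags is cleared if the string checks out. */
	static void example_check_arg(const char *fmt, int start, int end,
				      struct trace_event_call *call)
	{
		u64 dereference_flags = 1ULL << 3;	/* arg 3 dereferences memory */
		u64 string_flags = 1ULL << 3;		/* ...and is printed as a string */

		handle_dereference_arg(fmt + start, string_flags, end - start,
				       &dereference_flags, 3, call);
	}
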
+
/*
* Examine the print fmt of the event looking for unsafe dereference
* pointers using %p* that could be recorded in the trace event and
@@ -563,11 +577,9 @@ static void test_event_printk(struct trace_event_call *call)
}
if (dereference_flags & (1ULL << arg)) {
- if (string_flags & (1ULL << arg)) {
- if (process_string(fmt + start_arg, e - start_arg, call))
- dereference_flags &= ~(1ULL << arg);
- } else if (process_pointer(fmt + start_arg, e - start_arg, call))
- dereference_flags &= ~(1ULL << arg);
+ handle_dereference_arg(fmt + start_arg, string_flags,
+ e - start_arg,
+ &dereference_flags, arg, call);
}
start_arg = i;
@@ -578,11 +590,9 @@ static void test_event_printk(struct trace_event_call *call)
}
if (dereference_flags & (1ULL << arg)) {
- if (string_flags & (1ULL << arg)) {
- if (process_string(fmt + start_arg, i - start_arg, call))
- dereference_flags &= ~(1ULL << arg);
- } else if (process_pointer(fmt + start_arg, i - start_arg, call))
- dereference_flags &= ~(1ULL << arg);
+ handle_dereference_arg(fmt + start_arg, string_flags,
+ i - start_arg,
+ &dereference_flags, arg, call);
}
/*
@@ -622,7 +632,6 @@ EXPORT_SYMBOL_GPL(trace_event_raw_init);
bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
{
struct trace_array *tr = trace_file->tr;
- struct trace_array_cpu *data;
struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
@@ -632,9 +641,11 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
if (!pid_list && !no_pid_list)
return false;
- data = this_cpu_ptr(tr->array_buffer.data);
-
- return data->ignore_pid;
+ /*
+ * This is recorded at every sched_switch for this task.
+	 * Thus, even if the task migrates, the ignore value will be the same.
+ */
+ return this_cpu_read(tr->array_buffer.data->ignore_pid) != 0;
}
EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
@@ -3125,7 +3136,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
if (ret < 0)
return ret;
+ down_write(&trace_event_sem);
list_add(&call->list, &ftrace_events);
+ up_write(&trace_event_sem);
+
if (call->flags & TRACE_EVENT_FL_DYNAMIC)
atomic_set(&call->refcnt, 0);
else
@@ -3739,6 +3753,8 @@ __trace_add_event_dirs(struct trace_array *tr)
struct trace_event_call *call;
int ret;
+ lockdep_assert_held(&trace_event_sem);
+
list_for_each_entry(call, &ftrace_events, list) {
ret = __trace_add_new_event(call, tr);
if (ret < 0)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 2048560264bb..e4581e10782b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1250,7 +1250,9 @@ static void append_filter_err(struct trace_array *tr,
static inline struct event_filter *event_filter(struct trace_event_file *file)
{
- return file->filter;
+ return rcu_dereference_protected(file->filter,
+ lockdep_is_held(&event_mutex));
}
/* caller must hold event_mutex */
@@ -1320,7 +1322,7 @@ void free_event_filter(struct event_filter *filter)
static inline void __remove_filter(struct trace_event_file *file)
{
filter_disable(file);
- remove_filter_string(file->filter);
+ remove_filter_string(event_filter(file));
}
static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
@@ -1335,22 +1337,137 @@ static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
}
}
+struct filter_list {
+ struct list_head list;
+ struct event_filter *filter;
+};
+
+struct filter_head {
+ struct list_head list;
+ struct rcu_head rcu;
+};
+
+static void free_filter_list(struct rcu_head *rhp)
+{
+ struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu);
+ struct filter_list *filter_item, *tmp;
+
+ list_for_each_entry_safe(filter_item, tmp, &filter_list->list, list) {
+ __free_filter(filter_item->filter);
+ list_del(&filter_item->list);
+ kfree(filter_item);
+ }
+ kfree(filter_list);
+}
+
+static void free_filter_list_tasks(struct rcu_head *rhp)
+{
+ call_rcu(rhp, free_filter_list);
+}
+
+/*
+ * tracepoint_synchronize_unregister() waits for two RCU grace periods:
+ * it calls synchronize_rcu_tasks_trace() followed by synchronize_rcu().
+ * Instead of blocking on both, chain the equivalent call_rcu*()
+ * variants.
+ */
+static void delay_free_filter(struct filter_head *head)
+{
+ call_rcu_tasks_trace(&head->rcu, free_filter_list_tasks);
+}
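
Schematically, a freed filter now passes through both grace periods asynchronously rather than blocking the writer:

	/*
	 * Editorial sketch of the resulting call chain (no new code, just the
	 * two grace periods spelled out):
	 *
	 *   delay_free_filter(head)
	 *     -> call_rcu_tasks_trace()          waits out tasks-trace readers
	 *          -> free_filter_list_tasks()
	 *               -> call_rcu()            waits out classic RCU readers
	 *                    -> free_filter_list()   kfree()s items and list
	 */
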
+
+static void try_delay_free_filter(struct event_filter *filter)
+{
+ struct filter_head *head;
+ struct filter_list *item;
+
+ head = kmalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ goto free_now;
+
+ INIT_LIST_HEAD(&head->list);
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item) {
+ kfree(head);
+ goto free_now;
+ }
+
+ item->filter = filter;
+ list_add_tail(&item->list, &head->list);
+ delay_free_filter(head);
+ return;
+
+ free_now:
+ /* Make sure the filter is not being used */
+ tracepoint_synchronize_unregister();
+ __free_filter(filter);
+}
+
static inline void __free_subsystem_filter(struct trace_event_file *file)
{
- __free_filter(file->filter);
+ __free_filter(event_filter(file));
file->filter = NULL;
}
+static inline void event_set_filter(struct trace_event_file *file,
+ struct event_filter *filter)
+{
+ rcu_assign_pointer(file->filter, filter);
+}
+
+static inline void event_clear_filter(struct trace_event_file *file)
+{
+ RCU_INIT_POINTER(file->filter, NULL);
+}
+
static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
- struct trace_array *tr)
+ struct trace_array *tr,
+ struct event_filter *filter)
{
struct trace_event_file *file;
+ struct filter_head *head;
+ struct filter_list *item;
+
+ head = kmalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ goto free_now;
+
+ INIT_LIST_HEAD(&head->list);
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
continue;
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ goto free_now;
+ item->filter = event_filter(file);
+ list_add_tail(&item->list, &head->list);
+ event_clear_filter(file);
+ }
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ goto free_now;
+
+ item->filter = filter;
+ list_add_tail(&item->list, &head->list);
+
+ delay_free_filter(head);
+ return;
+ free_now:
+ tracepoint_synchronize_unregister();
+
+ if (head)
+ free_filter_list(&head->rcu);
+
+ list_for_each_entry(file, &tr->events, list) {
+ if (file->system != dir || !file->filter)
+ continue;
__free_subsystem_filter(file);
}
+ __free_filter(filter);
}
int filter_assign_type(const char *type)
@@ -2120,22 +2237,6 @@ static inline void event_set_filtered_flag(struct trace_event_file *file)
trace_buffered_event_enable();
}
-static inline void event_set_filter(struct trace_event_file *file,
- struct event_filter *filter)
-{
- rcu_assign_pointer(file->filter, filter);
-}
-
-static inline void event_clear_filter(struct trace_event_file *file)
-{
- RCU_INIT_POINTER(file->filter, NULL);
-}
-
-struct filter_list {
- struct list_head list;
- struct event_filter *filter;
-};
-
static int process_system_preds(struct trace_subsystem_dir *dir,
struct trace_array *tr,
struct filter_parse_error *pe,
@@ -2144,11 +2245,16 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
struct trace_event_file *file;
struct filter_list *filter_item;
struct event_filter *filter = NULL;
- struct filter_list *tmp;
- LIST_HEAD(filter_list);
+ struct filter_head *filter_list;
bool fail = true;
int err;
+ filter_list = kmalloc(sizeof(*filter_list), GFP_KERNEL);
+ if (!filter_list)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&filter_list->list);
+
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
@@ -2175,7 +2281,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
if (!filter_item)
goto fail_mem;
- list_add_tail(&filter_item->list, &filter_list);
+ list_add_tail(&filter_item->list, &filter_list->list);
/*
	 * Regardless of whether this returned an error, we still
* replace the filter for the call.
@@ -2195,31 +2301,22 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
	 * Do a synchronize_rcu() to ensure all calls are
* done with them before we free them.
*/
- tracepoint_synchronize_unregister();
- list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
- __free_filter(filter_item->filter);
- list_del(&filter_item->list);
- kfree(filter_item);
- }
+ delay_free_filter(filter_list);
return 0;
fail:
/* No call succeeded */
- list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
- list_del(&filter_item->list);
- kfree(filter_item);
- }
+ free_filter_list(&filter_list->rcu);
parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
return -EINVAL;
fail_mem:
__free_filter(filter);
+
/* If any call succeeded, we still need to sync */
if (!fail)
- tracepoint_synchronize_unregister();
- list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
- __free_filter(filter_item->filter);
- list_del(&filter_item->list);
- kfree(filter_item);
- }
+ delay_free_filter(filter_list);
+ else
+ free_filter_list(&filter_list->rcu);
+
return -ENOMEM;
}
@@ -2361,9 +2458,7 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
event_clear_filter(file);
- /* Make sure the filter is not being used */
- tracepoint_synchronize_unregister();
- __free_filter(filter);
+ try_delay_free_filter(filter);
return 0;
}
@@ -2387,11 +2482,8 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
event_set_filter(file, filter);
- if (tmp) {
- /* Make sure the call is done with the filter */
- tracepoint_synchronize_unregister();
- __free_filter(tmp);
- }
+ if (tmp)
+ try_delay_free_filter(tmp);
}
return err;
@@ -2417,9 +2509,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
filter = system->filter;
system->filter = NULL;
/* Ensure all filters are no longer used */
- tracepoint_synchronize_unregister();
- filter_free_subsystem_filters(dir, tr);
- __free_filter(filter);
+ filter_free_subsystem_filters(dir, tr, filter);
return 0;
}
@@ -2810,6 +2900,10 @@ static __init int ftrace_test_event_filter(void)
if (i == DATA_CNT)
printk(KERN_CONT "OK\n");
+ /* Need to call ftrace_test_filter to prevent a warning */
+ if (!trace_ftrace_test_filter_enabled())
+ trace_ftrace_test_filter(1, 2, 3, 4, 5, 6, 7, 8);
+
return 0;
}
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1260c23cfa5f..1d536219b624 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -114,6 +114,7 @@ enum hist_field_fn {
HIST_FIELD_FN_BUCKET,
HIST_FIELD_FN_TIMESTAMP,
HIST_FIELD_FN_CPU,
+ HIST_FIELD_FN_COMM,
HIST_FIELD_FN_STRING,
HIST_FIELD_FN_DYNSTRING,
HIST_FIELD_FN_RELDYNSTRING,
@@ -506,6 +507,7 @@ enum hist_field_flags {
HIST_FIELD_FL_CONST = 1 << 18,
HIST_FIELD_FL_PERCENT = 1 << 19,
HIST_FIELD_FL_GRAPH = 1 << 20,
+ HIST_FIELD_FL_COMM = 1 << 21,
};
struct var_defs {
@@ -885,6 +887,15 @@ static u64 hist_field_cpu(struct hist_field *hist_field,
return cpu;
}
+static u64 hist_field_comm(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ return (u64)(unsigned long)current->comm;
+}
+
/**
* check_field_for_var_ref - Check if a VAR_REF field references a variable
* @hist_field: The VAR_REF field to check
@@ -1338,6 +1349,8 @@ static const char *hist_field_name(struct hist_field *field,
field_name = hist_field_name(field->operands[0], ++level);
else if (field->flags & HIST_FIELD_FL_CPU)
field_name = "common_cpu";
+ else if (field->flags & HIST_FIELD_FL_COMM)
+ field_name = "common_comm";
else if (field->flags & HIST_FIELD_FL_EXPR ||
field->flags & HIST_FIELD_FL_VAR_REF) {
if (field->system) {
@@ -2015,6 +2028,13 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
goto out;
}
+ if (flags & HIST_FIELD_FL_COMM) {
+ hist_field->fn_num = HIST_FIELD_FN_COMM;
+ hist_field->size = MAX_FILTER_STR_VAL;
+ hist_field->type = "char[]";
+ goto out;
+ }
+
if (WARN_ON_ONCE(!field))
goto out;
@@ -2359,9 +2379,11 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
hist_data->attrs->ts_in_usecs = true;
} else if (strcmp(field_name, "common_stacktrace") == 0) {
*flags |= HIST_FIELD_FL_STACKTRACE;
- } else if (strcmp(field_name, "common_cpu") == 0)
+ } else if (strcmp(field_name, "common_cpu") == 0) {
*flags |= HIST_FIELD_FL_CPU;
- else if (strcmp(field_name, "hitcount") == 0)
+ } else if (strcmp(field_name, "common_comm") == 0) {
+ *flags |= HIST_FIELD_FL_COMM | HIST_FIELD_FL_STRING;
+ } else if (strcmp(field_name, "hitcount") == 0)
*flags |= HIST_FIELD_FL_HITCOUNT;
else {
field = trace_find_event_field(file->event_call, field_name);
@@ -2377,6 +2399,8 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
*flags |= HIST_FIELD_FL_CPU;
} else if (field && field->filter_type == FILTER_STACKTRACE) {
*flags |= HIST_FIELD_FL_STACKTRACE;
+ } else if (field && field->filter_type == FILTER_COMM) {
+ *flags |= HIST_FIELD_FL_COMM | HIST_FIELD_FL_STRING;
} else {
hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
errpos(field_name));
@@ -4327,6 +4351,8 @@ static u64 hist_fn_call(struct hist_field *hist_field,
return hist_field_timestamp(hist_field, elt, buffer, rbe, event);
case HIST_FIELD_FN_CPU:
return hist_field_cpu(hist_field, elt, buffer, rbe, event);
+ case HIST_FIELD_FN_COMM:
+ return hist_field_comm(hist_field, elt, buffer, rbe, event);
case HIST_FIELD_FN_STRING:
return hist_field_string(hist_field, elt, buffer, rbe, event);
case HIST_FIELD_FN_DYNSTRING:
@@ -5212,22 +5238,25 @@ static inline void add_to_key(char *compound_key, void *key,
size_t size = key_field->size;
if (key_field->flags & HIST_FIELD_FL_STRING) {
- struct ftrace_event_field *field;
- field = key_field->field;
- if (field->filter_type == FILTER_DYN_STRING ||
- field->filter_type == FILTER_RDYN_STRING)
- size = *(u32 *)(rec + field->offset) >> 16;
- else if (field->filter_type == FILTER_STATIC_STRING)
- size = field->size;
+ if (key_field->flags & HIST_FIELD_FL_COMM) {
+ size = strlen((char *)key);
+ } else {
+ struct ftrace_event_field *field;
+
+ field = key_field->field;
+ if (field->filter_type == FILTER_DYN_STRING ||
+ field->filter_type == FILTER_RDYN_STRING)
+ size = *(u32 *)(rec + field->offset) >> 16;
+ else if (field->filter_type == FILTER_STATIC_STRING)
+ size = field->size;
+ }
/* ensure NULL-termination */
if (size > key_field->size - 1)
size = key_field->size - 1;
-
- strncpy(compound_key + key_field->offset, (char *)key, size);
- } else
- memcpy(compound_key + key_field->offset, key, size);
+ }
+ memcpy(compound_key + key_field->offset, key, size);
}
static void
@@ -5246,17 +5275,94 @@ hist_trigger_actions(struct hist_trigger_data *hist_data,
}
}
+/*
+ * The hist_pad structure is used to save information to create
+ * a histogram from the histogram trigger. It's too big to store
+ * on the stack, so when the histogram trigger is initialized
+ * a percpu array of 4 hist_pad structures is allocated.
+ * This covers every context (normal, softirq, irq and NMI)
+ * in the very unlikely event that a trigger fires in each of
+ * these contexts and interrupts a currently active trigger.
+ */
+struct hist_pad {
+ unsigned long entries[HIST_STACKTRACE_DEPTH];
+ u64 var_ref_vals[TRACING_MAP_VARS_MAX];
+ char compound_key[HIST_KEY_SIZE_MAX];
+};
+
+static struct hist_pad __percpu *hist_pads;
+static DEFINE_PER_CPU(int, hist_pad_cnt);
+static refcount_t hist_pad_ref;
+
+/* One hist_pad for every context (normal, softirq, irq, NMI) */
+#define MAX_HIST_CNT 4
+
+static int alloc_hist_pad(void)
+{
+ lockdep_assert_held(&event_mutex);
+
+ if (refcount_read(&hist_pad_ref)) {
+ refcount_inc(&hist_pad_ref);
+ return 0;
+ }
+
+ hist_pads = __alloc_percpu(sizeof(struct hist_pad) * MAX_HIST_CNT,
+ __alignof__(struct hist_pad));
+ if (!hist_pads)
+ return -ENOMEM;
+
+ refcount_set(&hist_pad_ref, 1);
+ return 0;
+}
+
+static void free_hist_pad(void)
+{
+ lockdep_assert_held(&event_mutex);
+
+ if (!refcount_dec_and_test(&hist_pad_ref))
+ return;
+
+ free_percpu(hist_pads);
+ hist_pads = NULL;
+}
+
+static struct hist_pad *get_hist_pad(void)
+{
+ struct hist_pad *hist_pad;
+ int cnt;
+
+ if (WARN_ON_ONCE(!hist_pads))
+ return NULL;
+
+ preempt_disable();
+
+ hist_pad = per_cpu_ptr(hist_pads, smp_processor_id());
+
+ if (this_cpu_read(hist_pad_cnt) == MAX_HIST_CNT) {
+ preempt_enable();
+ return NULL;
+ }
+
+ cnt = this_cpu_inc_return(hist_pad_cnt) - 1;
+
+ return &hist_pad[cnt];
+}
+
+static void put_hist_pad(void)
+{
+ this_cpu_dec(hist_pad_cnt);
+ preempt_enable();
+}
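
Every consumer has to pair the get/put calls, and at most MAX_HIST_CNT triggers can nest (one per context); a sketch of the expected usage (the function below is hypothetical):

	/* Sketch: get_hist_pad() disables preemption; put_hist_pad() re-enables it. */
	static void example_use_hist_pad(void)
	{
		struct hist_pad *pad = get_hist_pad();

		if (!pad)
			return;	/* nested deeper than MAX_HIST_CNT: drop the event */

		/* ... fill pad->compound_key / pad->entries / pad->var_ref_vals ... */

		put_hist_pad();
	}
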
+
static void event_hist_trigger(struct event_trigger_data *data,
struct trace_buffer *buffer, void *rec,
struct ring_buffer_event *rbe)
{
struct hist_trigger_data *hist_data = data->private_data;
bool use_compound_key = (hist_data->n_keys > 1);
- unsigned long entries[HIST_STACKTRACE_DEPTH];
- u64 var_ref_vals[TRACING_MAP_VARS_MAX];
- char compound_key[HIST_KEY_SIZE_MAX];
struct tracing_map_elt *elt = NULL;
struct hist_field *key_field;
+ struct hist_pad *hist_pad;
u64 field_contents;
void *key = NULL;
unsigned int i;
@@ -5264,12 +5370,18 @@ static void event_hist_trigger(struct event_trigger_data *data,
if (unlikely(!rbe))
return;
- memset(compound_key, 0, hist_data->key_size);
+ hist_pad = get_hist_pad();
+ if (!hist_pad)
+ return;
+
+ memset(hist_pad->compound_key, 0, hist_data->key_size);
for_each_hist_key_field(i, hist_data) {
key_field = hist_data->fields[i];
if (key_field->flags & HIST_FIELD_FL_STACKTRACE) {
+ unsigned long *entries = hist_pad->entries;
+
memset(entries, 0, HIST_STACKTRACE_SIZE);
if (key_field->field) {
unsigned long *stack, n_entries;
@@ -5293,26 +5405,31 @@ static void event_hist_trigger(struct event_trigger_data *data,
}
if (use_compound_key)
- add_to_key(compound_key, key, key_field, rec);
+ add_to_key(hist_pad->compound_key, key, key_field, rec);
}
if (use_compound_key)
- key = compound_key;
+ key = hist_pad->compound_key;
if (hist_data->n_var_refs &&
- !resolve_var_refs(hist_data, key, var_ref_vals, false))
- return;
+ !resolve_var_refs(hist_data, key, hist_pad->var_ref_vals, false))
+ goto out;
elt = tracing_map_insert(hist_data->map, key);
if (!elt)
- return;
+ goto out;
- hist_trigger_elt_update(hist_data, elt, buffer, rec, rbe, var_ref_vals);
+ hist_trigger_elt_update(hist_data, elt, buffer, rec, rbe, hist_pad->var_ref_vals);
- if (resolve_var_refs(hist_data, key, var_ref_vals, true))
- hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals);
+ if (resolve_var_refs(hist_data, key, hist_pad->var_ref_vals, true)) {
+ hist_trigger_actions(hist_data, elt, buffer, rec, rbe,
+ key, hist_pad->var_ref_vals);
+ }
hist_poll_wakeup();
+
+ out:
+ put_hist_pad();
}
static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -6011,6 +6128,8 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
if (hist_field->flags & HIST_FIELD_FL_CPU)
seq_puts(m, "common_cpu");
+	else if (hist_field->flags & HIST_FIELD_FL_COMM)
+ seq_puts(m, "common_comm");
else if (hist_field->flags & HIST_FIELD_FL_CONST)
seq_printf(m, "%llu", hist_field->constant);
else if (field_name) {
@@ -6157,6 +6276,9 @@ static int event_hist_trigger_init(struct event_trigger_data *data)
{
struct hist_trigger_data *hist_data = data->private_data;
+ if (alloc_hist_pad() < 0)
+ return -ENOMEM;
+
if (!data->ref && hist_data->attrs->name)
save_named_trigger(hist_data->attrs->name, data);
@@ -6201,6 +6323,7 @@ static void event_hist_trigger_free(struct event_trigger_data *data)
destroy_hist_data(hist_data);
}
+ free_hist_pad();
}
static const struct event_trigger_ops event_hist_trigger_ops = {
@@ -6216,9 +6339,7 @@ static int event_hist_trigger_named_init(struct event_trigger_data *data)
save_named_trigger(data->named_data->name, data);
- event_hist_trigger_init(data->named_data);
-
- return 0;
+ return event_hist_trigger_init(data->named_data);
}
static void event_hist_trigger_named_free(struct event_trigger_data *data)
@@ -6705,7 +6826,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
return PTR_ERR(hist_data);
}
- trigger_data = event_trigger_alloc(cmd_ops, cmd, param, hist_data);
+ trigger_data = trigger_data_alloc(cmd_ops, cmd, param, hist_data);
if (!trigger_data) {
ret = -ENOMEM;
goto out_free;
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 6e87ae2a1a66..cbfc306c0159 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -552,16 +552,14 @@ static int register_trigger(char *glob,
lockdep_assert_held(&event_mutex);
list_for_each_entry(test, &file->triggers, list) {
- if (test->cmd_ops->trigger_type == data->cmd_ops->trigger_type) {
- ret = -EEXIST;
- goto out;
- }
+ if (test->cmd_ops->trigger_type == data->cmd_ops->trigger_type)
+ return -EEXIST;
}
if (data->ops->init) {
ret = data->ops->init(data);
if (ret < 0)
- goto out;
+ return ret;
}
list_add_rcu(&data->list, &file->triggers);
@@ -572,7 +570,6 @@ static int register_trigger(char *glob,
list_del_rcu(&data->list);
update_cond_flag(file);
}
-out:
return ret;
}
@@ -770,7 +767,7 @@ int event_trigger_separate_filter(char *param_and_filter, char **param,
if (!param_and_filter) {
if (param_required)
ret = -EINVAL;
- goto out;
+ return ret;
}
/*
@@ -781,7 +778,7 @@ int event_trigger_separate_filter(char *param_and_filter, char **param,
*/
if (!param_required && param_and_filter && !isdigit(param_and_filter[0])) {
*filter = param_and_filter;
- goto out;
+ return ret;
}
/*
@@ -799,12 +796,11 @@ int event_trigger_separate_filter(char *param_and_filter, char **param,
if (!**filter)
*filter = NULL;
}
-out:
return ret;
}
/**
- * event_trigger_alloc - allocate and init event_trigger_data for a trigger
+ * trigger_data_alloc - allocate and init event_trigger_data for a trigger
* @cmd_ops: The event_command operations for the trigger
* @cmd: The cmd string
* @param: The param string
@@ -815,14 +811,14 @@ out:
* trigger_ops to assign to the event_trigger_data. @private_data can
* also be passed in and associated with the event_trigger_data.
*
- * Use event_trigger_free() to free an event_trigger_data object.
+ * Use trigger_data_free() to free an event_trigger_data object.
*
 * Return: The trigger_data object on success, NULL otherwise
*/
-struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops,
- char *cmd,
- char *param,
- void *private_data)
+struct event_trigger_data *trigger_data_alloc(struct event_command *cmd_ops,
+ char *cmd,
+ char *param,
+ void *private_data)
{
struct event_trigger_data *trigger_data;
const struct event_trigger_ops *trigger_ops;
@@ -989,15 +985,14 @@ event_trigger_parse(struct event_command *cmd_ops,
return ret;
ret = -ENOMEM;
- trigger_data = event_trigger_alloc(cmd_ops, cmd, param, file);
+ trigger_data = trigger_data_alloc(cmd_ops, cmd, param, file);
if (!trigger_data)
- goto out;
+ return ret;
if (remove) {
event_trigger_unregister(cmd_ops, file, glob+1, trigger_data);
- kfree(trigger_data);
- ret = 0;
- goto out;
+ trigger_data_free(trigger_data);
+ return 0;
}
ret = event_trigger_parse_num(param, trigger_data);
@@ -1017,13 +1012,12 @@ event_trigger_parse(struct event_command *cmd_ops,
/* Down the counter of trigger_data or free it if not used anymore */
event_trigger_free(trigger_data);
- out:
return ret;
out_free:
event_trigger_reset_filter(cmd_ops, trigger_data);
- kfree(trigger_data);
- goto out;
+ trigger_data_free(trigger_data);
+ return ret;
}
/**
@@ -1057,10 +1051,10 @@ int set_trigger_filter(char *filter_str,
s = strsep(&filter_str, " \t");
if (!strlen(s) || strcmp(s, "if") != 0)
- goto out;
+ return ret;
if (!filter_str)
- goto out;
+ return ret;
/* The filter is for the 'trigger' event, not the triggered event */
ret = create_event_filter(file->tr, file->event_call,
@@ -1104,7 +1098,6 @@ int set_trigger_filter(char *filter_str,
ret = -ENOMEM;
}
}
- out:
return ret;
}
@@ -1772,7 +1765,7 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
ret = -EINVAL;
event_enable_file = find_event_file(tr, system, event);
if (!event_enable_file)
- goto out;
+ return ret;
#ifdef CONFIG_HIST_TRIGGERS
hist = ((strcmp(cmd, ENABLE_HIST_STR) == 0) ||
@@ -1787,16 +1780,16 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
enable_data = kzalloc(sizeof(*enable_data), GFP_KERNEL);
if (!enable_data)
- goto out;
+ return ret;
enable_data->hist = hist;
enable_data->enable = enable;
enable_data->file = event_enable_file;
- trigger_data = event_trigger_alloc(cmd_ops, cmd, param, enable_data);
+ trigger_data = trigger_data_alloc(cmd_ops, cmd, param, enable_data);
if (!trigger_data) {
kfree(enable_data);
- goto out;
+ return ret;
}
if (remove) {
@@ -1804,7 +1797,7 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
kfree(trigger_data);
kfree(enable_data);
ret = 0;
- goto out;
+ return ret;
}
/* Up the trigger_data count to make sure nothing frees it on failure */
@@ -1834,7 +1827,6 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
goto out_disable;
event_trigger_free(trigger_data);
- out:
return ret;
out_disable:
trace_event_enable_disable(event_enable_file, 0, 1);
@@ -1845,7 +1837,7 @@ int event_enable_trigger_parse(struct event_command *cmd_ops,
event_trigger_free(trigger_data);
kfree(enable_data);
- goto out;
+ return ret;
}
int event_enable_register_trigger(char *glob,
@@ -1865,15 +1857,14 @@ int event_enable_register_trigger(char *glob,
(test->cmd_ops->trigger_type ==
data->cmd_ops->trigger_type) &&
(test_enable_data->file == enable_data->file)) {
- ret = -EEXIST;
- goto out;
+ return -EEXIST;
}
}
if (data->ops->init) {
ret = data->ops->init(data);
if (ret < 0)
- goto out;
+ return ret;
}
list_add_rcu(&data->list, &file->triggers);
@@ -1884,7 +1875,6 @@ int event_enable_register_trigger(char *glob,
list_del_rcu(&data->list);
update_cond_flag(file);
}
-out:
return ret;
}
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index b40fa59159ac..b36ade43d4b3 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -4,15 +4,18 @@
* Copyright (C) 2022 Google LLC.
*/
#define pr_fmt(fmt) "trace_fprobe: " fmt
-#include <asm/ptrace.h>
#include <linux/fprobe.h>
+#include <linux/list.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/security.h>
#include <linux/tracepoint.h>
#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+
#include "trace_dynevent.h"
#include "trace_probe.h"
#include "trace_probe_kernel.h"
@@ -21,7 +24,6 @@
#define FPROBE_EVENT_SYSTEM "fprobes"
#define TRACEPOINT_EVENT_SYSTEM "tracepoints"
#define RETHOOK_MAXACTIVE_MAX 4096
-#define TRACEPOINT_STUB ERR_PTR(-ENOENT)
static int trace_fprobe_create(const char *raw_command);
static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev);
@@ -38,15 +40,156 @@ static struct dyn_event_operations trace_fprobe_ops = {
.match = trace_fprobe_match,
};
+/* List of tracepoint_user */
+static LIST_HEAD(tracepoint_user_list);
+static DEFINE_MUTEX(tracepoint_user_mutex);
+
+/* While a tracepoint_user is alive (@refcount != 0), @tpoint may be NULL. */
+struct tracepoint_user {
+ struct list_head list;
+ const char *name;
+ struct tracepoint *tpoint;
+ unsigned int refcount;
+};
+
+/* NOTE: you must hold tracepoint_user_mutex. */
+#define for_each_tracepoint_user(tuser) \
+ list_for_each_entry(tuser, &tracepoint_user_list, list)
+
+static int tracepoint_user_register(struct tracepoint_user *tuser)
+{
+ struct tracepoint *tpoint = tuser->tpoint;
+
+ if (!tpoint)
+ return 0;
+
+ return tracepoint_probe_register_prio_may_exist(tpoint,
+ tpoint->probestub, NULL, 0);
+}
+
+static void tracepoint_user_unregister(struct tracepoint_user *tuser)
+{
+ if (!tuser->tpoint)
+ return;
+
+ WARN_ON_ONCE(tracepoint_probe_unregister(tuser->tpoint, tuser->tpoint->probestub, NULL));
+ tuser->tpoint = NULL;
+}
+
+static unsigned long tracepoint_user_ip(struct tracepoint_user *tuser)
+{
+ if (!tuser->tpoint)
+ return 0UL;
+
+ return (unsigned long)tuser->tpoint->probestub;
+}
+
+static void __tracepoint_user_free(struct tracepoint_user *tuser)
+{
+ if (!tuser)
+ return;
+ kfree(tuser->name);
+ kfree(tuser);
+}
+
+DEFINE_FREE(tuser_free, struct tracepoint_user *, __tracepoint_user_free(_T))
+
+static struct tracepoint_user *__tracepoint_user_init(const char *name, struct tracepoint *tpoint)
+{
+ struct tracepoint_user *tuser __free(tuser_free) = NULL;
+ int ret;
+
+ tuser = kzalloc(sizeof(*tuser), GFP_KERNEL);
+ if (!tuser)
+ return NULL;
+ tuser->name = kstrdup(name, GFP_KERNEL);
+ if (!tuser->name)
+ return NULL;
+
+ if (tpoint) {
+ ret = tracepoint_user_register(tuser);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ tuser->tpoint = tpoint;
+ tuser->refcount = 1;
+ INIT_LIST_HEAD(&tuser->list);
+ list_add(&tuser->list, &tracepoint_user_list);
+
+ return_ptr(tuser);
+}
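
The function leans on the scope-based cleanup helpers from linux/cleanup.h; a minimal sketch of that idiom as used here (the allocator below is illustrative only and skips the list/refcount setup):

	/* Sketch: __free() runs the destructor on scope exit unless ownership moves. */
	static struct tracepoint_user *example_alloc(const char *name)
	{
		struct tracepoint_user *t __free(tuser_free) = NULL;

		t = kzalloc(sizeof(*t), GFP_KERNEL);
		if (!t)
			return NULL;
		t->name = kstrdup(name, GFP_KERNEL);
		if (!t->name)
			return NULL;		/* __tracepoint_user_free(t) runs here */

		return_ptr(t);			/* transfer ownership to the caller */
	}
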
+
+static struct tracepoint *find_tracepoint(const char *tp_name,
+ struct module **tp_mod);
+
+/*
+ * Get the tracepoint_user if it exists, or allocate a new one and register it.
+ * If the tracepoint is in a module, take a reference on that module too.
+ * Returns the tracepoint_user on success, NULL on allocation failure, or an
+ * ERR_PTR if registering the tracepoint fails.
+ */
+static struct tracepoint_user *tracepoint_user_find_get(const char *name, struct module **pmod)
+{
+ struct module *mod __free(module_put) = NULL;
+ struct tracepoint_user *tuser;
+ struct tracepoint *tpoint;
+
+ if (!name || !pmod)
+ return ERR_PTR(-EINVAL);
+
+ /* Get and lock the module which has tracepoint. */
+ tpoint = find_tracepoint(name, &mod);
+
+ guard(mutex)(&tracepoint_user_mutex);
+ /* Search existing tracepoint_user */
+ for_each_tracepoint_user(tuser) {
+ if (!strcmp(tuser->name, name)) {
+ tuser->refcount++;
+ *pmod = no_free_ptr(mod);
+ return tuser;
+ }
+ }
+
+	/* No existing tracepoint_user was found; create and register a new one. */
+ tuser = __tracepoint_user_init(name, tpoint);
+ if (!IS_ERR_OR_NULL(tuser))
+ *pmod = no_free_ptr(mod);
+
+ return tuser;
+}
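
A sketch of the expected caller pattern, mirroring the fprobe registration path further down (the function name is hypothetical):

	/* Sketch: look up or create a tracepoint_user, pinning its module. */
	static int example_attach(const char *name)
	{
		struct module *mod __free(module_put) = NULL;
		struct tracepoint_user *tuser;

		tuser = tracepoint_user_find_get(name, &mod);
		if (IS_ERR(tuser))
			return PTR_ERR(tuser);
		if (!tuser)
			return -ENOMEM;

		/* ... use tracepoint_user_ip(tuser) if non-zero (i.e. loaded) ... */

		tracepoint_user_put(tuser);
		return 0;
	}
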
+
+static void tracepoint_user_put(struct tracepoint_user *tuser)
+{
+ scoped_guard(mutex, &tracepoint_user_mutex) {
+ if (--tuser->refcount > 0)
+ return;
+
+ list_del(&tuser->list);
+ tracepoint_user_unregister(tuser);
+ }
+
+ __tracepoint_user_free(tuser);
+}
+
+DEFINE_FREE(tuser_put, struct tracepoint_user *,
+ if (!IS_ERR_OR_NULL(_T))
+ tracepoint_user_put(_T))
+
/*
* Fprobe event core functions
*/
+
+/*
+ * @tprobe is true for a tracepoint probe.
+ * @tuser can be NULL if the trace_fprobe is disabled or if the tracepoint's
+ * module is not loaded yet. If @tuser != NULL, this trace_fprobe is enabled.
+ */
struct trace_fprobe {
struct dyn_event devent;
struct fprobe fp;
const char *symbol;
- struct tracepoint *tpoint;
- struct module *mod;
+ bool tprobe;
+ struct tracepoint_user *tuser;
struct trace_probe tp;
};
@@ -76,7 +219,7 @@ static bool trace_fprobe_is_return(struct trace_fprobe *tf)
static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf)
{
- return tf->tpoint != NULL;
+ return tf->tprobe;
}
static const char *trace_fprobe_symbol(struct trace_fprobe *tf)
@@ -411,6 +554,8 @@ static void free_trace_fprobe(struct trace_fprobe *tf)
{
if (tf) {
trace_probe_cleanup(&tf->tp);
+ if (tf->tuser)
+ tracepoint_user_put(tf->tuser);
kfree(tf->symbol);
kfree(tf);
}
@@ -425,9 +570,8 @@ DEFINE_FREE(free_trace_fprobe, struct trace_fprobe *, if (!IS_ERR_OR_NULL(_T)) f
static struct trace_fprobe *alloc_trace_fprobe(const char *group,
const char *event,
const char *symbol,
- struct tracepoint *tpoint,
- struct module *mod,
- int nargs, bool is_return)
+ int nargs, bool is_return,
+ bool is_tracepoint)
{
struct trace_fprobe *tf __free(free_trace_fprobe) = NULL;
int ret = -ENOMEM;
@@ -445,8 +589,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
else
tf->fp.entry_handler = fentry_dispatcher;
- tf->tpoint = tpoint;
- tf->mod = mod;
+ tf->tprobe = is_tracepoint;
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
if (ret < 0)
@@ -469,98 +612,6 @@ static struct trace_fprobe *find_trace_fprobe(const char *event,
return NULL;
}
-static inline int __enable_trace_fprobe(struct trace_fprobe *tf)
-{
- if (trace_fprobe_is_registered(tf))
- enable_fprobe(&tf->fp);
-
- return 0;
-}
-
-static void __disable_trace_fprobe(struct trace_probe *tp)
-{
- struct trace_fprobe *tf;
-
- list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
- if (!trace_fprobe_is_registered(tf))
- continue;
- disable_fprobe(&tf->fp);
- }
-}
-
-/*
- * Enable trace_probe
- * if the file is NULL, enable "perf" handler, or enable "trace" handler.
- */
-static int enable_trace_fprobe(struct trace_event_call *call,
- struct trace_event_file *file)
-{
- struct trace_probe *tp;
- struct trace_fprobe *tf;
- bool enabled;
- int ret = 0;
-
- tp = trace_probe_primary_from_call(call);
- if (WARN_ON_ONCE(!tp))
- return -ENODEV;
- enabled = trace_probe_is_enabled(tp);
-
- /* This also changes "enabled" state */
- if (file) {
- ret = trace_probe_add_file(tp, file);
- if (ret)
- return ret;
- } else
- trace_probe_set_flag(tp, TP_FLAG_PROFILE);
-
- if (!enabled) {
- list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
- /* TODO: check the fprobe is gone */
- __enable_trace_fprobe(tf);
- }
- }
-
- return 0;
-}
-
-/*
- * Disable trace_probe
- * if the file is NULL, disable "perf" handler, or disable "trace" handler.
- */
-static int disable_trace_fprobe(struct trace_event_call *call,
- struct trace_event_file *file)
-{
- struct trace_probe *tp;
-
- tp = trace_probe_primary_from_call(call);
- if (WARN_ON_ONCE(!tp))
- return -ENODEV;
-
- if (file) {
- if (!trace_probe_get_file_link(tp, file))
- return -ENOENT;
- if (!trace_probe_has_single_file(tp))
- goto out;
- trace_probe_clear_flag(tp, TP_FLAG_TRACE);
- } else
- trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
-
- if (!trace_probe_is_enabled(tp))
- __disable_trace_fprobe(tp);
-
- out:
- if (file)
- /*
- * Synchronization is done in below function. For perf event,
- * file == NULL and perf_trace_event_unreg() calls
- * tracepoint_synchronize_unregister() to ensure synchronize
- * event. We don't need to care about it.
- */
- trace_probe_remove_file(tp, file);
-
- return 0;
-}
-
/* Event entry printers */
static enum print_line_t
print_fentry_event(struct trace_iterator *iter, int flags,
@@ -712,20 +763,52 @@ static int unregister_fprobe_event(struct trace_fprobe *tf)
static int __regsiter_tracepoint_fprobe(struct trace_fprobe *tf)
{
- struct tracepoint *tpoint = tf->tpoint;
- unsigned long ip = (unsigned long)tpoint->probestub;
+ struct tracepoint_user *tuser __free(tuser_put) = NULL;
+ struct module *mod __free(module_put) = NULL;
+ unsigned long ip;
int ret;
+ if (WARN_ON_ONCE(tf->tuser))
+ return -EINVAL;
+
+ /* If the tracepoint is in a module, it must be locked in this function. */
+ tuser = tracepoint_user_find_get(tf->symbol, &mod);
+	/* An ERR_PTR here means the lookup or registration failed. */
+ if (IS_ERR(tuser))
+ return PTR_ERR(tuser);
+ if (!tuser)
+ return -ENOMEM;
+
+ /* Register fprobe only if the tracepoint is loaded. */
+ if (tuser->tpoint) {
+ ip = tracepoint_user_ip(tuser);
+ if (WARN_ON_ONCE(!ip))
+ return -ENOENT;
+
+ ret = register_fprobe_ips(&tf->fp, &ip, 1);
+ if (ret < 0)
+ return ret;
+ }
+
+ tf->tuser = no_free_ptr(tuser);
+ return 0;
+}
+
+/* Returns 0, or an error if the target function is not available */
+static int trace_fprobe_verify_target(struct trace_fprobe *tf)
+{
+ int ret;
+
+ /* Tracepoint should have a stub function. */
+ if (trace_fprobe_is_tracepoint(tf))
+ return 0;
+
/*
- * Here, we do 2 steps to enable fprobe on a tracepoint.
- * At first, put __probestub_##TP function on the tracepoint
- * and put a fprobe on the stub function.
+	 * Note: since we don't lock the module, even if this succeeds,
+	 * a later register_fprobe() can still fail.
*/
- ret = tracepoint_probe_register_prio_may_exist(tpoint,
- tpoint->probestub, NULL, 0);
- if (ret < 0)
- return ret;
- return register_fprobe_ips(&tf->fp, &ip, 1);
+ ret = fprobe_count_ips_from_filter(tf->symbol, NULL);
+ return (ret < 0) ? ret : 0;
}
/* Internal register function - just handle fprobe and flags */
@@ -747,20 +830,10 @@ static int __register_trace_fprobe(struct trace_fprobe *tf)
return ret;
}
- /* Set/clear disabled flag according to tp->flag */
- if (trace_probe_is_enabled(&tf->tp))
- tf->fp.flags &= ~FPROBE_FL_DISABLED;
- else
- tf->fp.flags |= FPROBE_FL_DISABLED;
-
- if (trace_fprobe_is_tracepoint(tf)) {
-
- /* This tracepoint is not loaded yet */
- if (tf->tpoint == TRACEPOINT_STUB)
- return 0;
+ tf->fp.flags &= ~FPROBE_FL_DISABLED;
+ if (trace_fprobe_is_tracepoint(tf))
return __regsiter_tracepoint_fprobe(tf);
- }
/* TODO: handle filter, nofilter or symbol list */
return register_fprobe(&tf->fp, tf->symbol, NULL);
@@ -769,15 +842,11 @@ static int __register_trace_fprobe(struct trace_fprobe *tf)
/* Internal unregister function - just handle fprobe and flags */
static void __unregister_trace_fprobe(struct trace_fprobe *tf)
{
- if (trace_fprobe_is_registered(tf)) {
+ if (trace_fprobe_is_registered(tf))
unregister_fprobe(&tf->fp);
- memset(&tf->fp, 0, sizeof(tf->fp));
- if (trace_fprobe_is_tracepoint(tf)) {
- tracepoint_probe_unregister(tf->tpoint,
- tf->tpoint->probestub, NULL);
- tf->tpoint = NULL;
- tf->mod = NULL;
- }
+ if (tf->tuser) {
+ tracepoint_user_put(tf->tuser);
+ tf->tuser = NULL;
}
}
@@ -837,7 +906,7 @@ static bool trace_fprobe_has_same_fprobe(struct trace_fprobe *orig,
return false;
}
-static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to)
+static int append_trace_fprobe_event(struct trace_fprobe *tf, struct trace_fprobe *to)
{
int ret;
@@ -865,7 +934,7 @@ static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to)
if (ret)
return ret;
- ret = __register_trace_fprobe(tf);
+ ret = trace_fprobe_verify_target(tf);
if (ret)
trace_probe_unlink(&tf->tp);
else
@@ -874,8 +943,8 @@ static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to)
return ret;
}
-/* Register a trace_probe and probe_event */
-static int register_trace_fprobe(struct trace_fprobe *tf)
+/* Register a trace_probe and probe_event, and check the fprobe is available. */
+static int register_trace_fprobe_event(struct trace_fprobe *tf)
{
struct trace_fprobe *old_tf;
int ret;
@@ -885,7 +954,7 @@ static int register_trace_fprobe(struct trace_fprobe *tf)
old_tf = find_trace_fprobe(trace_probe_name(&tf->tp),
trace_probe_group_name(&tf->tp));
if (old_tf)
- return append_trace_fprobe(tf, old_tf);
+ return append_trace_fprobe_event(tf, old_tf);
/* Register new event */
ret = register_fprobe_event(tf);
@@ -898,8 +967,8 @@ static int register_trace_fprobe(struct trace_fprobe *tf)
return ret;
}
- /* Register fprobe */
- ret = __register_trace_fprobe(tf);
+ /* Verify fprobe is sane. */
+ ret = trace_fprobe_verify_target(tf);
if (ret < 0)
unregister_fprobe_event(tf);
else
@@ -963,15 +1032,6 @@ static struct tracepoint *find_tracepoint(const char *tp_name,
}
#ifdef CONFIG_MODULES
-static void reenable_trace_fprobe(struct trace_fprobe *tf)
-{
- struct trace_probe *tp = &tf->tp;
-
- list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
- __enable_trace_fprobe(tf);
- }
-}
-
/*
* Find a tracepoint from specified module. In this case, this does not get the
* module's refcount. The caller must ensure the module is not freed.
@@ -988,36 +1048,94 @@ static struct tracepoint *find_tracepoint_in_module(struct module *mod,
return data.tpoint;
}
+/* These are CONFIG_MODULES=y specific functions. */
+static bool tracepoint_user_within_module(struct tracepoint_user *tuser,
+ struct module *mod)
+{
+ return within_module(tracepoint_user_ip(tuser), mod);
+}
+
+static int tracepoint_user_register_again(struct tracepoint_user *tuser,
+ struct tracepoint *tpoint)
+{
+ tuser->tpoint = tpoint;
+ return tracepoint_user_register(tuser);
+}
+
+static void tracepoint_user_unregister_clear(struct tracepoint_user *tuser)
+{
+ tracepoint_user_unregister(tuser);
+ tuser->tpoint = NULL;
+}
+
+/* module callback for tracepoint_user */
static int __tracepoint_probe_module_cb(struct notifier_block *self,
unsigned long val, void *data)
{
struct tp_module *tp_mod = data;
+ struct tracepoint_user *tuser;
struct tracepoint *tpoint;
+
+ if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING)
+ return NOTIFY_DONE;
+
+ mutex_lock(&tracepoint_user_mutex);
+ for_each_tracepoint_user(tuser) {
+ if (val == MODULE_STATE_COMING) {
+ /* This is not a tracepoint in this module. Skip it. */
+ tpoint = find_tracepoint_in_module(tp_mod->mod, tuser->name);
+ if (!tpoint)
+ continue;
+ WARN_ON_ONCE(tracepoint_user_register_again(tuser, tpoint));
+ } else if (val == MODULE_STATE_GOING &&
+ tracepoint_user_within_module(tuser, tp_mod->mod)) {
+ /* Unregister all tracepoint_user in this module. */
+ tracepoint_user_unregister_clear(tuser);
+ }
+ }
+ mutex_unlock(&tracepoint_user_mutex);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block tracepoint_module_nb = {
+ .notifier_call = __tracepoint_probe_module_cb,
+};
+
+/* module callback for tprobe events */
+static int __tprobe_event_module_cb(struct notifier_block *self,
+ unsigned long val, void *data)
+{
struct trace_fprobe *tf;
struct dyn_event *pos;
+ struct module *mod = data;
if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING)
return NOTIFY_DONE;
mutex_lock(&event_mutex);
for_each_trace_fprobe(tf, pos) {
- if (val == MODULE_STATE_COMING && tf->tpoint == TRACEPOINT_STUB) {
- tpoint = find_tracepoint_in_module(tp_mod->mod, tf->symbol);
- if (tpoint) {
- tf->tpoint = tpoint;
- tf->mod = tp_mod->mod;
- if (!WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf)) &&
- trace_probe_is_enabled(&tf->tp))
- reenable_trace_fprobe(tf);
- }
- } else if (val == MODULE_STATE_GOING && tp_mod->mod == tf->mod) {
+ /* Skip fprobe and disabled tprobe events. */
+ if (!trace_fprobe_is_tracepoint(tf) || !tf->tuser)
+ continue;
+
+ /* By the time of this notification, the tracepoint notifier has already run. */
+ if (val == MODULE_STATE_COMING &&
+ tracepoint_user_within_module(tf->tuser, mod)) {
+ unsigned long ip = tracepoint_user_ip(tf->tuser);
+
+ WARN_ON_ONCE(register_fprobe_ips(&tf->fp, &ip, 1));
+ } else if (val == MODULE_STATE_GOING &&
+ /*
+ * tracepoint_user_within_module() does not work here because the
+ * tracepoint_user has already been unregistered and its tpoint
+ * cleared. Instead, check whether the fprobe is still registered
+ * while tpoint is cleared (unregistered). Such unbalanced probes
+ * must be adjusted anyway.
+ */
+ trace_fprobe_is_registered(tf) &&
+ !tf->tuser->tpoint) {
unregister_fprobe(&tf->fp);
- if (trace_fprobe_is_tracepoint(tf)) {
- tracepoint_probe_unregister(tf->tpoint,
- tf->tpoint->probestub, NULL);
- tf->tpoint = TRACEPOINT_STUB;
- tf->mod = NULL;
- }
}
}
mutex_unlock(&event_mutex);
@@ -1025,8 +1143,11 @@ static int __tracepoint_probe_module_cb(struct notifier_block *self,
return NOTIFY_DONE;
}
-static struct notifier_block tracepoint_module_nb = {
- .notifier_call = __tracepoint_probe_module_cb,
+/* NOTE: this must run after the tracepoint module callback */
+static struct notifier_block tprobe_event_module_nb = {
+ .notifier_call = __tprobe_event_module_cb,
+ /* Make sure this runs after the tracepoint module notifier. */
+ .priority = -10,
};
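
Module notifier callbacks run in descending priority order, so the -10
priority above guarantees __tprobe_event_module_cb() sees a module only
after the default-priority (0) tracepoint notifier has registered or
unregistered the tracepoint_user entries for it. A runnable userspace
sketch of that ordering rule (illustrative only, not the kernel's notifier
implementation):

    #include <stdio.h>
    #include <stdlib.h>

    struct notifier {
            const char *name;
            int priority;
    };

    /* Higher priority runs first, as in the kernel's notifier chains. */
    static int by_prio_desc(const void *a, const void *b)
    {
            const struct notifier *na = a, *nb = b;

            return nb->priority - na->priority;
    }

    int main(void)
    {
            struct notifier chain[] = {
                    { "tprobe_event_module_nb", -10 },
                    { "tracepoint_module_nb", 0 },
            };

            qsort(chain, 2, sizeof(chain[0]), by_prio_desc);
            for (int i = 0; i < 2; i++)
                    printf("%d: %s (prio %d)\n", i, chain[i].name,
                           chain[i].priority);
            return 0;
    }
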
#endif /* CONFIG_MODULES */
@@ -1086,8 +1207,6 @@ static int parse_symbol_and_return(int argc, const char *argv[],
return 0;
}
-DEFINE_FREE(module_put, struct module *, if (_T) module_put(_T))
-
static int trace_fprobe_create_internal(int argc, const char *argv[],
struct traceprobe_parse_context *ctx)
{
@@ -1116,19 +1235,18 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
struct trace_fprobe *tf __free(free_trace_fprobe) = NULL;
- int i, new_argc = 0, ret = 0;
- bool is_return = false;
- char *symbol __free(kfree) = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
+ struct module *mod __free(module_put) = NULL;
const char **new_argv __free(kfree) = NULL;
- char buf[MAX_EVENT_NAME_LEN];
- char gbuf[MAX_EVENT_NAME_LEN];
- char sbuf[KSYM_NAME_LEN];
- char abuf[MAX_BTF_ARGS_LEN];
+ char *symbol __free(kfree) = NULL;
+ char *ebuf __free(kfree) = NULL;
+ char *gbuf __free(kfree) = NULL;
+ char *sbuf __free(kfree) = NULL;
+ char *abuf __free(kfree) = NULL;
char *dbuf __free(kfree) = NULL;
+ int i, new_argc = 0, ret = 0;
bool is_tracepoint = false;
- struct module *tp_mod __free(module_put) = NULL;
- struct tracepoint *tpoint = NULL;
+ bool is_return = false;
if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2)
return -ECANCELED;
@@ -1156,6 +1274,9 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
trace_probe_log_set_index(0);
if (event) {
+ gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+ if (!gbuf)
+ return -ENOMEM;
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
@@ -1163,15 +1284,18 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
}
if (!event) {
+ ebuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+ if (!ebuf)
+ return -ENOMEM;
/* Make a new event name */
if (is_tracepoint)
- snprintf(buf, MAX_EVENT_NAME_LEN, "%s%s",
+ snprintf(ebuf, MAX_EVENT_NAME_LEN, "%s%s",
isdigit(*symbol) ? "_" : "", symbol);
else
- snprintf(buf, MAX_EVENT_NAME_LEN, "%s__%s", symbol,
+ snprintf(ebuf, MAX_EVENT_NAME_LEN, "%s__%s", symbol,
is_return ? "exit" : "entry");
- sanitize_event_name(buf);
- event = buf;
+ sanitize_event_name(ebuf);
+ event = ebuf;
}
if (is_return)
@@ -1179,25 +1303,28 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
else
ctx->flags |= TPARG_FL_FENTRY;
+ ctx->funcname = NULL;
if (is_tracepoint) {
+ /* Get tracepoint and lock its module until the end of the registration. */
+ struct tracepoint *tpoint;
+
ctx->flags |= TPARG_FL_TPOINT;
- tpoint = find_tracepoint(symbol, &tp_mod);
+ mod = NULL;
+ tpoint = find_tracepoint(symbol, &mod);
if (tpoint) {
- ctx->funcname = kallsyms_lookup(
- (unsigned long)tpoint->probestub,
- NULL, NULL, NULL, sbuf);
- } else if (IS_ENABLED(CONFIG_MODULES)) {
- /* This *may* be loaded afterwards */
- tpoint = TRACEPOINT_STUB;
- ctx->funcname = symbol;
- } else {
- trace_probe_log_set_index(1);
- trace_probe_log_err(0, NO_TRACEPOINT);
- return -EINVAL;
+ sbuf = kmalloc(KSYM_NAME_LEN, GFP_KERNEL);
+ if (!sbuf)
+ return -ENOMEM;
+ ctx->funcname = kallsyms_lookup((unsigned long)tpoint->probestub,
+ NULL, NULL, NULL, sbuf);
}
- } else
+ }
+ if (!ctx->funcname)
ctx->funcname = symbol;
+ abuf = kmalloc(MAX_BTF_ARGS_LEN, GFP_KERNEL);
+ if (!abuf)
+ return -ENOMEM;
argc -= 2; argv += 2;
new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
abuf, MAX_BTF_ARGS_LEN, ctx);
@@ -1218,8 +1345,7 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
return ret;
/* setup a probe */
- tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
- argc, is_return);
+ tf = alloc_trace_fprobe(group, event, symbol, argc, is_return, is_tracepoint);
if (IS_ERR(tf)) {
ret = PTR_ERR(tf);
/* This must return -ENOMEM, else there is a bug */
@@ -1251,7 +1377,7 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
if (ret < 0)
return ret;
- ret = register_trace_fprobe(tf);
+ ret = register_trace_fprobe_event(tf);
if (ret) {
trace_probe_log_set_index(1);
if (ret == -EILSEQ)
@@ -1271,14 +1397,17 @@ static int trace_fprobe_create_internal(int argc, const char *argv[],
static int trace_fprobe_create_cb(int argc, const char *argv[])
{
- struct traceprobe_parse_context ctx = {
- .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
- };
+ struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL;
int ret;
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE;
+
trace_probe_log_init("trace_fprobe", argc, argv);
- ret = trace_fprobe_create_internal(argc, argv, &ctx);
- traceprobe_finish_parse(&ctx);
+ ret = trace_fprobe_create_internal(argc, argv, ctx);
trace_probe_log_clear();
return ret;
}
@@ -1321,6 +1450,84 @@ static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
}
/*
+ * Enable trace_probe.
+ * If @file is NULL, enable the "perf" handler; otherwise enable the "trace" handler.
+ */
+static int enable_trace_fprobe(struct trace_event_call *call,
+ struct trace_event_file *file)
+{
+ struct trace_probe *tp;
+ struct trace_fprobe *tf;
+ bool enabled;
+ int ret = 0;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENODEV;
+ enabled = trace_probe_is_enabled(tp);
+
+ /* This also changes "enabled" state */
+ if (file) {
+ ret = trace_probe_add_file(tp, file);
+ if (ret)
+ return ret;
+ } else
+ trace_probe_set_flag(tp, TP_FLAG_PROFILE);
+
+ if (!enabled) {
+ list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
+ ret = __register_trace_fprobe(tf);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ return 0;
+}
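
With this split, creating a fprobe event only verifies the target; the
fprobe itself is registered on first enable (and torn down again on last
disable). A minimal userspace sketch of that lifecycle through tracefs,
assuming CONFIG_FPROBE_EVENTS and the usual tracefs mount point (needs
root; error handling trimmed):

    #include <stdio.h>

    static int write_str(const char *path, const char *s)
    {
            FILE *f = fopen(path, "w");

            if (!f)
                    return -1;
            fputs(s, f);
            return fclose(f);
    }

    int main(void)
    {
            /* Parsed and verified only; no fprobe is registered yet. */
            if (write_str("/sys/kernel/tracing/dynamic_events", "f vfs_read\n"))
                    return 1;

            /* Enabling the event triggers __register_trace_fprobe(). */
            if (write_str("/sys/kernel/tracing/events/fprobes/vfs_read__entry/enable",
                          "1\n"))
                    return 1;
            return 0;
    }
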
+
+/*
+ * Disable trace_probe.
+ * If @file is NULL, disable the "perf" handler; otherwise disable the "trace" handler.
+ */
+static int disable_trace_fprobe(struct trace_event_call *call,
+ struct trace_event_file *file)
+{
+ struct trace_fprobe *tf;
+ struct trace_probe *tp;
+
+ tp = trace_probe_primary_from_call(call);
+ if (WARN_ON_ONCE(!tp))
+ return -ENODEV;
+
+ if (file) {
+ if (!trace_probe_get_file_link(tp, file))
+ return -ENOENT;
+ if (!trace_probe_has_single_file(tp))
+ goto out;
+ trace_probe_clear_flag(tp, TP_FLAG_TRACE);
+ } else
+ trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
+
+ if (!trace_probe_is_enabled(tp)) {
+ list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
+ unregister_fprobe(&tf->fp);
+ }
+ }
+
+ out:
+ if (file)
+ /*
+ * Synchronization is done in the function below. For a perf event,
+ * file == NULL and perf_trace_event_unreg() calls
+ * tracepoint_synchronize_unregister() to synchronize the event, so
+ * we don't need to care about it here.
+ */
+ trace_probe_remove_file(tp, file);
+
+ return 0;
+}
+
+/*
* called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
*/
static int fprobe_register(struct trace_event_call *event,
@@ -1365,6 +1572,9 @@ static __init int init_fprobe_trace_early(void)
ret = register_tracepoint_module_notifier(&tracepoint_module_nb);
if (ret)
return ret;
+ ret = register_module_notifier(&tprobe_event_module_nb);
+ if (ret)
+ return ret;
#endif
return 0;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 4e37a0f6aaa3..d17c18934445 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -209,7 +209,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct trace_array *tr = op->private;
- struct trace_array_cpu *data;
unsigned int trace_ctx;
int bit;
@@ -224,9 +223,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
trace_ctx = tracing_gen_ctx_dec();
- data = this_cpu_ptr(tr->array_buffer.data);
- if (!atomic_read(&data->disabled))
- trace_function(tr, ip, parent_ip, trace_ctx, NULL);
+ trace_function(tr, ip, parent_ip, trace_ctx, NULL);
ftrace_test_recursion_unlock(bit);
}
@@ -236,10 +233,8 @@ function_args_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
struct trace_array *tr = op->private;
- struct trace_array_cpu *data;
unsigned int trace_ctx;
int bit;
- int cpu;
if (unlikely(!tr->function_enabled))
return;
@@ -250,10 +245,7 @@ function_args_trace_call(unsigned long ip, unsigned long parent_ip,
trace_ctx = tracing_gen_ctx();
- cpu = smp_processor_id();
- data = per_cpu_ptr(tr->array_buffer.data, cpu);
- if (!atomic_read(&data->disabled))
- trace_function(tr, ip, parent_ip, trace_ctx, fregs);
+ trace_function(tr, ip, parent_ip, trace_ctx, fregs);
ftrace_test_recursion_unlock(bit);
}
@@ -299,7 +291,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
parent_ip = function_get_true_parent_ip(parent_ip, fregs);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_inc_return(&data->disabled);
+ disabled = local_inc_return(&data->disabled);
if (likely(disabled == 1)) {
trace_ctx = tracing_gen_ctx_flags(flags);
@@ -311,7 +303,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
__trace_stack(tr, trace_ctx, skip);
}
- atomic_dec(&data->disabled);
+ local_dec(&data->disabled);
local_irq_restore(flags);
}
@@ -352,7 +344,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
{
struct trace_func_repeats *last_info;
struct trace_array *tr = op->private;
- struct trace_array_cpu *data;
unsigned int trace_ctx;
int bit;
@@ -364,8 +355,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
return;
parent_ip = function_get_true_parent_ip(parent_ip, fregs);
- data = this_cpu_ptr(tr->array_buffer.data);
- if (atomic_read(&data->disabled))
+ if (!tracer_tracing_is_on(tr))
goto out;
/*
@@ -412,7 +402,7 @@ function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
parent_ip = function_get_true_parent_ip(parent_ip, fregs);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_inc_return(&data->disabled);
+ disabled = local_inc_return(&data->disabled);
if (likely(disabled == 1)) {
last_info = per_cpu_ptr(tr->last_func_repeats, cpu);
@@ -427,7 +417,7 @@ function_stack_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
}
out:
- atomic_dec(&data->disabled);
+ local_dec(&data->disabled);
local_irq_restore(flags);
}
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 0c357a89c58e..66e1a527cf1a 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -202,12 +202,9 @@ static int graph_entry(struct ftrace_graph_ent *trace,
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
- struct trace_array_cpu *data;
struct fgraph_times *ftimes;
unsigned int trace_ctx;
- long disabled;
int ret = 0;
- int cpu;
if (*task_var & TRACE_GRAPH_NOTRACE)
return 0;
@@ -257,21 +254,14 @@ static int graph_entry(struct ftrace_graph_ent *trace,
if (tracing_thresh)
return 1;
- preempt_disable_notrace();
- cpu = raw_smp_processor_id();
- data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_read(&data->disabled);
- if (likely(!disabled)) {
- trace_ctx = tracing_gen_ctx();
- if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
- tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR)) {
- unsigned long retaddr = ftrace_graph_top_ret_addr(current);
- ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
- } else {
- ret = __graph_entry(tr, trace, trace_ctx, fregs);
- }
+ trace_ctx = tracing_gen_ctx();
+ if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
+ tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR)) {
+ unsigned long retaddr = ftrace_graph_top_ret_addr(current);
+ ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
+ } else {
+ ret = __graph_entry(tr, trace, trace_ctx, fregs);
}
- preempt_enable_notrace();
return ret;
}
@@ -351,13 +341,10 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
- struct trace_array_cpu *data;
struct fgraph_times *ftimes;
unsigned int trace_ctx;
u64 calltime, rettime;
- long disabled;
int size;
- int cpu;
rettime = trace_clock_local();
@@ -376,15 +363,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
calltime = ftimes->calltime;
- preempt_disable_notrace();
- cpu = raw_smp_processor_id();
- data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_read(&data->disabled);
- if (likely(!disabled)) {
- trace_ctx = tracing_gen_ctx();
- __trace_graph_return(tr, trace, trace_ctx, calltime, rettime);
- }
- preempt_enable_notrace();
+ trace_ctx = tracing_gen_ctx();
+ __trace_graph_return(tr, trace, trace_ctx, calltime, rettime);
}
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
@@ -475,10 +455,16 @@ static int graph_trace_init(struct trace_array *tr)
return 0;
}
+static struct tracer graph_trace;
+
static int ftrace_graph_trace_args(struct trace_array *tr, int set)
{
trace_func_graph_ent_t entry;
+ /* Do nothing if the current tracer is not this tracer */
+ if (tr->current_trace != &graph_trace)
+ return 0;
+
if (set)
entry = trace_graph_entry_args;
else
@@ -527,7 +513,7 @@ static void print_graph_proc(struct trace_seq *s, pid_t pid)
{
char comm[TASK_COMM_LEN];
/* sign + log10(MAX_INT) + '\0' */
- char pid_str[11];
+ char pid_str[12];
int spaces = 0;
int len;
int i;
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 40c39e946940..5496758b6c76 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -123,12 +123,12 @@ static int func_prolog_dec(struct trace_array *tr,
return 0;
*data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_inc_return(&(*data)->disabled);
+ disabled = local_inc_return(&(*data)->disabled);
if (likely(disabled == 1))
return 1;
- atomic_dec(&(*data)->disabled);
+ local_dec(&(*data)->disabled);
return 0;
}
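
The atomic_t -> local_t conversions in this file (and the other tracers
below) rely on data->disabled being a strictly per-CPU counter, only
updated from its own CPU with preemption disabled; local_* ops give the
same increment/decrement semantics without the cross-CPU guarantees (and
without the LOCK prefix on x86). A kernel-context sketch of the pattern,
with illustrative names (not the actual trace_array_cpu definition):

    #include <asm/local.h>
    #include <linux/percpu.h>

    struct demo_cpu_data {
            local_t disabled;
    };

    static DEFINE_PER_CPU(struct demo_cpu_data, demo_data);

    /* Must be called with preemption disabled, as the tracer callbacks are. */
    static bool demo_enter(void)
    {
            struct demo_cpu_data *d = this_cpu_ptr(&demo_data);

            /* First incrementer may trace; nested/recursive callers back off. */
            if (local_inc_return(&d->disabled) == 1)
                    return true;
            local_dec(&d->disabled);
            return false;
    }

    static void demo_exit(void)
    {
            local_dec(&this_cpu_ptr(&demo_data)->disabled);
    }
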
@@ -152,7 +152,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
trace_function(tr, ip, parent_ip, trace_ctx, fregs);
- atomic_dec(&data->disabled);
+ local_dec(&data->disabled);
}
#endif /* CONFIG_FUNCTION_TRACER */
@@ -209,7 +209,7 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
trace_ctx = tracing_gen_ctx_flags(flags);
ret = __trace_graph_entry(tr, trace, trace_ctx);
- atomic_dec(&data->disabled);
+ local_dec(&data->disabled);
return ret;
}
@@ -238,7 +238,7 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace,
trace_ctx = tracing_gen_ctx_flags(flags);
__trace_graph_return(tr, trace, trace_ctx, *calltime, rettime);
- atomic_dec(&data->disabled);
+ local_dec(&data->disabled);
}
static struct fgraph_ops fgraph_ops = {
@@ -397,6 +397,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
int cpu;
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
+ long disabled;
if (!tracer_enabled || !tracing_is_enabled())
return;
@@ -408,20 +409,22 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
data = per_cpu_ptr(tr->array_buffer.data, cpu);
- if (unlikely(!data) || atomic_read(&data->disabled))
+ if (unlikely(!data) || local_read(&data->disabled))
return;
- atomic_inc(&data->disabled);
+ disabled = local_inc_return(&data->disabled);
- data->critical_sequence = max_sequence;
- data->preempt_timestamp = ftrace_now(cpu);
- data->critical_start = parent_ip ? : ip;
+ if (disabled == 1) {
+ data->critical_sequence = max_sequence;
+ data->preempt_timestamp = ftrace_now(cpu);
+ data->critical_start = parent_ip ? : ip;
- __trace_function(tr, ip, parent_ip, tracing_gen_ctx());
+ __trace_function(tr, ip, parent_ip, tracing_gen_ctx());
- per_cpu(tracing_cpu, cpu) = 1;
+ per_cpu(tracing_cpu, cpu) = 1;
+ }
- atomic_dec(&data->disabled);
+ local_dec(&data->disabled);
}
static nokprobe_inline void
@@ -431,6 +434,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned int trace_ctx;
+ long disabled;
cpu = raw_smp_processor_id();
/* Always clear the tracing cpu on stopping the trace */
@@ -445,16 +449,19 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
data = per_cpu_ptr(tr->array_buffer.data, cpu);
if (unlikely(!data) ||
- !data->critical_start || atomic_read(&data->disabled))
+ !data->critical_start || local_read(&data->disabled))
return;
- atomic_inc(&data->disabled);
+ disabled = local_inc_return(&data->disabled);
- trace_ctx = tracing_gen_ctx();
- __trace_function(tr, ip, parent_ip, trace_ctx);
- check_critical_timing(tr, data, parent_ip ? : ip, cpu);
- data->critical_start = 0;
- atomic_dec(&data->disabled);
+ if (disabled == 1) {
+ trace_ctx = tracing_gen_ctx();
+ __trace_function(tr, ip, parent_ip, trace_ctx);
+ check_critical_timing(tr, data, parent_ip ? : ip, cpu);
+ data->critical_start = 0;
+ }
+
+ local_dec(&data->disabled);
}
/* start and stop critical timings used to for stoppage (in idle) */
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index 1e72d20b3c2f..896ff78b8349 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -43,17 +43,15 @@ static void ftrace_dump_buf(int skip_entries, long cpu_file)
if (cpu_file == RING_BUFFER_ALL_CPUS) {
for_each_tracing_cpu(cpu) {
iter.buffer_iter[cpu] =
- ring_buffer_read_prepare(iter.array_buffer->buffer,
- cpu, GFP_ATOMIC);
- ring_buffer_read_start(iter.buffer_iter[cpu]);
+ ring_buffer_read_start(iter.array_buffer->buffer,
+ cpu, GFP_ATOMIC);
tracing_iter_reset(&iter, cpu);
}
} else {
iter.cpu_file = cpu_file;
iter.buffer_iter[cpu_file] =
- ring_buffer_read_prepare(iter.array_buffer->buffer,
+ ring_buffer_read_start(iter.array_buffer->buffer,
cpu_file, GFP_ATOMIC);
- ring_buffer_read_start(iter.buffer_iter[cpu_file]);
tracing_iter_reset(&iter, cpu_file);
}
@@ -98,7 +96,6 @@ static int kdb_ftdump(int argc, const char **argv)
long cpu_file;
int err;
int cnt;
- int cpu;
if (argc > 2)
return KDB_ARGCOUNT;
@@ -120,9 +117,7 @@ static int kdb_ftdump(int argc, const char **argv)
trace_init_global_iter(&iter);
iter.buffer_iter = buffer_iter;
- for_each_tracing_cpu(cpu) {
- atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
- }
+ tracer_tracing_disable(iter.tr);
/* A negative skip_entries means skip all but the last entries */
if (skip_entries < 0) {
@@ -135,9 +130,7 @@ static int kdb_ftdump(int argc, const char **argv)
ftrace_dump_buf(skip_entries, cpu_file);
- for_each_tracing_cpu(cpu) {
- atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
- }
+ tracer_tracing_enable(iter.tr);
kdb_trap_printk--;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3e5c47b6d7b2..ccae62d4fb91 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -9,19 +9,19 @@
#include <linux/bpf-cgroup.h>
#include <linux/cleanup.h>
-#include <linux/security.h>
+#include <linux/error-injection.h>
#include <linux/module.h>
-#include <linux/uaccess.h>
#include <linux/rculist.h>
-#include <linux/error-injection.h>
+#include <linux/security.h>
+#include <linux/uaccess.h>
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
#include "trace_dynevent.h"
#include "trace_kprobe_selftest.h"
#include "trace_probe.h"
-#include "trace_probe_tmpl.h"
#include "trace_probe_kernel.h"
+#include "trace_probe_tmpl.h"
#define KPROBE_EVENT_SYSTEM "kprobes"
#define KRETPROBE_MAXACTIVE_MAX 4096
@@ -861,20 +861,20 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
struct trace_kprobe *tk __free(free_trace_kprobe) = NULL;
+ const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
+ const char **new_argv __free(kfree) = NULL;
int i, len, new_argc = 0, ret = 0;
- bool is_return = false;
char *symbol __free(kfree) = NULL;
- char *tmp = NULL;
- const char **new_argv __free(kfree) = NULL;
- const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
+ char *ebuf __free(kfree) = NULL;
+ char *gbuf __free(kfree) = NULL;
+ char *abuf __free(kfree) = NULL;
+ char *dbuf __free(kfree) = NULL;
enum probe_print_type ptype;
+ bool is_return = false;
int maxactive = 0;
- long offset = 0;
void *addr = NULL;
- char buf[MAX_EVENT_NAME_LEN];
- char gbuf[MAX_EVENT_NAME_LEN];
- char abuf[MAX_BTF_ARGS_LEN];
- char *dbuf __free(kfree) = NULL;
+ char *tmp = NULL;
+ long offset = 0;
switch (argv[0][0]) {
case 'r':
@@ -893,6 +893,8 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
event++;
if (isdigit(argv[0][1])) {
+ char *buf __free(kfree) = NULL;
+
if (!is_return) {
trace_probe_log_err(1, BAD_MAXACT_TYPE);
return -EINVAL;
@@ -905,7 +907,7 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
trace_probe_log_err(1, BAD_MAXACT);
return -EINVAL;
}
- memcpy(buf, &argv[0][1], len);
+ buf = kmemdup(&argv[0][1], len + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
buf[len] = '\0';
ret = kstrtouint(buf, 0, &maxactive);
if (ret || !maxactive) {
@@ -973,6 +975,9 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
trace_probe_log_set_index(0);
if (event) {
+ gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+ if (!gbuf)
+ return -ENOMEM;
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
@@ -981,16 +986,22 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
if (!event) {
/* Make a new event name */
+ ebuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+ if (!ebuf)
+ return -ENOMEM;
if (symbol)
- snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
+ snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
is_return ? 'r' : 'p', symbol, offset);
else
- snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
+ snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_0x%p",
is_return ? 'r' : 'p', addr);
- sanitize_event_name(buf);
- event = buf;
+ sanitize_event_name(ebuf);
+ event = ebuf;
}
+ abuf = kmalloc(MAX_BTF_ARGS_LEN, GFP_KERNEL);
+ if (!abuf)
+ return -ENOMEM;
argc -= 2; argv += 2;
ctx->funcname = symbol;
new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
@@ -1065,14 +1076,18 @@ static int trace_kprobe_create_internal(int argc, const char *argv[],
static int trace_kprobe_create_cb(int argc, const char *argv[])
{
- struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL };
+ struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL;
int ret;
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->flags = TPARG_FL_KERNEL;
+
trace_probe_log_init("trace_kprobe", argc, argv);
- ret = trace_kprobe_create_internal(argc, argv, &ctx);
+ ret = trace_kprobe_create_internal(argc, argv, ctx);
- traceprobe_finish_parse(&ctx);
trace_probe_log_clear();
return ret;
}
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index ba5858866b2f..c706544be60c 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -291,7 +291,6 @@ __init static int init_mmio_trace(void)
device_initcall(init_mmio_trace);
static void __trace_mmiotrace_rw(struct trace_array *tr,
- struct trace_array_cpu *data,
struct mmiotrace_rw *rw)
{
struct trace_buffer *buffer = tr->array_buffer.buffer;
@@ -315,12 +314,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
void mmio_trace_rw(struct mmiotrace_rw *rw)
{
struct trace_array *tr = mmio_trace_array;
- struct trace_array_cpu *data = per_cpu_ptr(tr->array_buffer.data, smp_processor_id());
- __trace_mmiotrace_rw(tr, data, rw);
+ __trace_mmiotrace_rw(tr, rw);
}
static void __trace_mmiotrace_map(struct trace_array *tr,
- struct trace_array_cpu *data,
struct mmiotrace_map *map)
{
struct trace_buffer *buffer = tr->array_buffer.buffer;
@@ -344,12 +341,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
void mmio_trace_mapping(struct mmiotrace_map *map)
{
struct trace_array *tr = mmio_trace_array;
- struct trace_array_cpu *data;
-
- preempt_disable();
- data = per_cpu_ptr(tr->array_buffer.data, smp_processor_id());
- __trace_mmiotrace_map(tr, data, map);
- preempt_enable();
+ __trace_mmiotrace_map(tr, map);
}
int mmio_trace_printk(const char *fmt, va_list args)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index e732c9e37e14..fd259da0aa64 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -637,8 +637,8 @@ __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, u
entry = ring_buffer_event_data(event);
- memcpy(&entry->caller, fstack->calls, size);
entry->size = fstack->nr_entries;
+ memcpy(&entry->caller, fstack->calls, size);
trace_buffer_unlock_commit_nostack(buffer, event);
}
@@ -2302,7 +2302,7 @@ osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
* osnoise_cpus_write - Write function for "cpus" entry
* @filp: The active open file structure
* @ubuf: The user buffer that contains the value to write
- * @cnt: The maximum number of bytes to write to "file"
+ * @count: The maximum number of bytes to write to "file"
* @ppos: The current position in @file
*
* This function provides a write implementation for the "cpus"
@@ -2320,10 +2320,11 @@ osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
{
cpumask_var_t osnoise_cpumask_new;
int running, err;
- char buf[256];
+ char *buf __free(kfree) = NULL;
- if (count >= 256)
- return -EINVAL;
+ /* +1 for the NUL terminator written at buf[count] below */
+ buf = kmalloc(count + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
if (copy_from_user(buf, ubuf, count))
return -EFAULT;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b9ab06c99543..0b3db02030a7 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -938,6 +938,9 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c
struct list_head *head)
{
struct ftrace_event_field *field;
+ struct trace_array *tr = iter->tr;
+ unsigned long long laddr;
+ unsigned long addr;
int offset;
int len;
int ret;
@@ -974,8 +977,8 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c
case FILTER_PTR_STRING:
if (!iter->fmt_size)
trace_iter_expand_format(iter);
- pos = *(void **)pos;
- ret = strncpy_from_kernel_nofault(iter->fmt, pos,
+ addr = trace_adjust_address(tr, *(unsigned long *)pos);
+ ret = strncpy_from_kernel_nofault(iter->fmt, (void *)addr,
iter->fmt_size);
if (ret < 0)
trace_seq_printf(&iter->seq, "(0x%px)", pos);
@@ -984,8 +987,8 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c
pos, iter->fmt);
break;
case FILTER_TRACE_FN:
- pos = *(void **)pos;
- trace_seq_printf(&iter->seq, "%pS", pos);
+ addr = trace_adjust_address(tr, *(unsigned long *)pos);
+ trace_seq_printf(&iter->seq, "%pS", (void *)addr);
break;
case FILTER_CPU:
case FILTER_OTHER:
@@ -1015,14 +1018,36 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c
break;
}
- trace_seq_printf(&iter->seq, "0x%x (%d)",
- *(unsigned int *)pos,
- *(unsigned int *)pos);
+ addr = *(unsigned int *)pos;
+
+ /* Some fields hold offsets relative to _stext. */
+ if (!strcmp(field->name, "caller_offs") ||
+ !strcmp(field->name, "parent_offs")) {
+ unsigned long ip;
+
+ ip = addr + (unsigned long)_stext;
+ ip = trace_adjust_address(tr, ip);
+ trace_seq_printf(&iter->seq, "%pS ", (void *)ip);
+ }
+
+ if (sizeof(long) == 4) {
+ addr = trace_adjust_address(tr, addr);
+ trace_seq_printf(&iter->seq, "%pS (%d)",
+ (void *)addr, (int)addr);
+ } else {
+ trace_seq_printf(&iter->seq, "0x%x (%d)",
+ (unsigned int)addr, (int)addr);
+ }
break;
case 8:
- trace_seq_printf(&iter->seq, "0x%llx (%lld)",
- *(unsigned long long *)pos,
- *(unsigned long long *)pos);
+ laddr = *(unsigned long long *)pos;
+ if (sizeof(long) == 8) {
+ laddr = trace_adjust_address(tr, (unsigned long)laddr);
+ trace_seq_printf(&iter->seq, "%pS (%lld)",
+ (void *)(long)laddr, laddr);
+ } else {
+ trace_seq_printf(&iter->seq, "0x%llx (%lld)", laddr, laddr);
+ }
break;
default:
trace_seq_puts(&iter->seq, "<INVALID-SIZE>");
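
trace_adjust_address() replaces the bare "ip += tr->text_delta" arithmetic
used previously: when reading a persistent ring buffer recorded by an
earlier boot, kernel-text addresses must be rebased onto the current boot's
layout before %pS symbolization can work. A runnable sketch of the core
rebase, with illustrative values (the real helper must also handle module
address ranges):

    #include <stdio.h>

    /* Rebase an address recorded in a previous boot onto the current text. */
    static unsigned long adjust(unsigned long addr, long text_delta)
    {
            return addr + text_delta;
    }

    int main(void)
    {
            unsigned long recorded_ip = 0xffffffff81123456UL;  /* previous boot */
            long delta = 0x2000000;  /* current _stext minus recorded _stext */

            printf("%#lx -> %#lx\n", recorded_ip, adjust(recorded_ip, delta));
            return 0;
    }
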
@@ -1086,11 +1111,11 @@ enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags,
}
static void print_fn_trace(struct trace_seq *s, unsigned long ip,
- unsigned long parent_ip, long delta,
- unsigned long *args, int flags)
+ unsigned long parent_ip, unsigned long *args,
+ struct trace_array *tr, int flags)
{
- ip += delta;
- parent_ip += delta;
+ ip = trace_adjust_address(tr, ip);
+ parent_ip = trace_adjust_address(tr, parent_ip);
seq_print_ip_sym(s, ip, flags);
if (args)
@@ -1119,8 +1144,7 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
else
args = NULL;
- print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta,
- args, flags);
+ print_fn_trace(s, field->ip, field->parent_ip, args, iter->tr, flags);
trace_seq_putc(s, '\n');
return trace_handle_return(s);
@@ -1706,7 +1730,7 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
trace_assign_type(field, iter->ent);
- ip = field->ip + iter->tr->text_delta;
+ ip = trace_adjust_address(iter->tr, field->ip);
seq_print_ip_sym(s, ip, flags);
trace_seq_printf(s, ": %s", field->buf);
@@ -1792,7 +1816,7 @@ trace_func_repeats_print(struct trace_iterator *iter, int flags,
trace_assign_type(field, iter->ent);
- print_fn_trace(s, field->ip, field->parent_ip, iter->tr->text_delta, NULL, flags);
+ print_fn_trace(s, field->ip, field->parent_ip, NULL, iter->tr, flags);
trace_seq_printf(s, " (repeats: %u, last_ts:", field->count);
trace_print_time(s, iter,
iter->ts - FUNC_REPEATS_GET_DELTA_TS(field));
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 424751cdf31f..5cbdc423afeb 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -13,8 +13,8 @@
#include <linux/bpf.h>
#include <linux/fs.h>
-#include "trace_btf.h"
+#include "trace_btf.h"
#include "trace_probe.h"
#undef C
@@ -247,7 +247,25 @@ int traceprobe_split_symbol_offset(char *symbol, long *offset)
return 0;
}
-/* @buf must has MAX_EVENT_NAME_LEN size */
+/**
+ * traceprobe_parse_event_name() - Parse a string into group and event names
+ * @pevent: A pointer to the string to be parsed.
+ * @pgroup: A pointer to the group name.
+ * @buf: A buffer to store the parsed group name.
+ * @offset: The offset of the string in the original user command, for logging.
+ *
+ * This parses a string with the format `[GROUP/][EVENT]` or `[GROUP.][EVENT]`
+ * (either GROUP or EVENT or both must be specified).
+ * Since the parsed group name is stored in @buf, the caller must ensure @buf
+ * is at least MAX_EVENT_NAME_LEN bytes.
+ *
+ * Return: 0 on success, or -EINVAL on failure.
+ *
+ * On success, *@pevent is updated to point to the event name part of the
+ * original string, or NULL if there is no event name.
+ * Likewise, *@pgroup is updated to point to the parsed group name, which
+ * is stored in @buf, or NULL if there is no group name.
+ */
int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
char *buf, int offset)
{
@@ -657,7 +675,7 @@ static int parse_btf_arg(char *varname,
ret = query_btf_context(ctx);
if (ret < 0 || ctx->nr_params == 0) {
trace_probe_log_err(ctx->offset, NO_BTF_ENTRY);
- return PTR_ERR(params);
+ return -ENOENT;
}
}
params = ctx->params;
@@ -779,6 +797,36 @@ static int check_prepare_btf_string_fetch(char *typename,
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
+static void store_entry_arg_at(struct fetch_insn *code, int argnum, int offset)
+{
+ code[0].op = FETCH_OP_ARG;
+ code[0].param = argnum;
+ code[1].op = FETCH_OP_ST_EDATA;
+ code[1].offset = offset;
+}
+
+static int get_entry_arg_max_offset(struct probe_entry_arg *earg)
+{
+ int i, max_offset = 0;
+
+ /*
+ * The earg->code[] array holds an operation sequence which is run in
+ * the entry handler.
+ * The sequence is terminated by FETCH_OP_END, and each datum is stored
+ * in the entry data buffer by FETCH_OP_ST_EDATA, which stores the data
+ * at (data buffer + its offset); all data are "unsigned long" sized.
+ * The offset must increase each time a datum is stored. Thus we need
+ * to find the last FETCH_OP_ST_EDATA in the code array.
+ */
+ for (i = 0; i < earg->size - 1 && earg->code[i].op != FETCH_OP_END; i++) {
+ if (earg->code[i].op == FETCH_OP_ST_EDATA)
+ if (earg->code[i].offset > max_offset)
+ max_offset = earg->code[i].offset;
+ }
+ return max_offset;
+}
+
/*
* Add the entry code to store the 'argnum'th parameter and return the offset
* in the entry data buffer where the data will be stored.
@@ -786,8 +834,7 @@ static int check_prepare_btf_string_fetch(char *typename,
static int __store_entry_arg(struct trace_probe *tp, int argnum)
{
struct probe_entry_arg *earg = tp->entry_arg;
- bool match = false;
- int i, offset;
+ int i, offset, last_offset = 0;
if (!earg) {
earg = kzalloc(sizeof(*tp->entry_arg), GFP_KERNEL);
@@ -804,78 +851,59 @@ static int __store_entry_arg(struct trace_probe *tp, int argnum)
for (i = 0; i < earg->size; i++)
earg->code[i].op = FETCH_OP_END;
tp->entry_arg = earg;
+ store_entry_arg_at(earg->code, argnum, 0);
+ return 0;
}
/*
- * The entry code array is repeating the pair of
- * [FETCH_OP_ARG(argnum)][FETCH_OP_ST_EDATA(offset of entry data buffer)]
- * and the rest of entries are filled with [FETCH_OP_END].
+ * NOTE: if anyone changes the following rule, please rewrite this.
+ * The entry code array is filled with pairs of
*
- * To reduce the redundant function parameter fetching, we scan the entry
- * code array to find the FETCH_OP_ARG which already fetches the 'argnum'
- * parameter. If it doesn't match, update 'offset' to find the last
- * offset.
- * If we find the FETCH_OP_END without matching FETCH_OP_ARG entry, we
- * will save the entry with FETCH_OP_ARG and FETCH_OP_ST_EDATA, and
- * return data offset so that caller can find the data offset in the entry
- * data buffer.
+ * [FETCH_OP_ARG(argnum)]
+ * [FETCH_OP_ST_EDATA(offset of entry data buffer)]
+ *
+ * and the rest of the entries are filled with [FETCH_OP_END].
+ * The offset is incremented for each stored argument, so the last
+ * pair has the largest offset.
*/
- offset = 0;
- for (i = 0; i < earg->size - 1; i++) {
- switch (earg->code[i].op) {
- case FETCH_OP_END:
- earg->code[i].op = FETCH_OP_ARG;
- earg->code[i].param = argnum;
- earg->code[i + 1].op = FETCH_OP_ST_EDATA;
- earg->code[i + 1].offset = offset;
- return offset;
- case FETCH_OP_ARG:
- match = (earg->code[i].param == argnum);
- break;
- case FETCH_OP_ST_EDATA:
- offset = earg->code[i].offset;
- if (match)
- return offset;
- offset += sizeof(unsigned long);
- break;
- default:
- break;
- }
+
+ /* Search for the offset of the specified argnum. */
+ for (i = 0; i < earg->size - 1 && earg->code[i].op != FETCH_OP_END; i += 2) {
+ if (WARN_ON_ONCE(earg->code[i].op != FETCH_OP_ARG))
+ return -EINVAL;
+
+ if (earg->code[i].param != argnum)
+ continue;
+
+ if (WARN_ON_ONCE(earg->code[i + 1].op != FETCH_OP_ST_EDATA))
+ return -EINVAL;
+
+ return earg->code[i + 1].offset;
+ }
+ /* Not found, append new entry if possible. */
+ if (i >= earg->size - 1)
+ return -ENOSPC;
+
+ /* The last entry must have the largest offset. */
+ if (i != 0) {
+ if (WARN_ON_ONCE(earg->code[i - 1].op != FETCH_OP_ST_EDATA))
+ return -EINVAL;
+ last_offset = earg->code[i - 1].offset;
}
- return -ENOSPC;
+
+ offset = last_offset + sizeof(unsigned long);
+ store_entry_arg_at(&earg->code[i], argnum, offset);
+ return offset;
}
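
For reference, a sketch of the resulting layout after storing two distinct
arguments on a 64-bit kernel (param and offset values are illustrative,
not dumped from a real run):

    /*
     *   code[0] = { .op = FETCH_OP_ARG,      .param  = 2 }
     *   code[1] = { .op = FETCH_OP_ST_EDATA, .offset = 0 }
     *   code[2] = { .op = FETCH_OP_ARG,      .param  = 0 }
     *   code[3] = { .op = FETCH_OP_ST_EDATA, .offset = 8 }
     *   code[4] = { .op = FETCH_OP_END }
     *
     * A repeated request for the argument in code[0] hits code[0..1] in
     * the search loop and returns offset 0; a new argument appends
     * another pair at code[4..5] with offset 16.
     */
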
int traceprobe_get_entry_data_size(struct trace_probe *tp)
{
struct probe_entry_arg *earg = tp->entry_arg;
- int i, size = 0;
if (!earg)
return 0;
- /*
- * earg->code[] array has an operation sequence which is run in
- * the entry handler.
- * The sequence stopped by FETCH_OP_END and each data stored in
- * the entry data buffer by FETCH_OP_ST_EDATA. The FETCH_OP_ST_EDATA
- * stores the data at the data buffer + its offset, and all data are
- * "unsigned long" size. The offset must be increased when a data is
- * stored. Thus we need to find the last FETCH_OP_ST_EDATA in the
- * code array.
- */
- for (i = 0; i < earg->size; i++) {
- switch (earg->code[i].op) {
- case FETCH_OP_END:
- goto out;
- case FETCH_OP_ST_EDATA:
- size = earg->code[i].offset + sizeof(unsigned long);
- break;
- default:
- break;
- }
- }
-out:
- return size;
+ return get_entry_arg_max_offset(earg) + sizeof(unsigned long);
}
void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs)
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 854e5668f5ee..842383fbc03b 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -10,20 +10,22 @@
* Author: Srikar Dronamraju
*/
+#include <linux/bitops.h>
+#include <linux/btf.h>
+#include <linux/cleanup.h>
+#include <linux/kprobes.h>
+#include <linux/limits.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/smp.h>
-#include <linux/tracefs.h>
-#include <linux/types.h>
#include <linux/string.h>
-#include <linux/ptrace.h>
-#include <linux/perf_event.h>
-#include <linux/kprobes.h>
#include <linux/stringify.h>
-#include <linux/limits.h>
+#include <linux/tracefs.h>
+#include <linux/types.h>
#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <linux/btf.h>
+
#include <asm/bitsperlong.h>
#include "trace.h"
@@ -438,6 +440,14 @@ extern void traceprobe_free_probe_arg(struct probe_arg *arg);
* this MUST be called for clean up the context and return a resource.
*/
void traceprobe_finish_parse(struct traceprobe_parse_context *ctx);
+static inline void traceprobe_free_parse_ctx(struct traceprobe_parse_context *ctx)
+{
+ traceprobe_finish_parse(ctx);
+ kfree(ctx);
+}
+
+DEFINE_FREE(traceprobe_parse_context, struct traceprobe_parse_context *,
+ if (_T) traceprobe_free_parse_ctx(_T))
extern int traceprobe_split_symbol_offset(char *symbol, long *offset);
int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
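
With this DEFINE_FREE(), the create callbacks converted in this patch (see
trace_fprobe_create_cb() and trace_kprobe_create_cb() above) can declare a
self-cleaning parse context:

    struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) =
            kzalloc(sizeof(*ctx), GFP_KERNEL);

    if (!ctx)
            return -ENOMEM;
    /* ctx is finish-parsed and kfree()d on every return path. */
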
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index a0db3404f7f7..bf1cb80742ae 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -83,14 +83,14 @@ func_prolog_preempt_disable(struct trace_array *tr,
goto out_enable;
*data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_inc_return(&(*data)->disabled);
+ disabled = local_inc_return(&(*data)->disabled);
if (unlikely(disabled != 1))
goto out;
return 1;
out:
- atomic_dec(&(*data)->disabled);
+ local_dec(&(*data)->disabled);
out_enable:
preempt_enable_notrace();
@@ -144,7 +144,7 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
*calltime = trace_clock_local();
ret = __trace_graph_entry(tr, trace, trace_ctx);
- atomic_dec(&data->disabled);
+ local_dec(&data->disabled);
preempt_enable_notrace();
return ret;
@@ -173,7 +173,7 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace,
return;
__trace_graph_return(tr, trace, trace_ctx, *calltime, rettime);
- atomic_dec(&data->disabled);
+ local_dec(&data->disabled);
preempt_enable_notrace();
return;
@@ -243,7 +243,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
trace_function(tr, ip, parent_ip, trace_ctx, fregs);
local_irq_restore(flags);
- atomic_dec(&data->disabled);
+ local_dec(&data->disabled);
preempt_enable_notrace();
}
@@ -471,7 +471,7 @@ probe_wakeup_sched_switch(void *ignore, bool preempt,
/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
- disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled);
+ disabled = local_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled);
if (likely(disabled != 1))
goto out;
@@ -508,7 +508,7 @@ out_unlock:
arch_spin_unlock(&wakeup_lock);
local_irq_restore(flags);
out:
- atomic_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled);
+ local_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled);
}
static void __wakeup_reset(struct trace_array *tr)
@@ -563,7 +563,7 @@ probe_wakeup(void *ignore, struct task_struct *p)
(!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio)))
return;
- disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled);
+ disabled = local_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled);
if (unlikely(disabled != 1))
goto out;
@@ -610,7 +610,7 @@ probe_wakeup(void *ignore, struct task_struct *p)
out_locked:
arch_spin_unlock(&wakeup_lock);
out:
- atomic_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled);
+ local_dec(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled);
}
static void start_wakeup_tracer(struct trace_array *tr)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 14c6f272c4d8..0aa2514a6593 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -32,7 +32,7 @@ static arch_spinlock_t stack_trace_max_lock =
DEFINE_PER_CPU(int, disable_stack_tracer);
static DEFINE_MUTEX(stack_sysctl_mutex);
-int stack_tracer_enabled;
+static int stack_tracer_enabled;
static void print_max_stack(void)
{
@@ -542,7 +542,7 @@ static __init int enable_stacktrace(char *str)
int len;
if ((len = str_has_prefix(str, "_filter=")))
- strncpy(stack_trace_filter_buf, str + len, COMMAND_LINE_SIZE);
+ strscpy(stack_trace_filter_buf, str + len);
stack_tracer_enabled = 1;
return 1;
@@ -578,3 +578,23 @@ static __init int stack_trace_init(void)
}
device_initcall(stack_trace_init);
+
+static const struct ctl_table trace_stack_sysctl_table[] = {
+ {
+ .procname = "stack_tracer_enabled",
+ .data = &stack_tracer_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = stack_trace_sysctl,
+ },
+};
+
+static int __init init_trace_stack_sysctls(void)
+{
+ register_sysctl_init("kernel", trace_stack_sysctl_table);
+ return 0;
+}
+subsys_initcall(init_trace_stack_sysctls);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 35cf76c75dd7..8b0bcc0d8f41 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -8,17 +8,19 @@
#define pr_fmt(fmt) "trace_uprobe: " fmt
#include <linux/bpf-cgroup.h>
-#include <linux/security.h>
+#include <linux/cleanup.h>
#include <linux/ctype.h>
+#include <linux/filter.h>
#include <linux/module.h>
-#include <linux/uaccess.h>
-#include <linux/uprobes.h>
#include <linux/namei.h>
-#include <linux/string.h>
-#include <linux/rculist.h>
-#include <linux/filter.h>
#include <linux/percpu.h>
+#include <linux/rculist.h>
+#include <linux/security.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/uprobes.h>
+#include "trace.h"
#include "trace_dynevent.h"
#include "trace_probe.h"
#include "trace_probe_tmpl.h"
@@ -537,15 +539,15 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
*/
static int __trace_uprobe_create(int argc, const char **argv)
{
- struct trace_uprobe *tu;
const char *event = NULL, *group = UPROBE_EVENT_SYSTEM;
char *arg, *filename, *rctr, *rctr_end, *tmp;
- char buf[MAX_EVENT_NAME_LEN];
- char gbuf[MAX_EVENT_NAME_LEN];
- enum probe_print_type ptype;
- struct path path;
unsigned long offset, ref_ctr_offset;
+ char *gbuf __free(kfree) = NULL;
+ char *buf __free(kfree) = NULL;
+ enum probe_print_type ptype;
+ struct trace_uprobe *tu;
bool is_return = false;
+ struct path path;
int i, ret;
ref_ctr_offset = 0;
@@ -653,6 +655,10 @@ static int __trace_uprobe_create(int argc, const char **argv)
/* setup a probe */
trace_probe_log_set_index(0);
if (event) {
+ gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+ if (!gbuf)
+ goto fail_mem;
+
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
@@ -664,15 +670,16 @@ static int __trace_uprobe_create(int argc, const char **argv)
char *ptr;
tail = kstrdup(kbasename(filename), GFP_KERNEL);
- if (!tail) {
- ret = -ENOMEM;
- goto fail_address_parse;
- }
+ if (!tail)
+ goto fail_mem;
ptr = strpbrk(tail, ".-_");
if (ptr)
*ptr = '\0';
+ buf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL);
+ if (!buf)
+ goto fail_mem;
snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset);
event = buf;
kfree(tail);
@@ -695,13 +702,16 @@ static int __trace_uprobe_create(int argc, const char **argv)
/* parse arguments */
for (i = 0; i < argc; i++) {
- struct traceprobe_parse_context ctx = {
- .flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER,
- };
+ struct traceprobe_parse_context *ctx __free(traceprobe_parse_context)
+ = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ ctx->flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER;
trace_probe_log_set_index(i + 2);
- ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], &ctx);
- traceprobe_finish_parse(&ctx);
+ ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], ctx);
if (ret)
goto error;
}
@@ -721,6 +731,9 @@ out:
trace_probe_log_clear();
return ret;
+fail_mem:
+ ret = -ENOMEM;
+
fail_address_parse:
trace_probe_log_clear();
path_put(&path);
@@ -1489,7 +1502,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
: BPF_FD_TYPE_UPROBE;
*filename = tu->filename;
*probe_offset = tu->offset;
- *probe_addr = 0;
+ *probe_addr = tu->ref_ctr_offset;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */