From 267fb27352b6fc9fdbad753127a239f75618ecbc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 15:50:32 +0100 Subject: perf: Reduce stack usage of perf_output_begin() __perf_output_begin() has an on-stack struct perf_sample_data in the unlikely case it needs to generate a LOST record. However, every call to perf_output_begin() must already have a perf_sample_data on-stack. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151954.985416146@infradead.org --- kernel/events/core.c | 32 +++++++++++++++++--------------- kernel/events/ring_buffer.c | 20 +++++++++++--------- 2 files changed, 28 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 5a29ab09e72d..fc681c7c1e03 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7186,6 +7186,7 @@ __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, int (*output_begin)(struct perf_output_handle *, + struct perf_sample_data *, struct perf_event *, unsigned int)) { @@ -7198,7 +7199,7 @@ __perf_event_output(struct perf_event *event, perf_prepare_sample(&header, data, event, regs); - err = output_begin(&handle, event, header.size); + err = output_begin(&handle, data, event, header.size); if (err) goto exit; @@ -7264,7 +7265,7 @@ perf_event_read_event(struct perf_event *event, int ret; perf_event_header__init_id(&read_event.header, &sample, event); - ret = perf_output_begin(&handle, event, read_event.header.size); + ret = perf_output_begin(&handle, &sample, event, read_event.header.size); if (ret) return; @@ -7533,7 +7534,7 @@ static void perf_event_task_output(struct perf_event *event, perf_event_header__init_id(&task_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, task_event->event_id.header.size); if (ret) goto out; @@ -7636,7 +7637,7 @@ static void perf_event_comm_output(struct perf_event *event, return; perf_event_header__init_id(&comm_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, comm_event->event_id.header.size); if (ret) @@ -7736,7 +7737,7 @@ static void perf_event_namespaces_output(struct perf_event *event, perf_event_header__init_id(&namespaces_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, namespaces_event->event_id.header.size); if (ret) goto out; @@ -7863,7 +7864,7 @@ static void perf_event_cgroup_output(struct perf_event *event, void *data) perf_event_header__init_id(&cgroup_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, cgroup_event->event_id.header.size); if (ret) goto out; @@ -7989,7 +7990,7 @@ static void perf_event_mmap_output(struct perf_event *event, } perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, mmap_event->event_id.header.size); if (ret) goto out; @@ -8299,7 +8300,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head, int ret; perf_event_header__init_id(&rec.header, &sample, event); - ret = perf_output_begin(&handle, event, rec.header.size); + ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; @@ -8333,7 +8334,7 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost) perf_event_header__init_id(&lost_samples_event.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, lost_samples_event.header.size); if (ret) return; @@ -8388,7 +8389,7 @@ static void perf_event_switch_output(struct perf_event *event, void *data) perf_event_header__init_id(&se->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, se->event_id.header.size); + ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size); if (ret) return; @@ -8463,7 +8464,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_event_header__init_id(&throttle_event.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, throttle_event.header.size); if (ret) return; @@ -8506,7 +8507,7 @@ static void perf_event_ksymbol_output(struct perf_event *event, void *data) perf_event_header__init_id(&ksymbol_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, ksymbol_event->event_id.header.size); if (ret) return; @@ -8596,7 +8597,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data) perf_event_header__init_id(&bpf_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, data, event, bpf_event->event_id.header.size); if (ret) return; @@ -8705,7 +8706,8 @@ static void perf_event_text_poke_output(struct perf_event *event, void *data) perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size); + ret = perf_output_begin(&handle, &sample, event, + text_poke_event->event_id.header.size); if (ret) return; @@ -8786,7 +8788,7 @@ static void perf_log_itrace_start(struct perf_event *event) rec.tid = perf_event_tid(event, current); perf_event_header__init_id(&rec.header, &sample, event); - ret = perf_output_begin(&handle, event, rec.header.size); + ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 192b8abc6330..ef91ae75ca56 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -147,6 +147,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail, static __always_inline int __perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size, bool backward) { @@ -237,18 +238,16 @@ __perf_output_begin(struct perf_output_handle *handle, handle->size = (1UL << page_shift) - offset; if (unlikely(have_lost)) { - struct perf_sample_data sample_data; - lost_event.header.size = sizeof(lost_event); lost_event.header.type = PERF_RECORD_LOST; lost_event.header.misc = 0; lost_event.id = event->id; lost_event.lost = local_xchg(&rb->lost, 0); - perf_event_header__init_id(&lost_event.header, - &sample_data, event); + /* XXX mostly redundant; @data is already fully initializes */ + perf_event_header__init_id(&lost_event.header, data, event); perf_output_put(handle, lost_event); - perf_event__output_id_sample(event, handle, &sample_data); + perf_event__output_id_sample(event, handle, data); } return 0; @@ -263,22 +262,25 @@ out: } int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, unsigned int size) + struct perf_sample_data *data, + struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, false); + return __perf_output_begin(handle, data, event, size, false); } int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, true); + return __perf_output_begin(handle, data, event, size, true); } int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, + return __perf_output_begin(handle, data, event, size, unlikely(is_write_backward(event))); } -- cgit From ce0f17fc93f63ee91428af10b7b2ddef38cd19e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:49:45 +0100 Subject: perf: Fix get_recursion_context() One should use in_serving_softirq() to detect SoftIRQ context. Fixes: 96f6d4444302 ("perf_counter: avoid recursion") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201030151955.120572175@infradead.org --- kernel/events/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/events/internal.h b/kernel/events/internal.h index fcbf5616a441..402054e755f2 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -211,7 +211,7 @@ static inline int get_recursion_context(int *recursion) rctx = 3; else if (in_irq()) rctx = 2; - else if (in_softirq()) + else if (in_serving_softirq()) rctx = 1; else rctx = 0; -- cgit From 09da9c81253dd8e43e0d2d7cea02de6f9f19499d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 13:43:16 +0100 Subject: perf: Optimize get_recursion_context() "Look ma, no branches!" Signed-off-by: Peter Zijlstra (Intel) Acked-by: Jesper Dangaard Brouer Link: https://lkml.kernel.org/r/20201030151955.187580298@infradead.org --- kernel/events/internal.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 402054e755f2..228801e20788 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -205,16 +205,12 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) static inline int get_recursion_context(int *recursion) { - int rctx; - - if (unlikely(in_nmi())) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_serving_softirq()) - rctx = 1; - else - rctx = 0; + unsigned int pc = preempt_count(); + unsigned char rctx = 0; + + rctx += !!(pc & (NMI_MASK)); + rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK)); + rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); if (recursion[rctx]) return -1; -- cgit From 76a4efa80900fc40e0fdf243b42aec9fb8c35d24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 30 Oct 2020 12:14:21 +0100 Subject: perf/arch: Remove perf_sample_data::regs_user_copy struct perf_sample_data lives on-stack, we should be careful about it's size. Furthermore, the pt_regs copy in there is only because x86_64 is a trainwreck, solve it differently. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Steven Rostedt Link: https://lkml.kernel.org/r/20201030151955.258178461@infradead.org --- kernel/events/core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index fc681c7c1e03..d67c9cbb0f6a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6374,14 +6374,13 @@ perf_output_sample_regs(struct perf_output_handle *handle, } static void perf_sample_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; } else if (!(current->flags & PF_KTHREAD)) { - perf_get_regs_user(regs_user, regs, regs_user_copy); + perf_get_regs_user(regs_user, regs); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; regs_user->regs = NULL; @@ -7083,8 +7082,7 @@ void perf_prepare_sample(struct perf_event_header *header, } if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) - perf_sample_regs_user(&data->regs_user, regs, - &data->regs_user_copy); + perf_sample_regs_user(&data->regs_user, regs); if (sample_type & PERF_SAMPLE_REGS_USER) { /* regs dump ABI info */ -- cgit From 8c7855d82933bab7fa5e96f0e568fc125c2e1ab4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Oct 2020 16:28:25 +0100 Subject: perf: Simplify group_sched_out() Since event_sched_out() clears cpuctx->exclusive upon removal of an exclusive event (and only group leaders can be exclusive), there is no point in group_sched_out() trying to do it too. It is impossible for cpuctx->exclusive to still be set here. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201029162901.904060564@infradead.org --- kernel/events/core.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index d67c9cbb0f6a..9a5736617a82 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2312,9 +2312,6 @@ group_sched_out(struct perf_event *group_event, event_sched_out(event, cpuctx, ctx); perf_pmu_enable(ctx->pmu); - - if (group_event->attr.exclusive) - cpuctx->exclusive = 0; } #define DETACH_GROUP 0x01UL -- cgit From 251ff2d49347793d348babcff745289b11910e96 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Oct 2020 16:29:15 +0100 Subject: perf: Simplify group_sched_in() Collate the error paths. Code duplication only leads to divergence and extra bugs. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201029162901.972161394@infradead.org --- kernel/events/core.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 9a5736617a82..f0e526866a1c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2580,11 +2580,8 @@ group_sched_in(struct perf_event *group_event, pmu->start_txn(pmu, PERF_PMU_TXN_ADD); - if (event_sched_in(group_event, cpuctx, ctx)) { - pmu->cancel_txn(pmu); - perf_mux_hrtimer_restart(cpuctx); - return -EAGAIN; - } + if (event_sched_in(group_event, cpuctx, ctx)) + goto error; /* * Schedule in siblings as one group (if any): @@ -2613,10 +2610,9 @@ group_error: } event_sched_out(group_event, cpuctx, ctx); +error: pmu->cancel_txn(pmu); - perf_mux_hrtimer_restart(cpuctx); - return -EAGAIN; } -- cgit From 2714c3962f304d031d5016c963c4b459337b0749 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Oct 2020 16:29:53 +0100 Subject: perf: Fix event multiplexing for exclusive groups Commit 9e6302056f80 ("perf: Use hrtimers for event multiplexing") placed the hrtimer (re)start call in the wrong place. Instead of capturing all scheduling failures, it only considered the PMU failure. The result is that groups using perf_event_attr::exclusive are no longer rotated. Fixes: 9e6302056f80 ("perf: Use hrtimers for event multiplexing") Reported-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201029162902.038667689@infradead.org --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index f0e526866a1c..00be48acdc36 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2612,7 +2612,6 @@ group_error: error: pmu->cancel_txn(pmu); - perf_mux_hrtimer_restart(cpuctx); return -EAGAIN; } @@ -3672,6 +3671,7 @@ static int merge_sched_in(struct perf_event *event, void *data) *can_add_hw = 0; ctx->rotate_necessary = 1; + perf_mux_hrtimer_restart(cpuctx); } return 0; -- cgit From 1908dc911792067287458fdb0800f036f4f4e0f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 Oct 2020 16:32:22 +0100 Subject: perf: Tweak perf_event_attr::exclusive semantics Currently perf_event_attr::exclusive can be used to ensure an event(group) is the sole group scheduled on the PMU. One consequence is that when you have a pinned event (say the watchdog) you can no longer have regular exclusive event(group)s. Inspired by the fact that !pinned events are considered less strict, allow !pinned,exclusive events to share the PMU with pinned,!exclusive events. Pinned,exclusive is still fully exclusive. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20201029162902.105962225@infradead.org --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index 00be48acdc36..dc568ca295bd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2637,7 +2637,7 @@ static int group_can_go_on(struct perf_event *event, * If this group is exclusive and there are already * events on the CPU, it can't go on. */ - if (event->attr.exclusive && cpuctx->active_oncpu) + if (event->attr.exclusive && !list_empty(get_event_list(event))) return 0; /* * Otherwise, try to add it if all previous groups were able -- cgit