From 0d6d062ca27ec7ef547712d34dcfcfb952bcef53 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 4 May 2023 16:30:00 +0530 Subject: perf/core: Rework forwarding of {task|cpu}-clock events Currently, PERF_TYPE_SOFTWARE is treated specially since task-clock and cpu-clock events are interfaced through it but internally gets forwarded to their own pmus. Rework this by overwriting event->attr.type in perf_swevent_init() which will cause perf_init_event() to retry with updated type and event will automatically get forwarded to right pmu. With the change, SW pmu no longer needs to be treated specially and can be included in 'pmu_idr' list. Suggested-by: Peter Zijlstra Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230504110003.2548-2-ravi.bangoria@amd.com --- kernel/events/core.c | 77 ++++++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 36 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 68baa8194d9f..c01bbe93e291 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6647,7 +6647,7 @@ static void perf_sigtrap(struct perf_event *event) return; send_sig_perf((void __user *)event->pending_addr, - event->attr.type, event->attr.sig_data); + event->orig_type, event->attr.sig_data); } /* @@ -9951,6 +9951,9 @@ static void sw_perf_event_destroy(struct perf_event *event) swevent_hlist_put(); } +static struct pmu perf_cpu_clock; /* fwd declaration */ +static struct pmu perf_task_clock; + static int perf_swevent_init(struct perf_event *event) { u64 event_id = event->attr.config; @@ -9966,7 +9969,10 @@ static int perf_swevent_init(struct perf_event *event) switch (event_id) { case PERF_COUNT_SW_CPU_CLOCK: + event->attr.type = perf_cpu_clock.type; + return -ENOENT; case PERF_COUNT_SW_TASK_CLOCK: + event->attr.type = perf_task_clock.type; return -ENOENT; default: @@ -11086,7 +11092,7 @@ static void cpu_clock_event_read(struct perf_event *event) static int cpu_clock_event_init(struct perf_event *event) { - if (event->attr.type != PERF_TYPE_SOFTWARE) + if (event->attr.type != perf_cpu_clock.type) return -ENOENT; if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) @@ -11107,6 +11113,7 @@ static struct pmu perf_cpu_clock = { .task_ctx_nr = perf_sw_context, .capabilities = PERF_PMU_CAP_NO_NMI, + .dev = PMU_NULL_DEV, .event_init = cpu_clock_event_init, .add = cpu_clock_event_add, @@ -11167,7 +11174,7 @@ static void task_clock_event_read(struct perf_event *event) static int task_clock_event_init(struct perf_event *event) { - if (event->attr.type != PERF_TYPE_SOFTWARE) + if (event->attr.type != perf_task_clock.type) return -ENOENT; if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) @@ -11188,6 +11195,7 @@ static struct pmu perf_task_clock = { .task_ctx_nr = perf_sw_context, .capabilities = PERF_PMU_CAP_NO_NMI, + .dev = PMU_NULL_DEV, .event_init = task_clock_event_init, .add = task_clock_event_add, @@ -11415,31 +11423,31 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) goto unlock; pmu->type = -1; - if (!name) - goto skip_type; + if (WARN_ONCE(!name, "Can not register anonymous pmu.\n")) { + ret = -EINVAL; + goto free_pdc; + } + pmu->name = name; - if (type != PERF_TYPE_SOFTWARE) { - if (type >= 0) - max = type; + if (type >= 0) + max = type; - ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); - if (ret < 0) - goto free_pdc; + ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); + if (ret < 0) + goto free_pdc; - WARN_ON(type >= 0 && ret != type); + WARN_ON(type >= 0 && ret != type); - type = ret; - } + type = ret; pmu->type = type; - if (pmu_bus_running) { + if (pmu_bus_running && !pmu->dev) { ret = pmu_dev_alloc(pmu); if (ret) goto free_idr; } -skip_type: ret = -ENOMEM; pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context); if (!pmu->cpu_pmu_context) @@ -11481,16 +11489,7 @@ skip_type: if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; - /* - * Ensure the TYPE_SOFTWARE PMUs are at the head of the list, - * since these cannot be in the IDR. This way the linear search - * is fast, provided a valid software event is provided. - */ - if (type == PERF_TYPE_SOFTWARE || !name) - list_add_rcu(&pmu->entry, &pmus); - else - list_add_tail_rcu(&pmu->entry, &pmus); - + list_add_rcu(&pmu->entry, &pmus); atomic_set(&pmu->exclusive_cnt, 0); ret = 0; unlock: @@ -11499,12 +11498,13 @@ unlock: return ret; free_dev: - device_del(pmu->dev); - put_device(pmu->dev); + if (pmu->dev && pmu->dev != PMU_NULL_DEV) { + device_del(pmu->dev); + put_device(pmu->dev); + } free_idr: - if (pmu->type != PERF_TYPE_SOFTWARE) - idr_remove(&pmu_idr, pmu->type); + idr_remove(&pmu_idr, pmu->type); free_pdc: free_percpu(pmu->pmu_disable_count); @@ -11525,9 +11525,8 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_rcu(); free_percpu(pmu->pmu_disable_count); - if (pmu->type != PERF_TYPE_SOFTWARE) - idr_remove(&pmu_idr, pmu->type); - if (pmu_bus_running) { + idr_remove(&pmu_idr, pmu->type); + if (pmu_bus_running && pmu->dev && pmu->dev != PMU_NULL_DEV) { if (pmu->nr_addr_filters) device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); device_del(pmu->dev); @@ -11601,6 +11600,12 @@ static struct pmu *perf_init_event(struct perf_event *event) idx = srcu_read_lock(&pmus_srcu); + /* + * Save original type before calling pmu->event_init() since certain + * pmus overwrites event->attr.type to forward event to another pmu. + */ + event->orig_type = event->attr.type; + /* Try parent's PMU first: */ if (event->parent && event->parent->pmu) { pmu = event->parent->pmu; @@ -13640,8 +13645,8 @@ void __init perf_event_init(void) perf_event_init_all_cpus(); init_srcu_struct(&pmus_srcu); perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE); - perf_pmu_register(&perf_cpu_clock, NULL, -1); - perf_pmu_register(&perf_task_clock, NULL, -1); + perf_pmu_register(&perf_cpu_clock, "cpu_clock", -1); + perf_pmu_register(&perf_task_clock, "task_clock", -1); perf_tp_register(); perf_event_init_cpu(smp_processor_id()); register_reboot_notifier(&perf_reboot_notifier); @@ -13684,7 +13689,7 @@ static int __init perf_event_sysfs_init(void) goto unlock; list_for_each_entry(pmu, &pmus, entry) { - if (!pmu->name || pmu->type < 0) + if (pmu->dev) continue; ret = pmu_dev_alloc(pmu); -- cgit From 9551fbb64d094cc105964716224adeb7765df8fd Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 4 May 2023 16:30:02 +0530 Subject: perf/core: Remove pmu linear searching code Searching for the right pmu by iterating over all pmus is no longer required since all pmus now *must* be present in the 'pmu_idr' list. So, remove linear searching code. Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230504110003.2548-4-ravi.bangoria@amd.com --- kernel/events/core.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index c01bbe93e291..231b187c1a3f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11630,38 +11630,27 @@ static struct pmu *perf_init_event(struct perf_event *event) } again: + ret = -ENOENT; rcu_read_lock(); pmu = idr_find(&pmu_idr, type); rcu_read_unlock(); - if (pmu) { - if (event->attr.type != type && type != PERF_TYPE_RAW && - !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)) - goto fail; - - ret = perf_try_init_event(pmu, event); - if (ret == -ENOENT && event->attr.type != type && !extended_type) { - type = event->attr.type; - goto again; - } + if (!pmu) + goto fail; - if (ret) - pmu = ERR_PTR(ret); + if (event->attr.type != type && type != PERF_TYPE_RAW && + !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)) + goto fail; - goto unlock; + ret = perf_try_init_event(pmu, event); + if (ret == -ENOENT && event->attr.type != type && !extended_type) { + type = event->attr.type; + goto again; } - list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) { - ret = perf_try_init_event(pmu, event); - if (!ret) - goto unlock; - - if (ret != -ENOENT) { - pmu = ERR_PTR(ret); - goto unlock; - } - } fail: - pmu = ERR_PTR(-ENOENT); + if (ret) + pmu = ERR_PTR(ret); + unlock: srcu_read_unlock(&pmus_srcu, idx); -- cgit From 228020b490eda9133c9cb6f59a5ee1278d8c463f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 5 Jun 2023 12:14:01 +0200 Subject: perf: Re-instate the linear PMU search Full revert of commit 9551fbb64d09 ("perf/core: Remove pmu linear searching code"). Some architectures (notably arm/arm64) still relied on the linear search in order to find the PMU that consumes PERF_TYPE_{HARDWARE,HW_CACHE,RAW}. This will need a more thorought audit and cleanup. Reported-by: Nathan Chancellor Reported-by: Krzysztof Kozlowski Acked-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230605101401.GL38236@hirez.programming.kicks-ass.net --- kernel/events/core.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index 231b187c1a3f..c01bbe93e291 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11630,27 +11630,38 @@ static struct pmu *perf_init_event(struct perf_event *event) } again: - ret = -ENOENT; rcu_read_lock(); pmu = idr_find(&pmu_idr, type); rcu_read_unlock(); - if (!pmu) - goto fail; + if (pmu) { + if (event->attr.type != type && type != PERF_TYPE_RAW && + !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)) + goto fail; - if (event->attr.type != type && type != PERF_TYPE_RAW && - !(pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)) - goto fail; + ret = perf_try_init_event(pmu, event); + if (ret == -ENOENT && event->attr.type != type && !extended_type) { + type = event->attr.type; + goto again; + } - ret = perf_try_init_event(pmu, event); - if (ret == -ENOENT && event->attr.type != type && !extended_type) { - type = event->attr.type; - goto again; + if (ret) + pmu = ERR_PTR(ret); + + goto unlock; } -fail: - if (ret) - pmu = ERR_PTR(ret); + list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) { + ret = perf_try_init_event(pmu, event); + if (!ret) + goto unlock; + if (ret != -ENOENT) { + pmu = ERR_PTR(ret); + goto unlock; + } + } +fail: + pmu = ERR_PTR(-ENOENT); unlock: srcu_read_unlock(&pmus_srcu, idx); -- cgit