From a02026bf9da13cd44fb444857d5aebc934e1af5a Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 6 Sep 2023 09:02:56 -0700 Subject: irqchip/gic-v3: Enable support for SGIs to act as NMIs As of commit 6abbd6988971 ("irqchip/gic, gic-v3: Make SGIs use handle_percpu_devid_irq()") SGIs are treated the same as PPIs/EPPIs and use handle_percpu_devid_irq() by default. Unfortunately, handle_percpu_devid_irq() isn't NMI safe, and so to run in an NMI context those should use handle_percpu_devid_fasteoi_nmi(). In order to accomplish this, we just have to make room for SGIs in the array of refcounts that keeps track of which interrupts are set as NMI. We also rename the array and create a new indexing scheme that accounts for SGIs. Also, enable NMI support prior to gic_smp_init() as allocation of SGIs as IRQs/NMIs happens as part of this routine. Co-developed-by: Sumit Garg Signed-off-by: Sumit Garg Acked-by: Mark Rutland Tested-by: Chen-Yu Tsai Signed-off-by: Douglas Anderson Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20230906090246.v13.1.I1223c11c88937bd0cbd9b086d4ef216985797302@changeid Signed-off-by: Catalin Marinas --- drivers/irqchip/irq-gic-v3.c | 59 ++++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eedfa8e9f077..787ccc880b22 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -78,6 +78,13 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); #define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) #define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer) +/* + * There are 16 SGIs, though we only actually use 8 in Linux. The other 8 SGIs + * are potentially stolen by the secure side. Some code, especially code dealing + * with hwirq IDs, is simplified by accounting for all 16.
+ */ +#define SGI_NR 16 + /* * The behaviours of RPR and PMR registers differ depending on the value of * SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the @@ -125,8 +132,8 @@ EXPORT_SYMBOL(gic_nonsecure_priorities); __priority; \ }) -/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ -static refcount_t *ppi_nmi_refs; +/* rdist_nmi_refs[n] == number of cpus having the rdist interrupt n set as NMI */ +static refcount_t *rdist_nmi_refs; static struct gic_kvm_info gic_v3_kvm_info __initdata; static DEFINE_PER_CPU(bool, has_rss); @@ -519,9 +526,22 @@ static u32 __gic_get_ppi_index(irq_hw_number_t hwirq) } } -static u32 gic_get_ppi_index(struct irq_data *d) +static u32 __gic_get_rdist_index(irq_hw_number_t hwirq) +{ + switch (__get_intid_range(hwirq)) { + case SGI_RANGE: + case PPI_RANGE: + return hwirq; + case EPPI_RANGE: + return hwirq - EPPI_BASE_INTID + 32; + default: + unreachable(); + } +} + +static u32 gic_get_rdist_index(struct irq_data *d) { - return __gic_get_ppi_index(d->hwirq); + return __gic_get_rdist_index(d->hwirq); } static int gic_irq_nmi_setup(struct irq_data *d) @@ -545,11 +565,14 @@ static int gic_irq_nmi_setup(struct irq_data *d) /* desc lock should already be held */ if (gic_irq_in_rdist(d)) { - u32 idx = gic_get_ppi_index(d); + u32 idx = gic_get_rdist_index(d); - /* Setting up PPI as NMI, only switch handler for first NMI */ - if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) { - refcount_set(&ppi_nmi_refs[idx], 1); + /* + * Setting up a percpu interrupt as NMI, only switch handler + * for first NMI + */ + if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) { + refcount_set(&rdist_nmi_refs[idx], 1); desc->handle_irq = handle_percpu_devid_fasteoi_nmi; } } else { @@ -582,10 +605,10 @@ static void gic_irq_nmi_teardown(struct irq_data *d) /* desc lock should already be held */ if (gic_irq_in_rdist(d)) { - u32 idx = gic_get_ppi_index(d); + u32 idx = gic_get_rdist_index(d); /* Tearing down NMI, only switch handler for last NMI */ - if (refcount_dec_and_test(&ppi_nmi_refs[idx])) + if (refcount_dec_and_test(&rdist_nmi_refs[idx])) desc->handle_irq = handle_percpu_devid_irq; } else { desc->handle_irq = handle_fasteoi_irq; @@ -1279,10 +1302,10 @@ static void gic_cpu_init(void) rbase = gic_data_rdist_sgi_base(); /* Configure SGIs/PPIs as non-secure Group-1 */ - for (i = 0; i < gic_data.ppi_nr + 16; i += 32) + for (i = 0; i < gic_data.ppi_nr + SGI_NR; i += 32) writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8); - gic_cpu_config(rbase, gic_data.ppi_nr + 16, gic_redist_wait_for_rwp); + gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, gic_redist_wait_for_rwp); /* initialise system registers */ gic_cpu_sys_reg_init(); @@ -1939,12 +1962,13 @@ static void gic_enable_nmi_support(void) return; } - ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL); - if (!ppi_nmi_refs) + rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR, + sizeof(*rdist_nmi_refs), GFP_KERNEL); + if (!rdist_nmi_refs) return; - for (i = 0; i < gic_data.ppi_nr; i++) - refcount_set(&ppi_nmi_refs[i], 0); + for (i = 0; i < gic_data.ppi_nr + SGI_NR; i++) + refcount_set(&rdist_nmi_refs[i], 0); pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", gic_has_relaxed_pmr_sync() ? 
"relaxed" : "forced"); @@ -2061,6 +2085,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, gic_dist_init(); gic_cpu_init(); + gic_enable_nmi_support(); gic_smp_init(); gic_cpu_pm_init(); @@ -2073,8 +2098,6 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, gicv2m_init(handle, gic_data.domain); } - gic_enable_nmi_support(); - return 0; out_free: -- cgit From bfc653aa89cb05796d7b4e046600accb442c9b7a Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Mon, 21 Aug 2023 18:16:08 -0500 Subject: perf: arm_cspmu: Separate Arm and vendor module Arm Coresight PMU driver consists of main standard code and vendor backend code. Both are currently built as a single module. This patch adds vendor registration API to separate the two to keep things modular. The main driver requests each known backend module during initialization and defer device binding process. The backend module then registers an init callback to the main driver and continue the device driver binding process. Signed-off-by: Besar Wicaksono Reviewed-by: Suzuki K Poulose Reviewed-and-tested-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20230821231608.50911-1-bwicaksono@nvidia.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/Kconfig | 9 +- drivers/perf/arm_cspmu/Makefile | 6 +- drivers/perf/arm_cspmu/arm_cspmu.c | 168 +++++++++++++++++++++++++++------- drivers/perf/arm_cspmu/arm_cspmu.h | 25 ++++- drivers/perf/arm_cspmu/nvidia_cspmu.c | 34 ++++++- drivers/perf/arm_cspmu/nvidia_cspmu.h | 17 ---- 6 files changed, 199 insertions(+), 60 deletions(-) delete mode 100644 drivers/perf/arm_cspmu/nvidia_cspmu.h (limited to 'drivers') diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig index 25d25ded0983..d5f787d22234 100644 --- a/drivers/perf/arm_cspmu/Kconfig +++ b/drivers/perf/arm_cspmu/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU tristate "ARM Coresight Architecture PMU" @@ -10,3 +10,10 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU based on ARM CoreSight PMU architecture. Note that this PMU architecture does not have relationship with the ARM CoreSight Self-Hosted Tracing. + +config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU + tristate "NVIDIA Coresight Architecture PMU" + depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU + help + Provides NVIDIA specific attributes for performance monitoring unit + (PMU) devices based on ARM CoreSight PMU architecture. diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile index fedb17df982d..0309d2ff264a 100644 --- a/drivers/perf/arm_cspmu/Makefile +++ b/drivers/perf/arm_cspmu/Makefile @@ -1,6 +1,8 @@ -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o -arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o +arm_cspmu_module-y := arm_cspmu.o + +obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index e2b7827c4563..c59f1e5a35a3 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -16,7 +16,7 @@ * The user should refer to the vendor technical documentation to get details * about the supported events. * - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * */ @@ -26,11 +26,11 @@ #include #include #include +#include #include #include #include "arm_cspmu.h" -#include "nvidia_cspmu.h" #define PMUNAME "arm_cspmu" #define DRVNAME "arm-cs-arch-pmu" @@ -112,11 +112,10 @@ */ #define HILOHI_MAX_POLL 1000 -/* JEDEC-assigned JEP106 identification code */ -#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B - static unsigned long arm_cspmu_cpuhp_state; +static DEFINE_MUTEX(arm_cspmu_lock); + static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev) { return *(struct acpi_apmt_node **)dev_get_platdata(dev); @@ -373,27 +372,37 @@ static struct attribute_group arm_cspmu_cpumask_attr_group = { .attrs = arm_cspmu_cpumask_attrs, }; -struct impl_match { - u32 pmiidr; - u32 mask; - int (*impl_init_ops)(struct arm_cspmu *cspmu); -}; - -static const struct impl_match impl_match[] = { +static struct arm_cspmu_impl_match impl_match[] = { { - .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA, - .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, - .impl_init_ops = nv_cspmu_init_ops + .module_name = "nvidia_cspmu", + .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA, + .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .module = NULL, + .impl_init_ops = NULL, }, - {} + {0} }; +static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr) +{ + struct arm_cspmu_impl_match *match = impl_match; + + for (; match->pmiidr_val; match++) { + u32 mask = match->pmiidr_mask; + + if ((match->pmiidr_val & mask) == (pmiidr & mask)) + return match; + } + + return NULL; +} + static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) { - int ret; + int ret = 0; struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev); - const struct impl_match *match = impl_match; + struct arm_cspmu_impl_match *match; /* * Get PMU implementer and product id from APMT node. @@ -405,17 +414,36 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) readl(cspmu->base0 + PMIIDR); /* Find implementer specific attribute ops. */ - for (; match->pmiidr; match++) { - const u32 mask = match->mask; + match = arm_cspmu_impl_match_get(cspmu->impl.pmiidr); + + /* Load implementer module and initialize the callbacks. */ + if (match) { + mutex_lock(&arm_cspmu_lock); + + if (match->impl_init_ops) { + /* Prevent unload until PMU registration is done. 
*/ + if (try_module_get(match->module)) { + cspmu->impl.module = match->module; + cspmu->impl.match = match; + ret = match->impl_init_ops(cspmu); + if (ret) + module_put(match->module); + } else { + WARN(1, "arm_cspmu failed to get module: %s\n", + match->module_name); + ret = -EINVAL; + } + } else { + request_module_nowait(match->module_name); + ret = -EPROBE_DEFER; + } - if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) { - ret = match->impl_init_ops(cspmu); - if (ret) - return ret; + mutex_unlock(&arm_cspmu_lock); - break; - } - } + if (ret) + return ret; + } else + cspmu->impl.module = THIS_MODULE; /* Use default callbacks if implementer doesn't provide one. */ CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs); @@ -478,11 +506,6 @@ arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu) struct attribute_group **attr_groups = NULL; struct device *dev = cspmu->dev; const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; - int ret; - - ret = arm_cspmu_init_impl_ops(cspmu); - if (ret) - return NULL; cspmu->identifier = impl_ops->get_identifier(cspmu); cspmu->name = impl_ops->get_name(cspmu); @@ -1149,7 +1172,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu) cspmu->pmu = (struct pmu){ .task_ctx_nr = perf_invalid_context, - .module = THIS_MODULE, + .module = cspmu->impl.module, .pmu_enable = arm_cspmu_enable, .pmu_disable = arm_cspmu_disable, .event_init = arm_cspmu_event_init, @@ -1196,11 +1219,17 @@ static int arm_cspmu_device_probe(struct platform_device *pdev) if (ret) return ret; - ret = arm_cspmu_register_pmu(cspmu); + ret = arm_cspmu_init_impl_ops(cspmu); if (ret) return ret; - return 0; + ret = arm_cspmu_register_pmu(cspmu); + + /* Matches arm_cspmu_init_impl_ops() above. */ + if (cspmu->impl.module != THIS_MODULE) + module_put(cspmu->impl.module); + + return ret; } static int arm_cspmu_device_remove(struct platform_device *pdev) @@ -1300,6 +1329,75 @@ static void __exit arm_cspmu_exit(void) cpuhp_remove_multi_state(arm_cspmu_cpuhp_state); } +int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match) +{ + struct arm_cspmu_impl_match *match; + int ret = 0; + + match = arm_cspmu_impl_match_get(impl_match->pmiidr_val); + + if (match) { + mutex_lock(&arm_cspmu_lock); + + if (!match->impl_init_ops) { + match->module = impl_match->module; + match->impl_init_ops = impl_match->impl_init_ops; + } else { + /* Broken match table may contain non-unique entries */ + WARN(1, "arm_cspmu backend already registered for module: %s, pmiidr: 0x%x, mask: 0x%x\n", + match->module_name, + match->pmiidr_val, + match->pmiidr_mask); + + ret = -EINVAL; + } + + mutex_unlock(&arm_cspmu_lock); + + if (!ret) + ret = driver_attach(&arm_cspmu_driver.driver); + } else { + pr_err("arm_cspmu reg failed, unable to find a match for pmiidr: 0x%x\n", + impl_match->pmiidr_val); + + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(arm_cspmu_impl_register); + +static int arm_cspmu_match_device(struct device *dev, const void *match) +{ + struct arm_cspmu *cspmu = platform_get_drvdata(to_platform_device(dev)); + + return (cspmu && cspmu->impl.match == match) ? 1 : 0; +} + +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match) +{ + struct device *dev; + struct arm_cspmu_impl_match *match; + + match = arm_cspmu_impl_match_get(impl_match->pmiidr_val); + + if (WARN_ON(!match)) + return; + + /* Unbind the driver from all matching backend devices. 
*/ + while ((dev = driver_find_device(&arm_cspmu_driver.driver, NULL, + match, arm_cspmu_match_device))) + device_release_driver(dev); + + mutex_lock(&arm_cspmu_lock); + + match->module = NULL; + match->impl_init_ops = NULL; + + mutex_unlock(&arm_cspmu_lock); +} +EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister); + module_init(arm_cspmu_init); module_exit(arm_cspmu_exit); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 83df53d1c132..7936a90ded7f 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 * * ARM CoreSight Architecture PMU driver. - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * */ @@ -69,6 +69,9 @@ #define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0) #define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20) +/* JEDEC-assigned JEP106 identification code */ +#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B + struct arm_cspmu; /* This tracks the events assigned to each counter in the PMU. */ @@ -106,9 +109,23 @@ struct arm_cspmu_impl_ops { struct attribute *attr, int unused); }; +/* Vendor/implementer registration parameter. */ +struct arm_cspmu_impl_match { + /* Backend module. */ + struct module *module; + const char *module_name; + /* PMIIDR value/mask. */ + u32 pmiidr_val; + u32 pmiidr_mask; + /* Callback to vendor backend to init arm_cspmu_impl::ops. */ + int (*impl_init_ops)(struct arm_cspmu *cspmu); +}; + /* Vendor/implementer descriptor. */ struct arm_cspmu_impl { u32 pmiidr; + struct module *module; + struct arm_cspmu_impl_match *match; struct arm_cspmu_impl_ops ops; void *ctx; }; @@ -147,4 +164,10 @@ ssize_t arm_cspmu_sysfs_format_show(struct device *dev, struct device_attribute *attr, char *buf); +/* Register vendor backend. */ +int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match); + +/* Unregister vendor backend. */ +void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match); + #endif /* __ARM_CSPMU_H__ */ diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c index 72ef80caa3c8..0382b702f092 100644 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.c +++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c @@ -1,14 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * */ /* Support for NVIDIA specific attributes. 
*/ +#include #include -#include "nvidia_cspmu.h" +#include "arm_cspmu.h" #define NV_PCIE_PORT_COUNT 10ULL #define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0) @@ -351,7 +352,7 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu, return name; } -int nv_cspmu_init_ops(struct arm_cspmu *cspmu) +static int nv_cspmu_init_ops(struct arm_cspmu *cspmu) { u32 prodid; struct nv_cspmu_ctx *ctx; @@ -395,6 +396,31 @@ int nv_cspmu_init_ops(struct arm_cspmu *cspmu) return 0; } -EXPORT_SYMBOL_GPL(nv_cspmu_init_ops); + +/* Match all NVIDIA Coresight PMU devices */ +static const struct arm_cspmu_impl_match nv_cspmu_param = { + .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA, + .module = THIS_MODULE, + .impl_init_ops = nv_cspmu_init_ops +}; + +static int __init nvidia_cspmu_init(void) +{ + int ret; + + ret = arm_cspmu_impl_register(&nv_cspmu_param); + if (ret) + pr_err("nvidia_cspmu backend registration error: %d\n", ret); + + return ret; +} + +static void __exit nvidia_cspmu_exit(void) +{ + arm_cspmu_impl_unregister(&nv_cspmu_param); +} + +module_init(nvidia_cspmu_init); +module_exit(nvidia_cspmu_exit); MODULE_LICENSE("GPL v2"); diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h deleted file mode 100644 index 71e18f0dc50b..000000000000 --- a/drivers/perf/arm_cspmu/nvidia_cspmu.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * - */ - -/* Support for NVIDIA specific attributes. */ - -#ifndef __NVIDIA_CSPMU_H__ -#define __NVIDIA_CSPMU_H__ - -#include "arm_cspmu.h" - -/* Allocate NVIDIA descriptor. */ -int nv_cspmu_init_ops(struct arm_cspmu *cspmu); - -#endif /* __NVIDIA_CSPMU_H__ */ -- cgit From 8c282414ca6209977cb6d6cc66470ca2d1e56bf6 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 13 Sep 2023 16:39:38 -0700 Subject: perf: arm_cspmu: Split 64-bit write to 32-bit writes Split the 64-bit register accesses if 64-bit access is not supported by the PMU. Signed-off-by: Ilkka Koskinen Reviewed-by: Besar Wicaksono Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230913233941.9814-2-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index c59f1e5a35a3..5f4f04135a22 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -719,7 +719,10 @@ static void arm_cspmu_write_counter(struct perf_event *event, u64 val) if (use_64b_counter_reg(cspmu)) { offset = counter_offset(sizeof(u64), event->hw.idx); - writeq(val, cspmu->base1 + offset); + if (cspmu->has_atomic_dword) + writeq(val, cspmu->base1 + offset); + else + lo_hi_writeq(val, cspmu->base1 + offset); } else { offset = counter_offset(sizeof(u32), event->hw.idx); -- cgit From 0a7603ab242e9bab530227cf0d0d344d4e334acc Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 13 Sep 2023 16:39:39 -0700 Subject: perf: arm_cspmu: Support implementation specific filters The ARM Coresight PMU architecture specification [1] defines PMEVTYPER and PMEVFILT* registers as optional in Chapter 2.1. Moreover, implementers may choose to use PMIMPDEF* registers (offset: 0xD80 -> 0xDFF) to filter the events. Add support for those by adding an implementation specific filter callback function.
[1] https://developer.arm.com/documentation/ihi0091/latest Signed-off-by: Ilkka Koskinen Reviewed-by: Besar Wicaksono Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230913233941.9814-3-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 12 ++++++++---- drivers/perf/arm_cspmu/arm_cspmu.h | 3 +++ 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 5f4f04135a22..2ecbf8d7714b 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -116,6 +116,9 @@ static unsigned long arm_cspmu_cpuhp_state; static DEFINE_MUTEX(arm_cspmu_lock); +static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc, u32 filter); + static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev) { return *(struct acpi_apmt_node **)dev_get_platdata(dev); @@ -454,6 +457,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu) CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type); CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter); CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible); + CHECK_DEFAULT_IMPL_OPS(impl_ops, set_ev_filter); return 0; } @@ -815,9 +819,9 @@ static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu, writel(hwc->config, cspmu->base0 + offset); } -static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, - struct hw_perf_event *hwc, - u32 filter) +static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc, + u32 filter) { u32 offset = PMEVFILTR + (4 * hwc->idx); @@ -849,7 +853,7 @@ static void arm_cspmu_start(struct perf_event *event, int pmu_flags) arm_cspmu_set_cc_filter(cspmu, filter); } else { arm_cspmu_set_event(cspmu, hwc); - arm_cspmu_set_ev_filter(cspmu, hwc, filter); + cspmu->impl.ops.set_ev_filter(cspmu, hwc, filter); } hwc->state = 0; diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index 7936a90ded7f..e0cca5b4aab9 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -104,6 +104,9 @@ struct arm_cspmu_impl_ops { u32 (*event_type)(const struct perf_event *event); /* Decode filter value from configs */ u32 (*event_filter)(const struct perf_event *event); + /* Set event filter */ + void (*set_ev_filter)(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc, u32 filter); /* Hide/show unsupported events */ umode_t (*event_attr_is_visible)(struct kobject *kobj, struct attribute *attr, int unused); -- cgit From 647d5c5a9e7672e285f54f0e141ee759e69382f2 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 13 Sep 2023 16:39:40 -0700 Subject: perf: arm_cspmu: Support implementation specific validation Some platforms may use e.g. a different filtering mechanism and, thus, may need a different way to validate the events and the group.
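As a concrete illustration of the new hook (a hypothetical backend, not code from this series): a backend whose filter registers are global to the whole PMU could reject any event whose filter configuration disagrees with its group leader's, along these lines:

	/* Illustrative sketch only; foo_* names are made up. */
	static int foo_validate_event(struct arm_cspmu *cspmu,
				      struct perf_event *event)
	{
		struct perf_event *leader = event->group_leader;

		/* All counters share one filter: configs must agree. */
		if (event->attr.config1 != leader->attr.config1)
			return -EINVAL;

		return 0;
	}

	/* Installed from the backend's impl_init_ops() callback: */
	cspmu->impl.ops.validate_event = foo_validate_event;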
Signed-off-by: Ilkka Koskinen Reviewed-by: Jonathan Cameron Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230913233941.9814-4-ilkka@os.amperecomputing.com Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/arm_cspmu.c | 8 +++++++- drivers/perf/arm_cspmu/arm_cspmu.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 2ecbf8d7714b..1ba00d640352 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -576,7 +576,7 @@ static void arm_cspmu_disable(struct pmu *pmu) static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events, struct perf_event *event) { - int idx; + int idx, ret; struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu); if (supports_cycle_counter(cspmu)) { @@ -610,6 +610,12 @@ static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events, if (idx >= cspmu->num_logical_ctrs) return -EAGAIN; + if (cspmu->impl.ops.validate_event) { + ret = cspmu->impl.ops.validate_event(cspmu, event); + if (ret) + return ret; + } + set_bit(idx, hw_events->used_ctrs); return idx; diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index e0cca5b4aab9..a30c8372214c 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -107,6 +107,9 @@ struct arm_cspmu_impl_ops { /* Set event filter */ void (*set_ev_filter)(struct arm_cspmu *cspmu, struct hw_perf_event *hwc, u32 filter); + /* Implementation specific event validation */ + int (*validate_event)(struct arm_cspmu *cspmu, + struct perf_event *event); /* Hide/show unsupported events */ umode_t (*event_attr_is_visible)(struct kobject *kobj, struct attribute *attr, int unused); -- cgit From a07a594152173a3dd3bdd12fc7d73dbba54cdbca Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Oct 2023 18:00:36 +0100 Subject: arm64: smp: avoid NMI IPIs with broken MediaTek FW Some MediaTek devices have broken firmware which corrupts some GICR registers behind the back of the OS, and pseudo-NMIs cannot be used on these devices. For more details see commit: 44bd78dd2b8897f5 ("irqchip/gic-v3: Disable pseudo NMIs on Mediatek devices w/ firmware issues") We did not take this problem into account in commit: 331a1b3a836c0f38 ("arm64: smp: Add arch support for backtrace using pseudo-NMI") Since that commit arm64's SMP code will try to set up some IPIs as pseudo-NMIs, even on systems with broken FW. The GICv3 code will (rightly) reject attempts to request interrupts as pseudo-NMIs, resulting in boot-time failures. Avoid the problem by taking the broken FW into account when deciding whether to request IPIs as pseudo-NMIs. The GICv3 driver maintains a static_key named "supports_pseudo_nmis" which is false on systems with broken FW, and we can consult this within ipi_should_be_nmi().
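The arm64 half of this change lives outside drivers/ and so is not visible in the filtered diff below; conceptually it amounts to consulting the (now non-static) key from ipi_should_be_nmi(), roughly:

	/* Approximate sketch of the arch/arm64/kernel/smp.c side,
	 * not the verbatim hunk. */
	static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
	{
		DECLARE_STATIC_KEY_FALSE(supports_pseudo_nmis);

		if (!system_uses_irq_prio_masking() ||
		    !static_branch_likely(&supports_pseudo_nmis))
			return false;

		/* ... per-IPI policy as before ... */
	}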
Fixes: 331a1b3a836c ("arm64: smp: Add arch support for backtrace using pseudo-NMI") Reported-by: Chen-Yu Tsai Closes: https://issuetracker.google.com/issues/197061987#comment68 Signed-off-by: Mark Rutland Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Reviewed-by: Marc Zyngier Tested-by: Chen-Yu Tsai Signed-off-by: Catalin Marinas --- drivers/irqchip/irq-gic-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 787ccc880b22..737da1b9aabf 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -106,7 +106,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 * interrupt. */ -static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities); EXPORT_SYMBOL(gic_nonsecure_priorities); -- cgit From 53a810ad3c5cde674cac71e629e6d10bfc9d838c Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 13 Sep 2023 16:39:41 -0700 Subject: perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU Ampere SoC PMU follows CoreSight PMU architecture. It uses implementation specific registers to filter events rather than PMEVFILTnR registers. Signed-off-by: Ilkka Koskinen Link: https://lore.kernel.org/r/20230913233941.9814-5-ilkka@os.amperecomputing.com [will: Include linux/io.h in ampere_cspmu.c for writel()] Signed-off-by: Will Deacon --- drivers/perf/arm_cspmu/Kconfig | 10 ++ drivers/perf/arm_cspmu/Makefile | 2 + drivers/perf/arm_cspmu/ampere_cspmu.c | 272 ++++++++++++++++++++++++++++++++++ drivers/perf/arm_cspmu/arm_cspmu.c | 8 + drivers/perf/arm_cspmu/arm_cspmu.h | 1 + 5 files changed, 293 insertions(+) create mode 100644 drivers/perf/arm_cspmu/ampere_cspmu.c (limited to 'drivers') diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig index d5f787d22234..6f4e28fc84a2 100644 --- a/drivers/perf/arm_cspmu/Kconfig +++ b/drivers/perf/arm_cspmu/Kconfig @@ -17,3 +17,13 @@ config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU help Provides NVIDIA specific attributes for performance monitoring unit (PMU) devices based on ARM CoreSight PMU architecture. + +config AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU + tristate "Ampere Coresight Architecture PMU" + depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU + help + Provides Ampere specific attributes for performance monitoring unit + (PMU) devices based on ARM CoreSight PMU architecture. + + In the first phase, the driver enables support on MCU PMU used in + AmpereOne SoC family. 
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile index 0309d2ff264a..220a734efd54 100644 --- a/drivers/perf/arm_cspmu/Makefile +++ b/drivers/perf/arm_cspmu/Makefile @@ -3,6 +3,8 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o + arm_cspmu_module-y := arm_cspmu.o obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o +obj-$(CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += ampere_cspmu.o diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c new file mode 100644 index 000000000000..f146a455e838 --- /dev/null +++ b/drivers/perf/arm_cspmu/ampere_cspmu.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Ampere SoC PMU (Performance Monitor Unit) + * + * Copyright (c) 2023, Ampere Computing LLC + */ +#include +#include +#include + +#include "arm_cspmu.h" + +#define PMAUXR0 0xD80 +#define PMAUXR1 0xD84 +#define PMAUXR2 0xD88 +#define PMAUXR3 0xD8C + +#define to_ampere_cspmu_ctx(cspmu) ((struct ampere_cspmu_ctx *)(cspmu->impl.ctx)) + +struct ampere_cspmu_ctx { + const char *name; + struct attribute **event_attr; + struct attribute **format_attr; +}; + +static DEFINE_IDA(mcu_pmu_ida); + +#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \ + static inline u32 get_##_name(const struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(_end, _start), \ + event->attr._config); \ + } \ + +SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8); +SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7); +SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23); +SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config1, 24, 55); + +static struct attribute *ampereone_mcu_pmu_event_attrs[] = { + ARM_CSPMU_EVENT_ATTR(cycle_count, 0x00), + ARM_CSPMU_EVENT_ATTR(act_sent, 0x01), + ARM_CSPMU_EVENT_ATTR(pre_sent, 0x02), + ARM_CSPMU_EVENT_ATTR(rd_sent, 0x03), + ARM_CSPMU_EVENT_ATTR(rda_sent, 0x04), + ARM_CSPMU_EVENT_ATTR(wr_sent, 0x05), + ARM_CSPMU_EVENT_ATTR(wra_sent, 0x06), + ARM_CSPMU_EVENT_ATTR(pd_entry_vld, 0x07), + ARM_CSPMU_EVENT_ATTR(sref_entry_vld, 0x08), + ARM_CSPMU_EVENT_ATTR(prea_sent, 0x09), + ARM_CSPMU_EVENT_ATTR(pre_sb_sent, 0x0a), + ARM_CSPMU_EVENT_ATTR(ref_sent, 0x0b), + ARM_CSPMU_EVENT_ATTR(rfm_sent, 0x0c), + ARM_CSPMU_EVENT_ATTR(ref_sb_sent, 0x0d), + ARM_CSPMU_EVENT_ATTR(rfm_sb_sent, 0x0e), + ARM_CSPMU_EVENT_ATTR(rd_rda_sent, 0x0f), + ARM_CSPMU_EVENT_ATTR(wr_wra_sent, 0x10), + ARM_CSPMU_EVENT_ATTR(raw_hazard, 0x11), + ARM_CSPMU_EVENT_ATTR(war_hazard, 0x12), + ARM_CSPMU_EVENT_ATTR(waw_hazard, 0x13), + ARM_CSPMU_EVENT_ATTR(rar_hazard, 0x14), + ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard, 0x15), + ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld, 0x16), + ARM_CSPMU_EVENT_ATTR(lprd_req_vld, 0x17), + ARM_CSPMU_EVENT_ATTR(hprd_req_vld, 0x18), + ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld, 0x19), + ARM_CSPMU_EVENT_ATTR(prefetch_tgt, 0x1a), + ARM_CSPMU_EVENT_ATTR(wr_req_vld, 0x1b), + ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld, 0x1c), + ARM_CSPMU_EVENT_ATTR(rd_retry, 0x1d), + ARM_CSPMU_EVENT_ATTR(wr_retry, 0x1e), + ARM_CSPMU_EVENT_ATTR(retry_gnt, 0x1f), + ARM_CSPMU_EVENT_ATTR(rank_change, 0x20), + ARM_CSPMU_EVENT_ATTR(dir_change, 0x21), + ARM_CSPMU_EVENT_ATTR(rank_dir_change, 0x22), + ARM_CSPMU_EVENT_ATTR(rank_active, 0x23), + ARM_CSPMU_EVENT_ATTR(rank_idle, 0x24), + ARM_CSPMU_EVENT_ATTR(rank_pd, 0x25), + ARM_CSPMU_EVENT_ATTR(rank_sref, 0x26), + ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh, 0x27), + ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh, 0x28), + 
ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh, 0x29), + ARM_CSPMU_EVENT_ATTR(phy_updt_complt, 0x2a), + ARM_CSPMU_EVENT_ATTR(tz_fail, 0x2b), + ARM_CSPMU_EVENT_ATTR(dram_errc, 0x2c), + ARM_CSPMU_EVENT_ATTR(dram_errd, 0x2d), + ARM_CSPMU_EVENT_ATTR(read_data_return, 0x32), + ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta, 0x33), + ARM_CSPMU_EVENT_ATTR(zq_start, 0x34), + ARM_CSPMU_EVENT_ATTR(zq_latch, 0x35), + ARM_CSPMU_EVENT_ATTR(wr_fifo_full, 0x36), + ARM_CSPMU_EVENT_ATTR(info_fifo_full, 0x37), + ARM_CSPMU_EVENT_ATTR(cmd_fifo_full, 0x38), + ARM_CSPMU_EVENT_ATTR(dfi_nop, 0x39), + ARM_CSPMU_EVENT_ATTR(dfi_cmd, 0x3a), + ARM_CSPMU_EVENT_ATTR(rd_run_len, 0x3b), + ARM_CSPMU_EVENT_ATTR(wr_run_len, 0x3c), + + ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), + NULL, +}; + +static struct attribute *ampereone_mcu_format_attrs[] = { + ARM_CSPMU_FORMAT_EVENT_ATTR, + ARM_CSPMU_FORMAT_ATTR(threshold, "config1:0-7"), + ARM_CSPMU_FORMAT_ATTR(rank, "config1:8-23"), + ARM_CSPMU_FORMAT_ATTR(bank, "config1:24-55"), + NULL, +}; + +static struct attribute ** +ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu) +{ + const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu); + + return ctx->event_attr; +} + +static struct attribute ** +ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu) +{ + const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu); + + return ctx->format_attr; +} + +static const char * +ampere_cspmu_get_name(const struct arm_cspmu *cspmu) +{ + const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu); + + return ctx->name; +} + +static u32 ampere_cspmu_event_filter(const struct perf_event *event) +{ + /* + * PMEVFILTR or PMCCFILTR aren't used in Ampere SoC PMU but are marked + * as RES0. Make sure, PMCCFILTR is written zero. + */ + return 0; +} + +static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu, + struct hw_perf_event *hwc, + u32 filter) +{ + struct perf_event *event; + unsigned int idx; + u32 threshold, rank, bank; + + /* + * At this point, all the events have the same filter settings. + * Therefore, take the first event and use its configuration. 
+ */ + idx = find_first_bit(cspmu->hw_events.used_ctrs, + cspmu->cycle_counter_logical_idx); + + event = cspmu->hw_events.events[idx]; + + threshold = get_threshold(event); + rank = get_rank(event); + bank = get_bank(event); + + writel(threshold, cspmu->base0 + PMAUXR0); + writel(rank, cspmu->base0 + PMAUXR1); + writel(bank, cspmu->base0 + PMAUXR2); +} + +static int ampere_cspmu_validate_configs(struct perf_event *event, + struct perf_event *event2) +{ + if (get_threshold(event) != get_threshold(event2) || + get_rank(event) != get_rank(event2) || + get_bank(event) != get_bank(event2)) + return -EINVAL; + + return 0; +} + +static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu, + struct perf_event *new) +{ + struct perf_event *curr, *leader = new->group_leader; + unsigned int idx; + int ret; + + ret = ampere_cspmu_validate_configs(new, leader); + if (ret) + return ret; + + /* We compare the global filter settings to the existing events */ + idx = find_first_bit(cspmu->hw_events.used_ctrs, + cspmu->cycle_counter_logical_idx); + + /* This is the first event, thus any configuration is fine */ + if (idx == cspmu->cycle_counter_logical_idx) + return 0; + + curr = cspmu->hw_events.events[idx]; + + return ampere_cspmu_validate_configs(curr, new); +} + +static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu, + const char *name_pattern) +{ + struct device *dev = cspmu->dev; + int id; + + id = ida_alloc(&mcu_pmu_ida, GFP_KERNEL); + if (id < 0) + return ERR_PTR(id); + + return devm_kasprintf(dev, GFP_KERNEL, name_pattern, id); +} + +static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu) +{ + struct device *dev = cspmu->dev; + struct ampere_cspmu_ctx *ctx; + struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops; + + ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->event_attr = ampereone_mcu_pmu_event_attrs; + ctx->format_attr = ampereone_mcu_format_attrs; + ctx->name = ampere_cspmu_format_name(cspmu, "ampere_mcu_pmu_%d"); + if (IS_ERR_OR_NULL(ctx->name)) + return ctx->name ? 
PTR_ERR(ctx->name) : -ENOMEM; + + cspmu->impl.ctx = ctx; + + impl_ops->event_filter = ampere_cspmu_event_filter; + impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter; + impl_ops->validate_event = ampere_cspmu_validate_event; + impl_ops->get_name = ampere_cspmu_get_name; + impl_ops->get_event_attrs = ampere_cspmu_get_event_attrs; + impl_ops->get_format_attrs = ampere_cspmu_get_format_attrs; + + return 0; +} + +/* Match all Ampere Coresight PMU devices */ +static const struct arm_cspmu_impl_match ampere_cspmu_param = { + .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE, + .module = THIS_MODULE, + .impl_init_ops = ampere_cspmu_init_ops +}; + +static int __init ampere_cspmu_init(void) +{ + int ret; + + ret = arm_cspmu_impl_register(&ampere_cspmu_param); + if (ret) + pr_err("ampere_cspmu backend registration error: %d\n", ret); + + return ret; +} + +static void __exit ampere_cspmu_exit(void) +{ + arm_cspmu_impl_unregister(&ampere_cspmu_param); +} + +module_init(ampere_cspmu_init); +module_exit(ampere_cspmu_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c index 1ba00d640352..0e3fe00d741d 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.c +++ b/drivers/perf/arm_cspmu/arm_cspmu.c @@ -383,6 +383,14 @@ static struct arm_cspmu_impl_match impl_match[] = { .module = NULL, .impl_init_ops = NULL, }, + { + .module_name = "ampere_cspmu", + .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE, + .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER, + .module = NULL, + .impl_init_ops = NULL, + }, + {0} }; diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h index a30c8372214c..2fe723555a6b 100644 --- a/drivers/perf/arm_cspmu/arm_cspmu.h +++ b/drivers/perf/arm_cspmu/arm_cspmu.h @@ -71,6 +71,7 @@ /* JEDEC-assigned JEP106 identification code */ #define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B +#define ARM_CSPMU_IMPL_ID_AMPERE 0xA16 struct arm_cspmu; -- cgit From 166b76a073be8e1ffdc2c4db60f3abbbc974a3a3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Oct 2023 11:24:24 +0100 Subject: clocksource/drivers/arm_arch_timer: Initialize evtstrm after finalizing cpucaps We attempt to initialize each CPU's arch_timer event stream in arch_timer_evtstrm_enable(), which we call from the arch_timer_starting_cpu() cpu hotplug callback which is registered early in boot. As this is registered before we initialize the system cpucaps, the test for ARM64_HAS_ECV will always be false for CPUs present at boot time, and will only be taken into account for CPUs onlined late (including those which are hotplugged out and in again). Due to this, CPUs present at boot time may not use the intended divider and scale factor to generate the event stream, and may differ from other CPUs. Correct this by only initializing the event stream after cpucaps have been finalized, registering a separate CPU hotplug callback for the event stream configuration. Since the caps must be finalized by this point, use cpus_have_final_cap() to verify this.
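The ordering constraint is enforced by cpus_have_final_cap() itself: its arm64 definition is approximately the following, so calling it from a callback registered before cpucaps are finalized would be a hard error rather than a silent misconfiguration:

	/* Approximate shape of the arm64 helper, for illustration. */
	static __always_inline bool cpus_have_final_cap(int num)
	{
		if (system_capabilities_finalized())
			return alternative_has_cap_unlikely(num);
		else
			BUG();
	}

Registering the new hotplug state from a core_initcall() is sufficient because initcalls run after SMP bring-up, by which point arm64 has finalized the system cpucaps.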
Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Acked-by: Thomas Gleixner Cc: Daniel Lezcano Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- drivers/clocksource/arm_arch_timer.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 7dd2c615bce2..d1e9e556da81 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -917,7 +917,7 @@ static void arch_timer_evtstrm_enable(unsigned int divider) #ifdef CONFIG_ARM64 /* ECV is likely to require a large divider. Use the EVNTIS flag. */ - if (cpus_have_const_cap(ARM64_HAS_ECV) && divider > 15) { + if (cpus_have_final_cap(ARM64_HAS_ECV) && divider > 15) { cntkctl |= ARCH_TIMER_EVT_INTERVAL_SCALE; divider -= 8; } @@ -955,6 +955,30 @@ static void arch_timer_configure_evtstream(void) arch_timer_evtstrm_enable(max(0, lsb)); } +static int arch_timer_evtstrm_starting_cpu(unsigned int cpu) +{ + arch_timer_configure_evtstream(); + return 0; +} + +static int arch_timer_evtstrm_dying_cpu(unsigned int cpu) +{ + cpumask_clear_cpu(smp_processor_id(), &evtstrm_available); + return 0; +} + +static int __init arch_timer_evtstrm_register(void) +{ + if (!arch_timer_evt || !evtstrm_enable) + return 0; + + return cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_EVTSTRM_STARTING, + "clockevents/arm/arch_timer_evtstrm:starting", + arch_timer_evtstrm_starting_cpu, + arch_timer_evtstrm_dying_cpu); +} +core_initcall(arch_timer_evtstrm_register); + static void arch_counter_set_user_access(void) { u32 cntkctl = arch_timer_get_cntkctl(); @@ -1016,8 +1040,6 @@ static int arch_timer_starting_cpu(unsigned int cpu) } arch_counter_set_user_access(); - if (evtstrm_enable) - arch_timer_configure_evtstream(); return 0; } @@ -1164,8 +1186,6 @@ static int arch_timer_dying_cpu(unsigned int cpu) { struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt); - cpumask_clear_cpu(smp_processor_id(), &evtstrm_available); - arch_timer_stop(clk); return 0; } @@ -1279,6 +1299,7 @@ out_unreg_notify: out_free: free_percpu(arch_timer_evt); + arch_timer_evt = NULL; out: return err; } -- cgit From a98a5eac4d69e639e2a3edbac9fce89de4ec39ee Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Oct 2023 11:24:58 +0100 Subject: arm64: Avoid cpus_have_const_cap() for ARM64_WORKAROUND_CAVIUM_23154 In gic_read_iar() we use cpus_have_const_cap() to check for ARM64_WORKAROUND_CAVIUM_23154 but this is not necessary and alternative_has_cap_*() would be preferable. For historical reasons, cpus_have_const_cap() is more complicated than it needs to be. Before cpucaps are finalized, it will perform a bitmap test of the system_cpucaps bitmap, and once cpucaps are finalized it will use an alternative branch. This used to be necessary to handle some race conditions in the window between cpucap detection and the subsequent patching of alternatives and static branches, where different branches could be out-of-sync with one another (or w.r.t. alternative sequences). Now that we use alternative branches instead of static branches, these are all patched atomically w.r.t. one another, and there are only a handful of cases that need special care in the window between cpucap detection and alternative patching. Due to the above, it would be nice to remove cpus_have_const_cap(), and migrate callers over to alternative_has_cap_*(), cpus_have_final_cap(), or cpus_have_cap() depending on their requirements.
This will remove redundant instructions and improve code generation, and will make it easier to determine how each callsite will behave before, during, and after alternative patching. The ARM64_WORKAROUND_CAVIUM_23154 cpucap is detected and patched early on the boot CPU before the GICv3 driver is initialized and hence before gic_read_iar() is ever called. Thus it is not necessary to use cpus_have_const_cap(), and alternative_has_cap() is equivalent. In addition, arm64's gic_read_iar() lives in irq-gic-v3.c purely for historical reasons. It was originally added prior to 32-bit arm support in commit: 6d4e11c5e2e8cd54 ("irqchip/gicv3: Workaround for Cavium ThunderX erratum 23154") When support for 32-bit arm was added, 32-bit arm's gic_read_iar() implementation was placed in <asm/arch_gicv3.h>, but the arm64 version was kept within irq-gic-v3.c as it depended on a static key local to irq-gic-v3.c and it was easier to add ifdeffery, which is what we did in commit: 7936e914f7b0827c ("irqchip/gic-v3: Refactor the arm64 specific parts") Subsequently the static key was replaced with a cpucap in commit: a4023f682739439b ("arm64: Add hypervisor safe helper for checking constant capabilities") Since that commit there has been no need to keep arm64's gic_read_iar() in irq-gic-v3.c. This patch replaces the use of cpus_have_const_cap() with alternative_has_cap_unlikely(), which will avoid generating code to test the system_cpucaps bitmap and should be better for all subsequent calls at runtime. For consistency, move the arm64-specific gic_read_iar() implementation over to arm64's <asm/arch_gicv3.h>. The ARM64_WORKAROUND_CAVIUM_23154 cpucap is added to cpucap_is_possible() so that code can be elided entirely when this is not possible. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- drivers/irqchip/irq-gic-v3.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'drivers') diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eedfa8e9f077..6d6c01f8f7b4 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -270,17 +270,6 @@ static void gic_redist_wait_for_rwp(void) gic_do_wait_for_rwp(gic_data_rdist_rd_base(), GICR_CTLR_RWP); } -#ifdef CONFIG_ARM64 - -static u64 __maybe_unused gic_read_iar(void) -{ - if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154)) - return gic_read_iar_cavium_thunderx(); - else - return gic_read_iar_common(); -} -#endif - static void gic_enable_redist(bool enable) { void __iomem *rbase; -- cgit From 32269e09b137ade1e5cb5c8a7d67db1ba9b1e4fe Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 12 Oct 2023 12:35:43 +0200 Subject: perf/amlogic: add missing MODULE_DEVICE_TABLE Add the missing MODULE_DEVICE_TABLE macro to let this driver be automatically loaded as a module.
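For background, MODULE_DEVICE_TABLE(of, ...) embeds the match table in the module's metadata so that depmod can emit "of:" aliases for udev to modprobe when the device is enumerated; the general pattern (hypothetical names) is:

	static const struct of_device_id foo_pmu_dt_match[] = {
		{ .compatible = "vendor,foo-pmu" },
		{ /* sentinel */ }
	};
	MODULE_DEVICE_TABLE(of, foo_pmu_dt_match);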
Signed-off-by: Marek Szyprowski Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20231012103543.3381326-1-m.szyprowski@samsung.com Signed-off-by: Will Deacon --- drivers/perf/amlogic/meson_g12_ddr_pmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c index 8b643888d503..15d52ab3276a 100644 --- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c +++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c @@ -377,6 +377,7 @@ static const struct of_device_id meson_ddr_pmu_dt_match[] = { }, {} }; +MODULE_DEVICE_TABLE(of, meson_ddr_pmu_dt_match); static struct platform_driver g12_ddr_pmu_driver = { .probe = g12_ddr_pmu_probe, -- cgit From 1f33cdef8ca174661fb0d82d0f74fd0589dc9e46 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Oct 2023 12:29:09 -0500 Subject: drivers/perf: xgene: Use device_get_match_data() Use preferred device_get_match_data() instead of of_match_device() and acpi_match_device() to get the driver match data. With this, adjust the includes to explicitly include the correct headers. Signed-off-by: Rob Herring Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20231009172923.2457844-14-robh@kernel.org Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 9972bfc11a5c..7ce344248dda 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -16,11 +16,9 @@ #include #include #include -#include -#include -#include #include #include +#include #include #include @@ -1731,6 +1729,12 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = { .id = PCP_PMU_V2, }; +#ifdef CONFIG_ACPI +static const struct xgene_pmu_data xgene_pmu_v3_data = { + .id = PCP_PMU_V3, +}; +#endif + static const struct xgene_pmu_ops xgene_pmu_ops = { .mask_int = xgene_pmu_mask_int, .unmask_int = xgene_pmu_unmask_int, @@ -1773,9 +1777,9 @@ static const struct of_device_id xgene_pmu_of_match[] = { MODULE_DEVICE_TABLE(of, xgene_pmu_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id xgene_pmu_acpi_match[] = { - {"APMC0D5B", PCP_PMU_V1}, - {"APMC0D5C", PCP_PMU_V2}, - {"APMC0D83", PCP_PMU_V3}, + {"APMC0D5B", (kernel_ulong_t)&xgene_pmu_data}, + {"APMC0D5C", (kernel_ulong_t)&xgene_pmu_v2_data}, + {"APMC0D83", (kernel_ulong_t)&xgene_pmu_v3_data}, {}, }; MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match); @@ -1831,7 +1835,6 @@ static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) static int xgene_pmu_probe(struct platform_device *pdev) { const struct xgene_pmu_data *dev_data; - const struct of_device_id *of_id; struct xgene_pmu *xgene_pmu; int irq, rc; int version; @@ -1850,24 +1853,10 @@ static int xgene_pmu_probe(struct platform_device *pdev) xgene_pmu->dev = &pdev->dev; platform_set_drvdata(pdev, xgene_pmu); - version = -EINVAL; - of_id = of_match_device(xgene_pmu_of_match, &pdev->dev); - if (of_id) { - dev_data = (const struct xgene_pmu_data *) of_id->data; - version = dev_data->id; - } - -#ifdef CONFIG_ACPI - if (ACPI_COMPANION(&pdev->dev)) { - const struct acpi_device_id *acpi_id; - - acpi_id = acpi_match_device(xgene_pmu_acpi_match, &pdev->dev); - if (acpi_id) - version = (int) acpi_id->driver_data; - } -#endif - if (version < 0) + dev_data = device_get_match_data(&pdev->dev); + if (!dev_data) return -ENODEV; + version = dev_data->id; if (version == PCP_PMU_V3) xgene_pmu->ops = 
&xgene_pmu_v3_ops; -- cgit From 851354cbd12bb9500909733c3d4054306f61df87 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 16 Oct 2023 16:31:27 +0100 Subject: clocksource/drivers/arm_arch_timer: limit XGene-1 workaround The AppliedMicro XGene-1 CPU has an erratum where the timer condition would only consider TVAL, not CVAL. We currently apply a workaround when seeing the PartNum field of MIDR_EL1 being 0x000, under the assumption that this would match only the XGene-1 CPU model. However even the Ampere eMAG (aka XGene-3) uses that same part number, and only differs in the "Variant" and "Revision" fields: XGene-1's MIDR is 0x500f0000, our eMAG reports 0x503f0002. Experiments show the latter doesn't show the faulty behaviour. Increase the specificity of the check to only consider partnum 0x000 and variant 0x00, to exclude the Ampere eMAG. Fixes: 012f18850452 ("clocksource/drivers/arm_arch_timer: Work around broken CVAL implementations") Reported-by: Ross Burton Signed-off-by: Andre Przywara Acked-by: Marc Zyngier Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20231016153127.116101-1-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- drivers/clocksource/arm_arch_timer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 7dd2c615bce2..071b04f1ee73 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -836,8 +836,9 @@ static u64 __arch_timer_check_delta(void) * Note that TVAL is signed, thus has only 31 of its * 32 bits to express magnitude. */ - MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM, - APM_CPU_PART_POTENZA)), + MIDR_REV_RANGE(MIDR_CPU_MODEL(ARM_CPU_IMP_APM, + APM_CPU_PART_XGENE), + APM_CPU_VAR_POTENZA, 0x0, 0xf), {}, }; -- cgit From 50b560783f7f71790bcf70e9e9855155fb0af8c1 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 19 Oct 2023 17:13:52 +0800 Subject: drivers/perf: hisi: use cpuhp_state_remove_instance_nocalls() for hisi_hns3_pmu uninit process When tearing down a 'hisi_hns3' PMU, we mistakenly run the CPU hotplug callbacks after the device has been unregistered, leading to fireworks when we try to execute empty function callbacks within the driver: | Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 | CPU: 0 PID: 15 Comm: cpuhp/0 Tainted: G W O 5.12.0-rc4+ #1 | Hardware name: , BIOS KpxxxFPGA 1P B600 V143 04/22/2021 | pstate: 80400009 (Nzcv daif +PAN -UAO -TCO BTYPE=--) | pc : perf_pmu_migrate_context+0x98/0x38c | lr : perf_pmu_migrate_context+0x94/0x38c | | Call trace: | perf_pmu_migrate_context+0x98/0x38c | hisi_hns3_pmu_offline_cpu+0x104/0x12c [hisi_hns3_pmu] Use cpuhp_state_remove_instance_nocalls() instead of cpuhp_state_remove_instance() so that the notifiers don't execute after the PMU device has been unregistered. 
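The underlying difference: cpuhp_state_remove_instance() invokes the state's teardown callback on every online CPU before unlinking the instance, whereas the _nocalls() variant only unlinks it. A minimal sketch of the corrected teardown ordering, with hypothetical names:

	static void foo_pmu_uninit(struct pci_dev *pdev)
	{
		struct foo_pmu *fp = pci_get_drvdata(pdev);

		perf_pmu_unregister(&fp->pmu);
		/*
		 * The PMU is gone: unlink the hotplug instance without
		 * invoking the offline callback against a dead device.
		 */
		cpuhp_state_remove_instance_nocalls(FOO_CPUHP_STATE,
						    &fp->node);
	}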
Fixes: 66637ab137b4 ("drivers/perf: hisi: add driver for HNS3 PMU") Signed-off-by: Hao Chen Signed-off-by: Jijie Shao Reviewed-by: Yicong Yang Link: https://lore.kernel.org/r/20231019091352.998964-1-shaojijie@huawei.com [will: Rewrote commit message] Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hns3_pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c index e0457d84af6b..16869bf5bf4c 100644 --- a/drivers/perf/hisilicon/hns3_pmu.c +++ b/drivers/perf/hisilicon/hns3_pmu.c @@ -1556,8 +1556,8 @@ static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu) ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1); if (ret) { pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, - &hns3_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); } return ret; @@ -1568,8 +1568,8 @@ static void hns3_pmu_uninit_pmu(struct pci_dev *pdev) struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev); perf_pmu_unregister(&hns3_pmu->pmu); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, - &hns3_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE, + &hns3_pmu->node); } static int hns3_pmu_init_dev(struct pci_dev *pdev) -- cgit From 58f8fc57b1d314b5402c374a8c454ac7c870574c Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 13 Oct 2023 08:13:54 +0530 Subject: drivers: perf: arm_pmuv3: Read PMMIR_EL1 unconditionally Currently the PMUv3 driver only reads PMMIR_EL1 if the PMU implements FEAT_PMUv3p4 and the STALL_SLOT event, but the check for STALL_SLOT event isn't necessary and can be removed. The check for STALL_SLOT event was introduced with the read of PMMIR_EL1 in commit f5be3a61fdb5dd11 ("arm64: perf: Add support caps under sysfs") When this logic was written, the ARM ARM said: | If STALL_SLOT is not implemented, it is IMPLEMENTATION DEFINED whether | the PMMIR System registers are implemented. ... and thus the driver had to check for STALL_SLOT event to verify that PMMIR_EL1 was implemented and accesses to PMMIR_EL1 would not be UNDEFINED. Subsequently, the architecture was retrospectively tightened to require that any FEAT_PMUv3p4 implementation implements PMMIR_EL1. Since the G.b release of the ARM ARM, the wording regarding STALL_SLOT event has been removed, and the description of PMMIR_EL1 says: | This register is present only when FEAT_PMUv3p4 is implemented. Drop the unnecessary check for STALL_SLOT event when reading PMMIR_EL1. 
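For reference, the dropped test decoded as follows: pmceid_raw[1] mirrors PMCEID1_EL0, which advertises the common events 0x20-0x3F one bit each, so BIT(31) corresponds to event 0x3F, i.e. STALL_SLOT:

	/* What the old check was really asking (illustrative): */
	bool has_stall_slot =
		pmceid_raw[1] & BIT(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 0x20);
			/* 0x3F - 0x20 == 31 */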
Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: James Clark Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20231013024354.1289070-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 8fcaa26f0f8a..4ee0afe00527 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1126,7 +1126,7 @@ static void __armv8pmu_probe_pmu(void *info) pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* store PMMIR register for sysfs */ - if (is_pmuv3p4(pmuver) && (pmceid_raw[1] & BIT(31))) + if (is_pmuv3p4(pmuver)) cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; -- cgit From 3b9a22d345ff89232227b2449a311bf3f910f5f2 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 16 Oct 2023 08:24:36 +0530 Subject: drivers: perf: arm_pmuv3: Drop some unused arguments from armv8_pmu_init() All the PMU init functions want the default sysfs attribute groups, and so these all call armv8_pmu_init_nogroups() helper, with none of them calling armv8_pmu_init() directly. When we introduced armv8_pmu_init_nogroups() in the commit e424b1798526 ("arm64: perf: Refactor PMU init callbacks") ... we thought that we might need custom attribute groups in future, but as we evidently haven't, we can remove the option. This patch folds armv8_pmu_init_nogroups() into armv8_pmu_init(), removing the ability to use custom attribute groups and simplifying the code. CC: James Clark Cc: Robin Murphy Cc: Will Deacon Cc: Mark Rutland Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: Mark Rutland Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20231016025436.1368945-1-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmuv3.c | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index 4ee0afe00527..4f6923ad4589 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -1187,10 +1187,7 @@ static void armv8_pmu_register_sysctl_table(void) } static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, - int (*map_event)(struct perf_event *event), - const struct attribute_group *events, - const struct attribute_group *format, - const struct attribute_group *caps) + int (*map_event)(struct perf_event *event)) { int ret = armv8pmu_probe_pmu(cpu_pmu); if (ret) @@ -1212,27 +1209,17 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, cpu_pmu->name = name; cpu_pmu->map_event = map_event; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ? - events : &armv8_pmuv3_events_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ? - format : &armv8_pmuv3_format_attr_group; - cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ? 
- caps : &armv8_pmuv3_caps_attr_group; - + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = &armv8_pmuv3_caps_attr_group; armv8_pmu_register_sysctl_table(); return 0; } -static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name, - int (*map_event)(struct perf_event *event)) -{ - return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL); -} - #define PMUV3_INIT_SIMPLE(name) \ static int name##_pmu_init(struct arm_pmu *cpu_pmu) \ { \ - return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\ + return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event); \ } PMUV3_INIT_SIMPLE(armv8_pmuv3) @@ -1263,44 +1250,37 @@ PMUV3_INIT_SIMPLE(armv8_nvidia_denver) static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35", - armv8_a53_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", armv8_a53_map_event); } static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53", - armv8_a53_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", armv8_a53_map_event); } static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57", - armv8_a57_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event); } static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72", - armv8_a57_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", armv8_a57_map_event); } static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73", - armv8_a73_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", armv8_a73_map_event); } static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder", - armv8_thunder_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", armv8_thunder_map_event); } static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu) { - return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan", - armv8_vulcan_map_event); + return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", armv8_vulcan_map_event); } static const struct of_device_id armv8_pmu_of_device_ids[] = { -- cgit From e3e73f511c49c741f6309862c2248958ad77bbaa Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 20 Oct 2023 18:51:25 +0100 Subject: perf/arm-cmn: Fix DTC domain detection It transpires that dtm_unit_info is another register which got shuffled in CMN-700 without me noticing. Fix that in a way which also proactively fixes the fragile laziness of its consumer, just in case any further fields ever get added alongside dtc_domain. 
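The fragile laziness in question is visible in the hunk below: the old consumer shifted by the whole register value, which is only correct while dtc_domain is the register's sole field. Condensed from the patch, the before/after shape of the read (offset here stands in for the part-specific offset chosen by the new helper):

	/*
	 * Before: treats the entire dtm_unit_info register as the domain
	 * number; breaks as soon as any other field is defined in it.
	 */
	xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO);

	/*
	 * After: extract just the two dtc_domain bits, from wherever this
	 * part of CMN keeps the register.
	 */
	xp->dtc = 1 << FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN,
				 readl_relaxed(xp_region + offset));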
Fixes: 23760a014417 ("perf/arm-cmn: Add CMN-700 support") Signed-off-by: Robin Murphy Reviewed-by: Ilkka Koskinen Link: https://lore.kernel.org/r/3076ee83d0554f6939fbb6ee49ab2bdb28d8c7ee.1697824215.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 913dc04b3a40..f1ac8d0cdb3b 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -112,7 +112,9 @@ #define CMN_DTM_PMEVCNTSR 0x240 -#define CMN_DTM_UNIT_INFO 0x0910 +#define CMN650_DTM_UNIT_INFO 0x0910 +#define CMN_DTM_UNIT_INFO 0x0960 +#define CMN_DTM_UNIT_INFO_DTC_DOMAIN GENMASK_ULL(1, 0) #define CMN_DTM_NUM_COUNTERS 4 /* Want more local counters? Why not replicate the whole DTM! Ugh... */ @@ -2117,6 +2119,16 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) return 0; } +static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_region) +{ + int offset = CMN_DTM_UNIT_INFO; + + if (cmn->part == PART_CMN650 || cmn->part == PART_CI700) + offset = CMN650_DTM_UNIT_INFO; + + return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset)); +} + static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node) { int level; @@ -2248,7 +2260,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) if (cmn->part == PART_CMN600) xp->dtc = 0xf; else - xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO); + xp->dtc = 1 << arm_cmn_dtc_domain(cmn, xp_region); xp->dtm = dtm - cmn->dtms; arm_cmn_init_dtm(dtm++, xp, 0); -- cgit From 7633ec2c262fab3e7c5bf3cd3876b5748f584a57 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 20 Oct 2023 18:51:26 +0100 Subject: perf/arm-cmn: Rework DTC counters (again) The bitmap-based scheme for tracking DTC counter usage turns out to be a complete dead-end for its imagined purpose, since by the time we have to keep track of a per-DTC counter index anyway, we already have enough information to make the bitmap itself redundant. Revert the remains of it back to almost the original scheme, but now expanded to track per-DTC indices, in preparation for making use of them in anger. Note that since cycle count events always use a dedicated counter on a single DTC, we reuse the field to encode their DTC index directly. Signed-off-by: Robin Murphy Reviewed-by: Ilkka Koskinen Link: https://lore.kernel.org/r/5f6ade76b47f033836d7a36c03555da896dfb4a3.1697824215.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 126 +++++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 62 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index f1ac8d0cdb3b..675f1638013e 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -281,16 +281,13 @@ struct arm_cmn_node { u16 id, logid; enum cmn_node_type type; - int dtm; - union { - /* DN/HN-F/CXHA */ - struct { - u8 val : 4; - u8 count : 4; - } occupid[SEL_MAX]; - /* XP */ - u8 dtc; - }; + u8 dtm; + s8 dtc; + /* DN/HN-F/CXHA */ + struct { + u8 val : 4; + u8 count : 4; + } occupid[SEL_MAX]; union { u8 event[4]; __le32 event_sel; @@ -540,12 +537,12 @@ static int arm_cmn_map_show(struct seq_file *s, void *data) seq_puts(s, "\n |"); for (x = 0; x < cmn->mesh_x; x++) { - u8 dtc = cmn->xps[xp_base + x].dtc; + s8 dtc = cmn->xps[xp_base + x].dtc; - if (dtc & (dtc - 1)) + if (dtc < 0) seq_puts(s, " DTC ?? 
|"); else - seq_printf(s, " DTC %ld |", __ffs(dtc)); + seq_printf(s, " DTC %d |", dtc); } seq_puts(s, "\n |"); for (x = 0; x < cmn->mesh_x; x++) @@ -589,8 +586,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {} struct arm_cmn_hw_event { struct arm_cmn_node *dn; u64 dtm_idx[4]; - unsigned int dtc_idx; - u8 dtcs_used; + s8 dtc_idx[CMN_MAX_DTCS]; u8 num_dns; u8 dtm_offset; bool wide_sel; @@ -600,6 +596,10 @@ struct arm_cmn_hw_event { #define for_each_hw_dn(hw, dn, i) \ for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++) +/* @i is the DTC number, @idx is the counter index on that DTC */ +#define for_each_hw_dtc_idx(hw, i, idx) \ + for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0) + static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event) { BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target)); @@ -1429,12 +1429,11 @@ static void arm_cmn_init_counter(struct perf_event *event) { struct arm_cmn *cmn = to_cmn(event->pmu); struct arm_cmn_hw_event *hw = to_cmn_hw(event); - unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx); u64 count; - for (i = 0; hw->dtcs_used & (1U << i); i++) { - writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt); - cmn->dtc[i].counters[hw->dtc_idx] = event; + for_each_hw_dtc_idx(hw, i, idx) { + writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx)); + cmn->dtc[i].counters[idx] = event; } count = arm_cmn_read_dtm(cmn, hw, false); @@ -1447,11 +1446,9 @@ static void arm_cmn_event_read(struct perf_event *event) struct arm_cmn_hw_event *hw = to_cmn_hw(event); u64 delta, new, prev; unsigned long flags; - unsigned int i; - if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) { - i = __ffs(hw->dtcs_used); - delta = arm_cmn_read_cc(cmn->dtc + i); + if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) { + delta = arm_cmn_read_cc(cmn->dtc + hw->dtc_idx[0]); local64_add(delta, &event->count); return; } @@ -1461,8 +1458,8 @@ static void arm_cmn_event_read(struct perf_event *event) delta = new - prev; local_irq_save(flags); - for (i = 0; hw->dtcs_used & (1U << i); i++) { - new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx); + for_each_hw_dtc_idx(hw, i, idx) { + new = arm_cmn_read_counter(cmn->dtc + i, idx); delta += new << 16; } local_irq_restore(flags); @@ -1518,7 +1515,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags) int i; if (type == CMN_TYPE_DTC) { - i = __ffs(hw->dtcs_used); + i = hw->dtc_idx[0]; writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR); cmn->dtc[i].cc_active = true; } else if (type == CMN_TYPE_WP) { @@ -1549,7 +1546,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags) int i; if (type == CMN_TYPE_DTC) { - i = __ffs(hw->dtcs_used); + i = hw->dtc_idx[0]; cmn->dtc[i].cc_active = false; } else if (type == CMN_TYPE_WP) { int wp_idx = arm_cmn_wp_idx(event); @@ -1735,12 +1732,19 @@ static int arm_cmn_event_init(struct perf_event *event) hw->dn = arm_cmn_node(cmn, type); if (!hw->dn) return -EINVAL; + + memset(hw->dtc_idx, -1, sizeof(hw->dtc_idx)); for (dn = hw->dn; dn->type == type; dn++) { if (bynodeid && dn->id != nodeid) { hw->dn++; continue; } hw->num_dns++; + if (dn->dtc < 0) + memset(hw->dtc_idx, 0, cmn->num_dtcs); + else + hw->dtc_idx[dn->dtc] = 0; + if (bynodeid) break; } @@ -1752,12 +1756,6 @@ static int arm_cmn_event_init(struct perf_event *event) nodeid, nid.x, nid.y, nid.port, nid.dev, type); return -EINVAL; } - /* - * Keep assuming non-cycles events count in all DTC domains; turns out - * it's hard to make a 
worthwhile optimisation around this, short of - * going all-in with domain-local counter allocation as well. - */ - hw->dtcs_used = (1U << cmn->num_dtcs) - 1; return arm_cmn_validate_group(cmn, event); } @@ -1783,28 +1781,25 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event, } memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx)); - for (i = 0; hw->dtcs_used & (1U << i); i++) - cmn->dtc[i].counters[hw->dtc_idx] = NULL; + for_each_hw_dtc_idx(hw, j, idx) + cmn->dtc[j].counters[idx] = NULL; } static int arm_cmn_event_add(struct perf_event *event, int flags) { struct arm_cmn *cmn = to_cmn(event->pmu); struct arm_cmn_hw_event *hw = to_cmn_hw(event); - struct arm_cmn_dtc *dtc = &cmn->dtc[0]; struct arm_cmn_node *dn; enum cmn_node_type type = CMN_EVENT_TYPE(event); - unsigned int i, dtc_idx, input_sel; + unsigned int input_sel, i = 0; if (type == CMN_TYPE_DTC) { - i = 0; while (cmn->dtc[i].cycles) if (++i == cmn->num_dtcs) return -ENOSPC; cmn->dtc[i].cycles = event; - hw->dtc_idx = CMN_DT_NUM_COUNTERS; - hw->dtcs_used = 1U << i; + hw->dtc_idx[0] = i; if (flags & PERF_EF_START) arm_cmn_event_start(event, 0); @@ -1812,17 +1807,22 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) } /* Grab a free global counter first... */ - dtc_idx = 0; - while (dtc->counters[dtc_idx]) - if (++dtc_idx == CMN_DT_NUM_COUNTERS) - return -ENOSPC; - - hw->dtc_idx = dtc_idx; + for_each_hw_dtc_idx(hw, j, idx) { + if (j > 0) { + idx = hw->dtc_idx[0]; + } else { + idx = 0; + while (cmn->dtc[j].counters[idx]) + if (++idx == CMN_DT_NUM_COUNTERS) + goto free_dtms; + } + hw->dtc_idx[j] = idx; + } /* ...then the local counters to feed it. */ for_each_hw_dn(hw, dn, i) { struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset; - unsigned int dtm_idx, shift; + unsigned int dtm_idx, shift, d = 0; u64 reg; dtm_idx = 0; @@ -1841,11 +1841,11 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) tmp = dtm->wp_event[wp_idx ^ 1]; if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) != - CMN_EVENT_WP_COMBINE(dtc->counters[tmp])) + CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp])) goto free_dtms; input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx; - dtm->wp_event[wp_idx] = dtc_idx; + dtm->wp_event[wp_idx] = hw->dtc_idx[d]; writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx)); } else { struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id); @@ -1865,7 +1865,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) dtm->input_sel[dtm_idx] = input_sel; shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx); dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift); - dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift; + dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, hw->dtc_idx[d]) << shift; dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx); reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low; writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG); @@ -1893,7 +1893,7 @@ static void arm_cmn_event_del(struct perf_event *event, int flags) arm_cmn_event_stop(event, PERF_EF_UPDATE); if (type == CMN_TYPE_DTC) - cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL; + cmn->dtc[hw->dtc_idx[0]].cycles = NULL; else arm_cmn_event_clear(cmn, event, hw->num_dns); } @@ -2074,7 +2074,6 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) { struct arm_cmn_node *dn, *xp; int dtc_idx = 0; - u8 dtcs_present = (1 << cmn->num_dtcs) - 1; cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL); if (!cmn->dtc) @@ -2084,23 +2083,26 
@@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP); + if (cmn->part == PART_CMN600 && cmn->num_dtcs > 1) { + /* We do at least know that a DTC's XP must be in that DTC's domain */ + dn = arm_cmn_node(cmn, CMN_TYPE_DTC); + for (int i = 0; i < cmn->num_dtcs; i++) + arm_cmn_node_to_xp(cmn, dn + i)->dtc = i; + } + for (dn = cmn->dns; dn->type; dn++) { - if (dn->type == CMN_TYPE_XP) { - dn->dtc &= dtcs_present; + if (dn->type == CMN_TYPE_XP) continue; - } xp = arm_cmn_node_to_xp(cmn, dn); + dn->dtc = xp->dtc; dn->dtm = xp->dtm; if (cmn->multi_dtm) dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2; if (dn->type == CMN_TYPE_DTC) { - int err; - /* We do at least know that a DTC's XP must be in that DTC's domain */ - if (xp->dtc == 0xf) - xp->dtc = 1 << dtc_idx; - err = arm_cmn_init_dtc(cmn, dn, dtc_idx++); + int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++); + if (err) return err; } @@ -2258,9 +2260,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset) cmn->mesh_x = xp->logid; if (cmn->part == PART_CMN600) - xp->dtc = 0xf; + xp->dtc = -1; else - xp->dtc = 1 << arm_cmn_dtc_domain(cmn, xp_region); + xp->dtc = arm_cmn_dtc_domain(cmn, xp_region); xp->dtm = dtm - cmn->dtms; arm_cmn_init_dtm(dtm++, xp, 0); -- cgit From ab33c66fd8f124f7c4a35c96fcfbcd262764b0f6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 20 Oct 2023 18:51:27 +0100 Subject: perf/arm-cmn: Enable per-DTC counter allocation Finally enable independent per-DTC-domain counter allocation, except on CMN-600 where we still need to cope with not knowing the domain topology and thus keep counter indices synchronised across domains. This allows users to simultaneously count up to 8 targeted events per domain, rather than 8 globally, for up to 4x wider coverage on maximum configurations. Even though this now looks deceptively simple, I stand by my previous assertion that it was a flippin' nightmare to implement; all the real head-scratchers are hidden in the foundations in the previous patch... Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/849f65566582cb102c6d0843d0f26e231180f8ac.1697824215.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 675f1638013e..9479e919c063 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1570,7 +1570,7 @@ struct arm_cmn_val { u8 dtm_count[CMN_MAX_DTMS]; u8 occupid[CMN_MAX_DTMS][SEL_MAX]; u8 wp[CMN_MAX_DTMS][4]; - int dtc_count; + int dtc_count[CMN_MAX_DTCS]; bool cycles; }; @@ -1591,7 +1591,8 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val, return; } - val->dtc_count++; + for_each_hw_dtc_idx(hw, dtc, idx) + val->dtc_count[dtc]++; for_each_hw_dn(hw, dn, i) { int wp_idx, dtm = dn->dtm, sel = hw->filter_sel; @@ -1638,8 +1639,9 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event) goto done; } - if (val->dtc_count == CMN_DT_NUM_COUNTERS) - goto done; + for (i = 0; i < CMN_MAX_DTCS; i++) + if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS) + goto done; for_each_hw_dn(hw, dn, i) { int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel; @@ -1806,9 +1808,9 @@ static int arm_cmn_event_add(struct perf_event *event, int flags) return 0; } - /* Grab a free global counter first... */ + /* Grab the global counters first...
*/ for_each_hw_dtc_idx(hw, j, idx) { - if (j > 0) { + if (cmn->part == PART_CMN600 && j > 0) { idx = hw->dtc_idx[0]; } else { idx = 0; while (cmn->dtc[j].counters[idx]) if (++idx == CMN_DT_NUM_COUNTERS) goto free_dtms; } hw->dtc_idx[j] = idx; } - /* ...then the local counters to feed it. */ + /* ...then the local counters to feed them */ for_each_hw_dn(hw, dn, i) { struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset; - unsigned int dtm_idx, shift, d = 0; + unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0); u64 reg; dtm_idx = 0; -- cgit From 6d7d51e88e21c0af1ca96a3617afef334bfeffcf Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 24 Oct 2023 17:29:53 +0800 Subject: drivers/perf: hisi_pcie: Check the type first in pmu::event_init() Check whether the event type matches the PMU type first in pmu::event_init(), before touching the event. Otherwise we may modify events belonging to other PMUs and produce incorrect results. Since perf_init_event() may call every PMU's event_init() in certain cases, we must not modify an event that is not ours. Fixes: 8404b0fbc7fb ("drivers/perf: hisi: Add driver for HiSilicon PCIe PMU") Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20231024092954.42297-2-yangyicong@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 5a00adb2de8c..051efffc44c8 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -353,6 +353,10 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event) struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + /* Check the type first before going on, otherwise it's not our event */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + event->cpu = pcie_pmu->on_cpu; if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event))) @@ -360,9 +364,6 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event) else hwc->event_base = HISI_PCIE_CNT; - if (event->attr.type != event->pmu->type) - return -ENOENT; - /* Sampling is not supported. */ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EOPNOTSUPP; -- cgit From 868f8a709874729998bcad3422124b678b18b755 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 24 Oct 2023 17:29:54 +0800 Subject: drivers/perf: hisi_pcie: Initialize event->cpu only on success Initialize event->cpu only on success, to be more reasonable and to stay consistent with other PMUs.
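Taken together with the type-check fix above, hisi_pcie_pmu_event_init() ends up following the usual pmu::event_init() discipline: reject foreign events before touching them, run all validation, and only then commit any state. A condensed, illustrative shape of the function after both patches (validation steps elided where marked):

	static int hisi_pcie_pmu_event_init(struct perf_event *event)
	{
		struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);

		/* Not our event: perf_init_event() may offer it to every PMU */
		if (event->attr.type != event->pmu->type)
			return -ENOENT;

		/* ... pick counter base, reject sampling, validate the group ... */

		/* Every check has passed; only now is it safe to modify the event */
		event->cpu = pcie_pmu->on_cpu;

		return 0;
	}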
Signed-off-by: Yicong Yang Link: https://lore.kernel.org/r/20231024092954.42297-3-yangyicong@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_pcie_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c index 051efffc44c8..b90ba8aca3fa 100644 --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c @@ -357,8 +357,6 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - event->cpu = pcie_pmu->on_cpu; - if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event))) hwc->event_base = HISI_PCIE_EXT_CNT; else @@ -374,6 +372,8 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event) if (!hisi_pcie_pmu_validate_event_group(event)) return -EINVAL; + event->cpu = pcie_pmu->on_cpu; + return 0; } -- cgit From b805cafc604bfdb671fae7347a57f51154afa735 Mon Sep 17 00:00:00 2001 From: Junhao He Date: Tue, 24 Oct 2023 19:36:30 +0800 Subject: perf: hisi: Fix use-after-free when register pmu fails When we fail to register the uncore PMU, the PMU context may not have been allocated. The error handling path calls cpuhp_state_remove_instance(), which invokes the uncore PMU's offline callback and migrates the PMU context; that is liable to lead to a use-after-free. Use cpuhp_state_remove_instance_nocalls() instead of cpuhp_state_remove_instance() so that the notifiers don't execute after the PMU device has failed to register. Fixes: a0ab25cd82ee ("drivers/perf: hisi: Add support for HiSilicon PA PMU driver") Fixes: 3bf30882c3c7 ("drivers/perf: hisi: Add support for HiSilicon SLLC PMU driver") Signed-off-by: Junhao He Link: https://lore.kernel.org/r/20231024113630.13472-1-hejunhao3@huawei.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 4 ++-- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index d941e746b424..797cf201996a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -505,8 +505,8 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev) ret = perf_pmu_register(&pa_pmu->pmu, name, -1); if (ret) { dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, - &pa_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, + &pa_pmu->node); return ret; } diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c index 6fe534a665ed..e706ca567676 100644 --- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -450,8 +450,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev) ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); if (ret) { dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret); - cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, - &sllc_pmu->node); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + &sllc_pmu->node); return ret; } -- cgit From c54e52f84d7aa590e90e1f73f462517ac40051e1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 23 Oct 2023 14:35:03 +0100 Subject: arm64, irqchip/gic-v3, ACPI: Move MADT GICC enabled check into a helper
ACPI, irqchip and the architecture code all inspect the enabled bit of a GICC entry in the MADT. The addition of an 'online capable' bit means all these sites need updating. Move the current checks behind a helper to make future updates easier. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Acked-by: "Rafael J. Wysocki" Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/E1quv5D-00AeNJ-U8@rmk-PC.armlinux.org.uk Signed-off-by: Catalin Marinas --- drivers/acpi/processor_core.c | 2 +- drivers/irqchip/irq-gic-v3.c | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 7dd6dbaa98c3..b203cfe28550 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -90,7 +90,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry, struct acpi_madt_generic_interrupt *gicc = container_of(entry, struct acpi_madt_generic_interrupt, header); - if (!(gicc->flags & ACPI_MADT_ENABLED)) + if (!acpi_gicc_is_usable(gicc)) return -ENODEV; /* device_declaration means Device object in DSDT, in the diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index eedfa8e9f077..72d3cdebdad1 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -2367,8 +2367,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; void __iomem *redist_base; - /* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */ - if (!(gicc->flags & ACPI_MADT_ENABLED)) + if (!acpi_gicc_is_usable(gicc)) return 0; redist_base = ioremap(gicc->gicr_base_address, size); @@ -2418,7 +2417,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, * If GICC is enabled and has valid gicr base address, then it means * GICR base is presented via GICC */ - if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { + if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) { acpi_data.enabled_rdists++; return 0; } @@ -2427,7 +2426,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, * It's perfectly valid firmware can pass disabled GICC entry, driver * should not treat as errors, skip the entry instead of probe fail. */ - if (!(gicc->flags & ACPI_MADT_ENABLED)) + if (!acpi_gicc_is_usable(gicc)) return 0; return -ENODEV; @@ -2486,8 +2485,7 @@ static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *header, int maint_irq_mode; static int first_madt = true; - /* Skip unusable CPUs */ - if (!(gicc->flags & ACPI_MADT_ENABLED)) + if (!acpi_gicc_is_usable(gicc)) return 0; maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ? -- cgit
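The acpi_gicc_is_usable() helper itself is added outside drivers/ and so does not appear in the hunks above (the listing is limited to 'drivers'). At this point in the series it presumably just wraps the old flag test, along these lines:

	/*
	 * Sketch of the new helper; the real definition lands in an ACPI
	 * header and, at this stage of the series, simply wraps the
	 * ACPI_MADT_ENABLED check that the call sites used to open-code.
	 */
	static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc)
	{
		return gicc->flags & ACPI_MADT_ENABLED;
	}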