-rw-r--r--  Documentation/admin-guide/perf/ampere_cspmu.rst    29
-rw-r--r--  Documentation/admin-guide/perf/index.rst             1
-rw-r--r--  arch/arm64/Kconfig                                   2
-rw-r--r--  arch/arm64/include/asm/cpufeature.h                  2
-rw-r--r--  arch/arm64/include/asm/cputype.h                     3
-rw-r--r--  arch/arm64/include/asm/irq.h                         3
-rw-r--r--  arch/arm64/include/asm/lse.h                         9
-rw-r--r--  arch/arm64/include/asm/mte.h                         4
-rw-r--r--  arch/arm64/include/asm/pgtable.h                    34
-rw-r--r--  arch/arm64/include/asm/smp.h                         4
-rw-r--r--  arch/arm64/kernel/acpi_parking_protocol.c            2
-rw-r--r--  arch/arm64/kernel/cpufeature.c                      55
-rw-r--r--  arch/arm64/kernel/idle.c                             4
-rw-r--r--  arch/arm64/kernel/module-plts.c                      6
-rw-r--r--  arch/arm64/kernel/mte.c                              4
-rw-r--r--  arch/arm64/kernel/smp.c                            144
-rw-r--r--  arch/arm64/kvm/guest.c                               2
-rw-r--r--  arch/arm64/mm/init.c                                11
-rw-r--r--  drivers/acpi/processor_core.c                        2
-rw-r--r--  drivers/clocksource/arm_arch_timer.c                 5
-rw-r--r--  drivers/irqchip/irq-gic-v3.c                        71
-rw-r--r--  drivers/perf/amlogic/meson_g12_ddr_pmu.c             1
-rw-r--r--  drivers/perf/arm-cmn.c                             154
-rw-r--r--  drivers/perf/arm_cspmu/Kconfig                      19
-rw-r--r--  drivers/perf/arm_cspmu/Makefile                      8
-rw-r--r--  drivers/perf/arm_cspmu/ampere_cspmu.c              272
-rw-r--r--  drivers/perf/arm_cspmu/arm_cspmu.c                 201
-rw-r--r--  drivers/perf/arm_cspmu/arm_cspmu.h                  32
-rw-r--r--  drivers/perf/arm_cspmu/nvidia_cspmu.c               34
-rw-r--r--  drivers/perf/arm_cspmu/nvidia_cspmu.h               17
-rw-r--r--  drivers/perf/arm_pmuv3.c                            46
-rw-r--r--  drivers/perf/hisilicon/hisi_pcie_pmu.c               9
-rw-r--r--  drivers/perf/hisilicon/hisi_uncore_pa_pmu.c          4
-rw-r--r--  drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c        4
-rw-r--r--  drivers/perf/hisilicon/hns3_pmu.c                    8
-rw-r--r--  drivers/perf/xgene_pmu.c                            37
-rw-r--r--  include/linux/acpi.h                                 5
-rw-r--r--  tools/testing/selftests/arm64/fp/sve-test.S         19
38 files changed, 933 insertions, 334 deletions
diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst
new file mode 100644
index 000000000000..94f93f5aee6c
--- /dev/null
+++ b/Documentation/admin-guide/perf/ampere_cspmu.rst
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Ampere SoC Performance Monitoring Unit (PMU)
+============================================
+
+Ampere SoC PMU is a generic PMU IP that follows Arm CoreSight PMU architecture.
+Therefore, the driver is implemented as a submodule of the arm_cspmu driver. In
+the first phase it is used for counting MCU events on AmpereOne.
+
+
+MCU PMU events
+--------------
+
+The PMU driver supports setting filters for "rank", "bank", and "threshold".
+Note that the filters are per PMU instance rather than per event.
+
+
+Example for perf tool use::
+
+ / # perf list ampere
+
+ ampere_mcu_pmu_0/act_sent/ [Kernel PMU event]
+ <...>
+ ampere_mcu_pmu_1/rd_sent/ [Kernel PMU event]
+ <...>
+
+ / # perf stat -a -e ampere_mcu_pmu_0/act_sent,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
+ sleep 1
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index f60be04e4e33..a2e6f2c81146 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -22,3 +22,4 @@ Performance monitor support
nvidia-pmu
meson-ddr-pmu
cxl
+ ampere_cspmu
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b10515c0200b..cacc67121adb 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1355,6 +1355,8 @@ choice
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
depends on !LD_IS_LLD || LLD_VERSION >= 130000
+ # https://github.com/llvm/llvm-project/commit/1379b150991f70a5782e9a143c2ba5308da1161c
+ depends on AS_IS_GNU || AS_VERSION >= 150000
help
Say Y if you plan on running a kernel with a big-endian userspace.
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 5bba39376055..19b4d001d845 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -23,6 +23,7 @@
#include <linux/bug.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
+#include <linux/cpumask.h>
/*
* CPU feature register tracking
@@ -380,6 +381,7 @@ struct arm64_cpu_capabilities {
* method is robust against being called multiple times.
*/
const struct arm64_cpu_capabilities *match_list;
+ const struct cpumask *cpus;
};
static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5f6f84837a49..818ea1c83b50 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -85,7 +85,8 @@
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define ARM_CPU_PART_CORTEX_A78C 0xD4B
-#define APM_CPU_PART_POTENZA 0x000
+#define APM_CPU_PART_XGENE 0x000
+#define APM_CPU_VAR_POTENZA 0x00
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index fac08e18bcd5..50ce8b697ff3 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -6,6 +6,9 @@
#include <asm-generic/irq.h>
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
struct pt_regs;
int set_handle_irq(void (*handle_irq)(struct pt_regs *));
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index cbbcdc35c4cd..3129a5819d0e 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -16,14 +16,9 @@
#include <asm/atomic_lse.h>
#include <asm/cpucaps.h>
-static __always_inline bool system_uses_lse_atomics(void)
-{
- return alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS);
-}
-
#define __lse_ll_sc_body(op, ...) \
({ \
- system_uses_lse_atomics() ? \
+ alternative_has_cap_likely(ARM64_HAS_LSE_ATOMICS) ? \
__lse_##op(__VA_ARGS__) : \
__ll_sc_##op(__VA_ARGS__); \
})
@@ -34,8 +29,6 @@ static __always_inline bool system_uses_lse_atomics(void)
#else /* CONFIG_ARM64_LSE_ATOMICS */
-static inline bool system_uses_lse_atomics(void) { return false; }
-
#define __lse_ll_sc_body(op, ...) __ll_sc_##op(__VA_ARGS__)
#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 4cedbaa16f41..91fbd5c8a391 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -90,7 +90,7 @@ static inline bool try_page_mte_tagging(struct page *page)
}
void mte_zero_clear_page_tags(void *addr);
-void mte_sync_tags(pte_t pte);
+void mte_sync_tags(pte_t pte, unsigned int nr_pages);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
@@ -122,7 +122,7 @@ static inline bool try_page_mte_tagging(struct page *page)
static inline void mte_zero_clear_page_tags(void *addr)
{
}
-static inline void mte_sync_tags(pte_t pte)
+static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7f7d9b1df4e5..b19a8aee684c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -325,8 +325,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
__func__, pte_val(old_pte), pte_val(pte));
}
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
{
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
@@ -339,24 +338,22 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
*/
if (system_supports_mte() && pte_access_permitted(pte, false) &&
!pte_special(pte) && pte_tagged(pte))
- mte_sync_tags(pte);
-
- __check_safe_pte_update(mm, ptep, pte);
-
- set_pte(ptep, pte);
+ mte_sync_tags(pte, nr_pages);
}
-static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
+static inline void set_ptes(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
{
page_table_check_ptes_set(mm, ptep, pte, nr);
+ __sync_cache_and_tags(pte, nr);
for (;;) {
- __set_pte_at(mm, addr, ptep, pte);
+ __check_safe_pte_update(mm, ptep, pte);
+ set_pte(ptep, pte);
if (--nr == 0)
break;
ptep++;
- addr += PAGE_SIZE;
pte_val(pte) += PAGE_SIZE;
}
}
@@ -531,18 +528,29 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
+static inline void __set_pte_at(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
+{
+ __sync_cache_and_tags(pte, nr);
+ __check_safe_pte_update(mm, ptep, pte);
+ set_pte(ptep, pte);
+}
+
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
page_table_check_pmd_set(mm, pmdp, pmd);
- return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd));
+ return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd),
+ PMD_SIZE >> PAGE_SHIFT);
}
static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
page_table_check_pud_set(mm, pudp, pud);
- return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud));
+ return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
+ PUD_SIZE >> PAGE_SHIFT);
}
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 9b31e6d0da17..efb13112b408 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -89,9 +89,9 @@ extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
-extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+extern void arch_send_wakeup_ipi(unsigned int cpu);
#else
-static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+static inline void arch_send_wakeup_ipi(unsigned int cpu)
{
BUILD_BUG();
}
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
index b1990e38aed0..e1be29e608b7 100644
--- a/arch/arm64/kernel/acpi_parking_protocol.c
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -103,7 +103,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
&mailbox->entry_point);
writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
- arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+ arch_send_wakeup_ipi(cpu);
return 0;
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index ea36dc532b3b..2ccb9dfd9960 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1828,6 +1828,8 @@ static int __init parse_kpti(char *str)
early_param("kpti", parse_kpti);
#ifdef CONFIG_ARM64_HW_AFDBM
+static struct cpumask dbm_cpus __read_mostly;
+
static inline void __cpu_enable_hw_dbm(void)
{
u64 tcr = read_sysreg(tcr_el1) | TCR_HD;
@@ -1863,35 +1865,22 @@ static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap)
static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap)
{
- if (cpu_can_use_dbm(cap))
+ if (cpu_can_use_dbm(cap)) {
__cpu_enable_hw_dbm();
+ cpumask_set_cpu(smp_processor_id(), &dbm_cpus);
+ }
}
static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap,
int __unused)
{
- static bool detected = false;
/*
* DBM is a non-conflicting feature. i.e, the kernel can safely
* run a mix of CPUs with and without the feature. So, we
* unconditionally enable the capability to allow any late CPU
* to use the feature. We only enable the control bits on the
- * CPU, if it actually supports.
- *
- * We have to make sure we print the "feature" detection only
- * when at least one CPU actually uses it. So check if this CPU
- * can actually use it and print the message exactly once.
- *
- * This is safe as all CPUs (including secondary CPUs - due to the
- * LOCAL_CPU scope - and the hotplugged CPUs - via verification)
- * goes through the "matches" check exactly once. Also if a CPU
- * matches the criteria, it is guaranteed that the CPU will turn
- * the DBM on, as the capability is unconditionally enabled.
+ * CPU, if it is supported.
*/
- if (!detected && cpu_can_use_dbm(cap)) {
- detected = true;
- pr_info("detected: Hardware dirty bit management\n");
- }
return true;
}
@@ -1924,8 +1913,6 @@ int get_cpu_with_amu_feat(void)
static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap)
{
if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) {
- pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n",
- smp_processor_id());
cpumask_set_cpu(smp_processor_id(), &amu_cpus);
/* 0 reference values signal broken/disabled counters */
@@ -2385,16 +2372,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
#endif /* CONFIG_ARM64_RAS_EXTN */
#ifdef CONFIG_ARM64_AMU_EXTN
{
- /*
- * The feature is enabled by default if CONFIG_ARM64_AMU_EXTN=y.
- * Therefore, don't provide .desc as we don't want the detection
- * message to be shown until at least one CPU is detected to
- * support the feature.
- */
+ .desc = "Activity Monitors Unit (AMU)",
.capability = ARM64_HAS_AMU_EXTN,
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.matches = has_amu,
.cpu_enable = cpu_amu_enable,
+ .cpus = &amu_cpus,
ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, AMU, IMP)
},
#endif /* CONFIG_ARM64_AMU_EXTN */
@@ -2434,18 +2417,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
},
#ifdef CONFIG_ARM64_HW_AFDBM
{
- /*
- * Since we turn this on always, we don't want the user to
- * think that the feature is available when it may not be.
- * So hide the description.
- *
- * .desc = "Hardware pagetable Dirty Bit Management",
- *
- */
+ .desc = "Hardware dirty bit management",
.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
.capability = ARM64_HW_DBM,
.matches = has_hw_dbm,
.cpu_enable = cpu_enable_hw_dbm,
+ .cpus = &dbm_cpus,
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM)
},
#endif
@@ -2961,7 +2938,7 @@ static void update_cpu_capabilities(u16 scope_mask)
!caps->matches(caps, cpucap_default_scope(caps)))
continue;
- if (caps->desc)
+ if (caps->desc && !caps->cpus)
pr_info("detected: %s\n", caps->desc);
__set_bit(caps->capability, system_cpucaps);
@@ -3294,6 +3271,7 @@ unsigned long cpu_get_elf_hwcap2(void)
static void __init setup_system_capabilities(void)
{
+ int i;
/*
* We have finalised the system-wide safe feature
* registers, finalise the capabilities that depend
@@ -3302,6 +3280,15 @@ static void __init setup_system_capabilities(void)
*/
update_cpu_capabilities(SCOPE_SYSTEM);
enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU);
+
+ for (i = 0; i < ARM64_NCAPS; i++) {
+ const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i];
+
+ if (caps && caps->cpus && caps->desc &&
+ cpumask_any(caps->cpus) < nr_cpu_ids)
+ pr_info("detected: %s on CPU%*pbl\n",
+ caps->desc, cpumask_pr_args(caps->cpus));
+ }
}
void __init setup_cpu_features(void)
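With the new per-capability .cpus mask, the deferred loop above emits one line per capability once the participating CPUs are known. Purely for illustration (the CPU lists shown are hypothetical), the resulting boot log lines would look like:

    detected: Activity Monitors Unit (AMU) on CPU0-3
    detected: Hardware dirty bit management on CPU0-7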
diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c
index c1125753fe9b..05cfb347ec26 100644
--- a/arch/arm64/kernel/idle.c
+++ b/arch/arm64/kernel/idle.c
@@ -20,7 +20,7 @@
* ensure that interrupts are not masked at the PMR (because the core will
* not wake up if we block the wake up signal in the interrupt controller).
*/
-void noinstr cpu_do_idle(void)
+void __cpuidle cpu_do_idle(void)
{
struct arm_cpuidle_irq_context context;
@@ -35,7 +35,7 @@ void noinstr cpu_do_idle(void)
/*
* This is our default idle handler.
*/
-void noinstr arch_cpu_idle(void)
+void __cpuidle arch_cpu_idle(void)
{
/*
* This should do all the clock switching and wait for interrupt
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index bd69a4e7cd60..79200f21e123 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -167,9 +167,6 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
switch (ELF64_R_TYPE(rela[i].r_info)) {
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
- if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- break;
-
/*
* We only have to consider branch targets that resolve
* to symbols that are defined in a different section.
@@ -269,9 +266,6 @@ static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela,
{
int i = 0, j = numrels - 1;
- if (!IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- return 0;
-
while (i < j) {
if (branch_rela_needs_plt(syms, &rela[i], dstidx))
i++;
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 4edecaac8f91..2fb5e7a7a4d5 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -35,10 +35,10 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif
-void mte_sync_tags(pte_t pte)
+void mte_sync_tags(pte_t pte, unsigned int nr_pages)
{
struct page *page = pte_page(pte);
- long i, nr_pages = compound_nr(page);
+ unsigned int i;
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 960b98b43506..af876a45363b 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -32,7 +32,9 @@
#include <linux/irq_work.h>
#include <linux/kernel_stat.h>
#include <linux/kexec.h>
+#include <linux/kgdb.h>
#include <linux/kvm_host.h>
+#include <linux/nmi.h>
#include <asm/alternative.h>
#include <asm/atomic.h>
@@ -72,13 +74,19 @@ enum ipi_msg_type {
IPI_CPU_CRASH_STOP,
IPI_TIMER,
IPI_IRQ_WORK,
- IPI_WAKEUP,
- NR_IPI
+ NR_IPI,
+ /*
+ * Any enum >= NR_IPI and < MAX_IPI is special and not traceable
+ * with trace_ipi_*
+ */
+ IPI_CPU_BACKTRACE = NR_IPI,
+ IPI_KGDB_ROUNDUP,
+ MAX_IPI
};
-static int ipi_irq_base __read_mostly;
-static int nr_ipi __read_mostly = NR_IPI;
-static struct irq_desc *ipi_desc[NR_IPI] __read_mostly;
+static int ipi_irq_base __ro_after_init;
+static int nr_ipi __ro_after_init = NR_IPI;
+static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
static void ipi_setup(int cpu);
@@ -520,7 +528,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
{
u64 hwid = processor->arm_mpidr;
- if (!(processor->flags & ACPI_MADT_ENABLED)) {
+ if (!acpi_gicc_is_usable(processor)) {
pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
return;
}
@@ -764,7 +772,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
[IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
- [IPI_WAKEUP] = "CPU wake-up interrupts",
};
static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
@@ -797,13 +804,6 @@ void arch_send_call_function_single_ipi(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
}
-#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
-void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
-{
- smp_cross_call(mask, IPI_WAKEUP);
-}
-#endif
-
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
@@ -854,6 +854,38 @@ static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs
#endif
}
+static void arm64_backtrace_ipi(cpumask_t *mask)
+{
+ __ipi_send_mask(ipi_desc[IPI_CPU_BACKTRACE], mask);
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
+{
+ /*
+ * NOTE: though nmi_trigger_cpumask_backtrace() has "nmi_" in the name,
+ * nothing about it truly needs to be implemented using an NMI, it's
+ * just that it's _allowed_ to work with NMIs. If ipi_should_be_nmi()
+ * returned false our backtrace attempt will just use a regular IPI.
+ */
+ nmi_trigger_cpumask_backtrace(mask, exclude_cpu, arm64_backtrace_ipi);
+}
+
+#ifdef CONFIG_KGDB
+void kgdb_roundup_cpus(void)
+{
+ int this_cpu = raw_smp_processor_id();
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ /* No need to roundup ourselves */
+ if (cpu == this_cpu)
+ continue;
+
+ __ipi_send_single(ipi_desc[IPI_KGDB_ROUNDUP], cpu);
+ }
+}
+#endif
+
/*
* Main handler for inter-processor interrupts
*/
@@ -897,13 +929,17 @@ static void do_handle_IPI(int ipinr)
break;
#endif
-#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
- case IPI_WAKEUP:
- WARN_ONCE(!acpi_parking_protocol_valid(cpu),
- "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
- cpu);
+ case IPI_CPU_BACKTRACE:
+ /*
+ * NOTE: in some cases this _won't_ be NMI context. See the
+ * comment in arch_trigger_cpumask_backtrace().
+ */
+ nmi_cpu_backtrace(get_irq_regs());
+ break;
+
+ case IPI_KGDB_ROUNDUP:
+ kgdb_nmicallback(cpu, get_irq_regs());
break;
-#endif
default:
pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
@@ -926,6 +962,25 @@ static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
__ipi_send_mask(ipi_desc[ipinr], target);
}
+static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
+{
+ DECLARE_STATIC_KEY_FALSE(supports_pseudo_nmis);
+
+ if (!system_uses_irq_prio_masking() ||
+ !static_branch_likely(&supports_pseudo_nmis))
+ return false;
+
+ switch (ipi) {
+ case IPI_CPU_STOP:
+ case IPI_CPU_CRASH_STOP:
+ case IPI_CPU_BACKTRACE:
+ case IPI_KGDB_ROUNDUP:
+ return true;
+ default:
+ return false;
+ }
+}
+
static void ipi_setup(int cpu)
{
int i;
@@ -933,8 +988,14 @@ static void ipi_setup(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
- for (i = 0; i < nr_ipi; i++)
- enable_percpu_irq(ipi_irq_base + i, 0);
+ for (i = 0; i < nr_ipi; i++) {
+ if (ipi_should_be_nmi(i)) {
+ prepare_percpu_nmi(ipi_irq_base + i);
+ enable_percpu_nmi(ipi_irq_base + i, 0);
+ } else {
+ enable_percpu_irq(ipi_irq_base + i, 0);
+ }
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -945,8 +1006,14 @@ static void ipi_teardown(int cpu)
if (WARN_ON_ONCE(!ipi_irq_base))
return;
- for (i = 0; i < nr_ipi; i++)
- disable_percpu_irq(ipi_irq_base + i);
+ for (i = 0; i < nr_ipi; i++) {
+ if (ipi_should_be_nmi(i)) {
+ disable_percpu_nmi(ipi_irq_base + i);
+ teardown_percpu_nmi(ipi_irq_base + i);
+ } else {
+ disable_percpu_irq(ipi_irq_base + i);
+ }
+ }
}
#endif
@@ -954,15 +1021,23 @@ void __init set_smp_ipi_range(int ipi_base, int n)
{
int i;
- WARN_ON(n < NR_IPI);
- nr_ipi = min(n, NR_IPI);
+ WARN_ON(n < MAX_IPI);
+ nr_ipi = min(n, MAX_IPI);
for (i = 0; i < nr_ipi; i++) {
int err;
- err = request_percpu_irq(ipi_base + i, ipi_handler,
- "IPI", &cpu_number);
- WARN_ON(err);
+ if (ipi_should_be_nmi(i)) {
+ err = request_percpu_nmi(ipi_base + i, ipi_handler,
+ "IPI", &cpu_number);
+ WARN(err, "Could not request IPI %d as NMI, err=%d\n",
+ i, err);
+ } else {
+ err = request_percpu_irq(ipi_base + i, ipi_handler,
+ "IPI", &cpu_number);
+ WARN(err, "Could not request IPI %d as IRQ, err=%d\n",
+ i, err);
+ }
ipi_desc[i] = irq_to_desc(ipi_base + i);
irq_set_status_flags(ipi_base + i, IRQ_HIDDEN);
@@ -979,6 +1054,17 @@ void arch_smp_send_reschedule(int cpu)
smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
}
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi(unsigned int cpu)
+{
+ /*
+ * We use a scheduler IPI to wake the CPU as this avoids the need for a
+ * dedicated IPI and we can safely handle spurious scheduler IPIs.
+ */
+ smp_send_reschedule(cpu);
+}
+#endif
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 95f6945c4432..a1710e5fa72b 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -874,7 +874,7 @@ u32 __attribute_const__ kvm_target_cpu(void)
break;
case ARM_CPU_IMP_APM:
switch (part_number) {
- case APM_CPU_PART_POTENZA:
+ case APM_CPU_PART_XGENE:
return KVM_ARM_TARGET_XGENE_POTENZA;
}
break;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 8a0f8604348b..8deec68028ac 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -16,6 +16,7 @@
#include <linux/nodemask.h>
#include <linux/initrd.h>
#include <linux/gfp.h>
+#include <linux/math.h>
#include <linux/memblock.h>
#include <linux/sort.h>
#include <linux/of.h>
@@ -493,8 +494,16 @@ void __init mem_init(void)
{
bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit);
- if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC))
+ if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb) {
+ /*
+ * If no bouncing needed for ZONE_DMA, reduce the swiotlb
+ * buffer for kmalloc() bouncing to 1MB per 1GB of RAM.
+ */
+ unsigned long size =
+ DIV_ROUND_UP(memblock_phys_mem_size(), 1024);
+ swiotlb_adjust_size(min(swiotlb_size_or_default(), size));
swiotlb = true;
+ }
swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
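A rough worked example of the new sizing (assuming a machine with 16 GiB of RAM and the usual 64 MiB SWIOTLB default; both figures are illustrative):

    size = DIV_ROUND_UP(memblock_phys_mem_size(), 1024)
         = DIV_ROUND_UP(16 GiB, 1024) = 16 MiB
    swiotlb_adjust_size(min(64 MiB, 16 MiB))   /* 16 MiB kmalloc() bounce buffer */

i.e. roughly 1 MiB of bounce buffer per 1 GiB of RAM, as the comment in the hunk states.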
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 7dd6dbaa98c3..b203cfe28550 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -90,7 +90,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry,
struct acpi_madt_generic_interrupt *gicc =
container_of(entry, struct acpi_madt_generic_interrupt, header);
- if (!(gicc->flags & ACPI_MADT_ENABLED))
+ if (!acpi_gicc_is_usable(gicc))
return -ENODEV;
/* device_declaration means Device object in DSDT, in the
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 7dd2c615bce2..071b04f1ee73 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -836,8 +836,9 @@ static u64 __arch_timer_check_delta(void)
* Note that TVAL is signed, thus has only 31 of its
* 32 bits to express magnitude.
*/
- MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
- APM_CPU_PART_POTENZA)),
+ MIDR_REV_RANGE(MIDR_CPU_MODEL(ARM_CPU_IMP_APM,
+ APM_CPU_PART_XGENE),
+ APM_CPU_VAR_POTENZA, 0x0, 0xf),
{},
};
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index eedfa8e9f077..580f155fa0ff 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -79,6 +79,13 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
#define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer)
/*
+ * There are 16 SGIs, though we only actually use 8 in Linux. The other 8 SGIs
+ * are potentially stolen by the secure side. Some code, especially code dealing
+ * with hwirq IDs, is simplified by accounting for all 16.
+ */
+#define SGI_NR 16
+
+/*
* The behaviours of RPR and PMR registers differ depending on the value of
* SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the
* distributor and redistributors depends on whether security is enabled in the
@@ -99,7 +106,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
* - Figure 4-7 Secure read of the priority field for a Non-secure Group 1
* interrupt.
*/
-static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
+DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities);
EXPORT_SYMBOL(gic_nonsecure_priorities);
@@ -125,8 +132,8 @@ EXPORT_SYMBOL(gic_nonsecure_priorities);
__priority; \
})
-/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
-static refcount_t *ppi_nmi_refs;
+/* rdist_nmi_refs[n] == number of cpus having the rdist interrupt n set as NMI */
+static refcount_t *rdist_nmi_refs;
static struct gic_kvm_info gic_v3_kvm_info __initdata;
static DEFINE_PER_CPU(bool, has_rss);
@@ -519,9 +526,22 @@ static u32 __gic_get_ppi_index(irq_hw_number_t hwirq)
}
}
-static u32 gic_get_ppi_index(struct irq_data *d)
+static u32 __gic_get_rdist_index(irq_hw_number_t hwirq)
+{
+ switch (__get_intid_range(hwirq)) {
+ case SGI_RANGE:
+ case PPI_RANGE:
+ return hwirq;
+ case EPPI_RANGE:
+ return hwirq - EPPI_BASE_INTID + 32;
+ default:
+ unreachable();
+ }
+}
+
+static u32 gic_get_rdist_index(struct irq_data *d)
{
- return __gic_get_ppi_index(d->hwirq);
+ return __gic_get_rdist_index(d->hwirq);
}
static int gic_irq_nmi_setup(struct irq_data *d)
@@ -545,11 +565,14 @@ static int gic_irq_nmi_setup(struct irq_data *d)
/* desc lock should already be held */
if (gic_irq_in_rdist(d)) {
- u32 idx = gic_get_ppi_index(d);
+ u32 idx = gic_get_rdist_index(d);
- /* Setting up PPI as NMI, only switch handler for first NMI */
- if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) {
- refcount_set(&ppi_nmi_refs[idx], 1);
+ /*
+ * Setting up a percpu interrupt as NMI, only switch handler
+ * for first NMI
+ */
+ if (!refcount_inc_not_zero(&rdist_nmi_refs[idx])) {
+ refcount_set(&rdist_nmi_refs[idx], 1);
desc->handle_irq = handle_percpu_devid_fasteoi_nmi;
}
} else {
@@ -582,10 +605,10 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
/* desc lock should already be held */
if (gic_irq_in_rdist(d)) {
- u32 idx = gic_get_ppi_index(d);
+ u32 idx = gic_get_rdist_index(d);
/* Tearing down NMI, only switch handler for last NMI */
- if (refcount_dec_and_test(&ppi_nmi_refs[idx]))
+ if (refcount_dec_and_test(&rdist_nmi_refs[idx]))
desc->handle_irq = handle_percpu_devid_irq;
} else {
desc->handle_irq = handle_fasteoi_irq;
@@ -1279,10 +1302,10 @@ static void gic_cpu_init(void)
rbase = gic_data_rdist_sgi_base();
/* Configure SGIs/PPIs as non-secure Group-1 */
- for (i = 0; i < gic_data.ppi_nr + 16; i += 32)
+ for (i = 0; i < gic_data.ppi_nr + SGI_NR; i += 32)
writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8);
- gic_cpu_config(rbase, gic_data.ppi_nr + 16, gic_redist_wait_for_rwp);
+ gic_cpu_config(rbase, gic_data.ppi_nr + SGI_NR, gic_redist_wait_for_rwp);
/* initialise system registers */
gic_cpu_sys_reg_init();
@@ -1939,12 +1962,13 @@ static void gic_enable_nmi_support(void)
return;
}
- ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL);
- if (!ppi_nmi_refs)
+ rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR,
+ sizeof(*rdist_nmi_refs), GFP_KERNEL);
+ if (!rdist_nmi_refs)
return;
- for (i = 0; i < gic_data.ppi_nr; i++)
- refcount_set(&ppi_nmi_refs[i], 0);
+ for (i = 0; i < gic_data.ppi_nr + SGI_NR; i++)
+ refcount_set(&rdist_nmi_refs[i], 0);
pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n",
gic_has_relaxed_pmr_sync() ? "relaxed" : "forced");
@@ -2061,6 +2085,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
gic_dist_init();
gic_cpu_init();
+ gic_enable_nmi_support();
gic_smp_init();
gic_cpu_pm_init();
@@ -2073,8 +2098,6 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
gicv2m_init(handle, gic_data.domain);
}
- gic_enable_nmi_support();
-
return 0;
out_free:
@@ -2367,8 +2390,7 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
void __iomem *redist_base;
- /* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */
- if (!(gicc->flags & ACPI_MADT_ENABLED))
+ if (!acpi_gicc_is_usable(gicc))
return 0;
redist_base = ioremap(gicc->gicr_base_address, size);
@@ -2418,7 +2440,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
* If GICC is enabled and has valid gicr base address, then it means
* GICR base is presented via GICC
*/
- if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) {
+ if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) {
acpi_data.enabled_rdists++;
return 0;
}
@@ -2427,7 +2449,7 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
* It's perfectly valid firmware can pass disabled GICC entry, driver
* should not treat as errors, skip the entry instead of probe fail.
*/
- if (!(gicc->flags & ACPI_MADT_ENABLED))
+ if (!acpi_gicc_is_usable(gicc))
return 0;
return -ENODEV;
@@ -2486,8 +2508,7 @@ static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *hea
int maint_irq_mode;
static int first_madt = true;
- /* Skip unusable CPUs */
- if (!(gicc->flags & ACPI_MADT_ENABLED))
+ if (!acpi_gicc_is_usable(gicc))
return 0;
maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
diff --git a/drivers/perf/amlogic/meson_g12_ddr_pmu.c b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
index 8b643888d503..15d52ab3276a 100644
--- a/drivers/perf/amlogic/meson_g12_ddr_pmu.c
+++ b/drivers/perf/amlogic/meson_g12_ddr_pmu.c
@@ -377,6 +377,7 @@ static const struct of_device_id meson_ddr_pmu_dt_match[] = {
},
{}
};
+MODULE_DEVICE_TABLE(of, meson_ddr_pmu_dt_match);
static struct platform_driver g12_ddr_pmu_driver = {
.probe = g12_ddr_pmu_probe,
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 913dc04b3a40..9479e919c063 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -112,7 +112,9 @@
#define CMN_DTM_PMEVCNTSR 0x240
-#define CMN_DTM_UNIT_INFO 0x0910
+#define CMN650_DTM_UNIT_INFO 0x0910
+#define CMN_DTM_UNIT_INFO 0x0960
+#define CMN_DTM_UNIT_INFO_DTC_DOMAIN GENMASK_ULL(1, 0)
#define CMN_DTM_NUM_COUNTERS 4
/* Want more local counters? Why not replicate the whole DTM! Ugh... */
@@ -279,16 +281,13 @@ struct arm_cmn_node {
u16 id, logid;
enum cmn_node_type type;
- int dtm;
- union {
- /* DN/HN-F/CXHA */
- struct {
- u8 val : 4;
- u8 count : 4;
- } occupid[SEL_MAX];
- /* XP */
- u8 dtc;
- };
+ u8 dtm;
+ s8 dtc;
+ /* DN/HN-F/CXHA */
+ struct {
+ u8 val : 4;
+ u8 count : 4;
+ } occupid[SEL_MAX];
union {
u8 event[4];
__le32 event_sel;
@@ -538,12 +537,12 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)
seq_puts(s, "\n |");
for (x = 0; x < cmn->mesh_x; x++) {
- u8 dtc = cmn->xps[xp_base + x].dtc;
+ s8 dtc = cmn->xps[xp_base + x].dtc;
- if (dtc & (dtc - 1))
+ if (dtc < 0)
seq_puts(s, " DTC ?? |");
else
- seq_printf(s, " DTC %ld |", __ffs(dtc));
+ seq_printf(s, " DTC %d |", dtc);
}
seq_puts(s, "\n |");
for (x = 0; x < cmn->mesh_x; x++)
@@ -587,8 +586,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
struct arm_cmn_hw_event {
struct arm_cmn_node *dn;
u64 dtm_idx[4];
- unsigned int dtc_idx;
- u8 dtcs_used;
+ s8 dtc_idx[CMN_MAX_DTCS];
u8 num_dns;
u8 dtm_offset;
bool wide_sel;
@@ -598,6 +596,10 @@ struct arm_cmn_hw_event {
#define for_each_hw_dn(hw, dn, i) \
for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
+/* @i is the DTC number, @idx is the counter index on that DTC */
+#define for_each_hw_dtc_idx(hw, i, idx) \
+ for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0)
+
static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
{
BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
@@ -1427,12 +1429,11 @@ static void arm_cmn_init_counter(struct perf_event *event)
{
struct arm_cmn *cmn = to_cmn(event->pmu);
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
- unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
u64 count;
- for (i = 0; hw->dtcs_used & (1U << i); i++) {
- writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
- cmn->dtc[i].counters[hw->dtc_idx] = event;
+ for_each_hw_dtc_idx(hw, i, idx) {
+ writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx));
+ cmn->dtc[i].counters[idx] = event;
}
count = arm_cmn_read_dtm(cmn, hw, false);
@@ -1445,11 +1446,9 @@ static void arm_cmn_event_read(struct perf_event *event)
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
u64 delta, new, prev;
unsigned long flags;
- unsigned int i;
- if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) {
- i = __ffs(hw->dtcs_used);
- delta = arm_cmn_read_cc(cmn->dtc + i);
+ if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) {
+ delta = arm_cmn_read_cc(cmn->dtc + hw->dtc_idx[0]);
local64_add(delta, &event->count);
return;
}
@@ -1459,8 +1458,8 @@ static void arm_cmn_event_read(struct perf_event *event)
delta = new - prev;
local_irq_save(flags);
- for (i = 0; hw->dtcs_used & (1U << i); i++) {
- new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
+ for_each_hw_dtc_idx(hw, i, idx) {
+ new = arm_cmn_read_counter(cmn->dtc + i, idx);
delta += new << 16;
}
local_irq_restore(flags);
@@ -1516,7 +1515,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
- i = __ffs(hw->dtcs_used);
+ i = hw->dtc_idx[0];
writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
cmn->dtc[i].cc_active = true;
} else if (type == CMN_TYPE_WP) {
@@ -1547,7 +1546,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
int i;
if (type == CMN_TYPE_DTC) {
- i = __ffs(hw->dtcs_used);
+ i = hw->dtc_idx[0];
cmn->dtc[i].cc_active = false;
} else if (type == CMN_TYPE_WP) {
int wp_idx = arm_cmn_wp_idx(event);
@@ -1571,7 +1570,7 @@ struct arm_cmn_val {
u8 dtm_count[CMN_MAX_DTMS];
u8 occupid[CMN_MAX_DTMS][SEL_MAX];
u8 wp[CMN_MAX_DTMS][4];
- int dtc_count;
+ int dtc_count[CMN_MAX_DTCS];
bool cycles;
};
@@ -1592,7 +1591,8 @@ static void arm_cmn_val_add_event(struct arm_cmn *cmn, struct arm_cmn_val *val,
return;
}
- val->dtc_count++;
+ for_each_hw_dtc_idx(hw, dtc, idx)
+ val->dtc_count[dtc]++;
for_each_hw_dn(hw, dn, i) {
int wp_idx, dtm = dn->dtm, sel = hw->filter_sel;
@@ -1639,8 +1639,9 @@ static int arm_cmn_validate_group(struct arm_cmn *cmn, struct perf_event *event)
goto done;
}
- if (val->dtc_count == CMN_DT_NUM_COUNTERS)
- goto done;
+ for (i = 0; i < CMN_MAX_DTCS; i++)
+ if (val->dtc_count[i] == CMN_DT_NUM_COUNTERS)
+ goto done;
for_each_hw_dn(hw, dn, i) {
int wp_idx, wp_cmb, dtm = dn->dtm, sel = hw->filter_sel;
@@ -1733,12 +1734,19 @@ static int arm_cmn_event_init(struct perf_event *event)
hw->dn = arm_cmn_node(cmn, type);
if (!hw->dn)
return -EINVAL;
+
+ memset(hw->dtc_idx, -1, sizeof(hw->dtc_idx));
for (dn = hw->dn; dn->type == type; dn++) {
if (bynodeid && dn->id != nodeid) {
hw->dn++;
continue;
}
hw->num_dns++;
+ if (dn->dtc < 0)
+ memset(hw->dtc_idx, 0, cmn->num_dtcs);
+ else
+ hw->dtc_idx[dn->dtc] = 0;
+
if (bynodeid)
break;
}
@@ -1750,12 +1758,6 @@ static int arm_cmn_event_init(struct perf_event *event)
nodeid, nid.x, nid.y, nid.port, nid.dev, type);
return -EINVAL;
}
- /*
- * Keep assuming non-cycles events count in all DTC domains; turns out
- * it's hard to make a worthwhile optimisation around this, short of
- * going all-in with domain-local counter allocation as well.
- */
- hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
return arm_cmn_validate_group(cmn, event);
}
@@ -1781,46 +1783,48 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
}
memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
- for (i = 0; hw->dtcs_used & (1U << i); i++)
- cmn->dtc[i].counters[hw->dtc_idx] = NULL;
+ for_each_hw_dtc_idx(hw, j, idx)
+ cmn->dtc[j].counters[idx] = NULL;
}
static int arm_cmn_event_add(struct perf_event *event, int flags)
{
struct arm_cmn *cmn = to_cmn(event->pmu);
struct arm_cmn_hw_event *hw = to_cmn_hw(event);
- struct arm_cmn_dtc *dtc = &cmn->dtc[0];
struct arm_cmn_node *dn;
enum cmn_node_type type = CMN_EVENT_TYPE(event);
- unsigned int i, dtc_idx, input_sel;
+ unsigned int input_sel, i = 0;
if (type == CMN_TYPE_DTC) {
- i = 0;
while (cmn->dtc[i].cycles)
if (++i == cmn->num_dtcs)
return -ENOSPC;
cmn->dtc[i].cycles = event;
- hw->dtc_idx = CMN_DT_NUM_COUNTERS;
- hw->dtcs_used = 1U << i;
+ hw->dtc_idx[0] = i;
if (flags & PERF_EF_START)
arm_cmn_event_start(event, 0);
return 0;
}
- /* Grab a free global counter first... */
- dtc_idx = 0;
- while (dtc->counters[dtc_idx])
- if (++dtc_idx == CMN_DT_NUM_COUNTERS)
- return -ENOSPC;
-
- hw->dtc_idx = dtc_idx;
+ /* Grab the global counters first... */
+ for_each_hw_dtc_idx(hw, j, idx) {
+ if (cmn->part == PART_CMN600 && j > 0) {
+ idx = hw->dtc_idx[0];
+ } else {
+ idx = 0;
+ while (cmn->dtc[j].counters[idx])
+ if (++idx == CMN_DT_NUM_COUNTERS)
+ goto free_dtms;
+ }
+ hw->dtc_idx[j] = idx;
+ }
- /* ...then the local counters to feed it. */
+ /* ...then the local counters to feed them */
for_each_hw_dn(hw, dn, i) {
struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
- unsigned int dtm_idx, shift;
+ unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
u64 reg;
dtm_idx = 0;
@@ -1839,11 +1843,11 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
tmp = dtm->wp_event[wp_idx ^ 1];
if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
- CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
+ CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
goto free_dtms;
input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
- dtm->wp_event[wp_idx] = dtc_idx;
+ dtm->wp_event[wp_idx] = hw->dtc_idx[d];
writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
} else {
struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
@@ -1863,7 +1867,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
dtm->input_sel[dtm_idx] = input_sel;
shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
- dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
+ dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, hw->dtc_idx[d]) << shift;
dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low;
writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG);
@@ -1891,7 +1895,7 @@ static void arm_cmn_event_del(struct perf_event *event, int flags)
arm_cmn_event_stop(event, PERF_EF_UPDATE);
if (type == CMN_TYPE_DTC)
- cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL;
+ cmn->dtc[hw->dtc_idx[0]].cycles = NULL;
else
arm_cmn_event_clear(cmn, event, hw->num_dns);
}
@@ -2072,7 +2076,6 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
{
struct arm_cmn_node *dn, *xp;
int dtc_idx = 0;
- u8 dtcs_present = (1 << cmn->num_dtcs) - 1;
cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL);
if (!cmn->dtc)
@@ -2082,23 +2085,26 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP);
+ if (cmn->part == PART_CMN600 && cmn->num_dtcs > 1) {
+ /* We do at least know that a DTC's XP must be in that DTC's domain */
+ dn = arm_cmn_node(cmn, CMN_TYPE_DTC);
+ for (int i = 0; i < cmn->num_dtcs; i++)
+ arm_cmn_node_to_xp(cmn, dn + i)->dtc = i;
+ }
+
for (dn = cmn->dns; dn->type; dn++) {
- if (dn->type == CMN_TYPE_XP) {
- dn->dtc &= dtcs_present;
+ if (dn->type == CMN_TYPE_XP)
continue;
- }
xp = arm_cmn_node_to_xp(cmn, dn);
+ dn->dtc = xp->dtc;
dn->dtm = xp->dtm;
if (cmn->multi_dtm)
dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;
if (dn->type == CMN_TYPE_DTC) {
- int err;
- /* We do at least know that a DTC's XP must be in that DTC's domain */
- if (xp->dtc == 0xf)
- xp->dtc = 1 << dtc_idx;
- err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+ int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+
if (err)
return err;
}
@@ -2117,6 +2123,16 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
return 0;
}
+static unsigned int arm_cmn_dtc_domain(struct arm_cmn *cmn, void __iomem *xp_region)
+{
+ int offset = CMN_DTM_UNIT_INFO;
+
+ if (cmn->part == PART_CMN650 || cmn->part == PART_CI700)
+ offset = CMN650_DTM_UNIT_INFO;
+
+ return FIELD_GET(CMN_DTM_UNIT_INFO_DTC_DOMAIN, readl_relaxed(xp_region + offset));
+}
+
static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
{
int level;
@@ -2246,9 +2262,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
cmn->mesh_x = xp->logid;
if (cmn->part == PART_CMN600)
- xp->dtc = 0xf;
+ xp->dtc = -1;
else
- xp->dtc = 1 << readl_relaxed(xp_region + CMN_DTM_UNIT_INFO);
+ xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);
xp->dtm = dtm - cmn->dtms;
arm_cmn_init_dtm(dtm++, xp, 0);
diff --git a/drivers/perf/arm_cspmu/Kconfig b/drivers/perf/arm_cspmu/Kconfig
index 25d25ded0983..6f4e28fc84a2 100644
--- a/drivers/perf/arm_cspmu/Kconfig
+++ b/drivers/perf/arm_cspmu/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
#
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
tristate "ARM Coresight Architecture PMU"
@@ -10,3 +10,20 @@ config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
based on ARM CoreSight PMU architecture. Note that this PMU
architecture does not have relationship with the ARM CoreSight
Self-Hosted Tracing.
+
+config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ tristate "NVIDIA Coresight Architecture PMU"
+ depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ help
+ Provides NVIDIA specific attributes for performance monitoring unit
+ (PMU) devices based on ARM CoreSight PMU architecture.
+
+config AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ tristate "Ampere Coresight Architecture PMU"
+ depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+ help
+ Provides Ampere specific attributes for performance monitoring unit
+ (PMU) devices based on ARM CoreSight PMU architecture.
+
+ In the first phase, the driver enables support for the MCU PMU used in
+ the AmpereOne SoC family.
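For illustration only, a .config fragment that builds the new Ampere backend as a module alongside the core driver could look like the following (option names taken from the Kconfig and Makefile entries in this patch):

    CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
    CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU=m
    # CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU is not set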
diff --git a/drivers/perf/arm_cspmu/Makefile b/drivers/perf/arm_cspmu/Makefile
index fedb17df982d..220a734efd54 100644
--- a/drivers/perf/arm_cspmu/Makefile
+++ b/drivers/perf/arm_cspmu/Makefile
@@ -1,6 +1,10 @@
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
-arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
+
+arm_cspmu_module-y := arm_cspmu.o
+
+obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
+obj-$(CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += ampere_cspmu.o
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c
new file mode 100644
index 000000000000..f146a455e838
--- /dev/null
+++ b/drivers/perf/arm_cspmu/ampere_cspmu.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ampere SoC PMU (Performance Monitor Unit)
+ *
+ * Copyright (c) 2023, Ampere Computing LLC
+ */
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+
+#include "arm_cspmu.h"
+
+#define PMAUXR0 0xD80
+#define PMAUXR1 0xD84
+#define PMAUXR2 0xD88
+#define PMAUXR3 0xD8C
+
+#define to_ampere_cspmu_ctx(cspmu) ((struct ampere_cspmu_ctx *)(cspmu->impl.ctx))
+
+struct ampere_cspmu_ctx {
+ const char *name;
+ struct attribute **event_attr;
+ struct attribute **format_attr;
+};
+
+static DEFINE_IDA(mcu_pmu_ida);
+
+#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \
+ static inline u32 get_##_name(const struct perf_event *event) \
+ { \
+ return FIELD_GET(GENMASK_ULL(_end, _start), \
+ event->attr._config); \
+ } \
+
+SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config1, 24, 55);
+
+static struct attribute *ampereone_mcu_pmu_event_attrs[] = {
+ ARM_CSPMU_EVENT_ATTR(cycle_count, 0x00),
+ ARM_CSPMU_EVENT_ATTR(act_sent, 0x01),
+ ARM_CSPMU_EVENT_ATTR(pre_sent, 0x02),
+ ARM_CSPMU_EVENT_ATTR(rd_sent, 0x03),
+ ARM_CSPMU_EVENT_ATTR(rda_sent, 0x04),
+ ARM_CSPMU_EVENT_ATTR(wr_sent, 0x05),
+ ARM_CSPMU_EVENT_ATTR(wra_sent, 0x06),
+ ARM_CSPMU_EVENT_ATTR(pd_entry_vld, 0x07),
+ ARM_CSPMU_EVENT_ATTR(sref_entry_vld, 0x08),
+ ARM_CSPMU_EVENT_ATTR(prea_sent, 0x09),
+ ARM_CSPMU_EVENT_ATTR(pre_sb_sent, 0x0a),
+ ARM_CSPMU_EVENT_ATTR(ref_sent, 0x0b),
+ ARM_CSPMU_EVENT_ATTR(rfm_sent, 0x0c),
+ ARM_CSPMU_EVENT_ATTR(ref_sb_sent, 0x0d),
+ ARM_CSPMU_EVENT_ATTR(rfm_sb_sent, 0x0e),
+ ARM_CSPMU_EVENT_ATTR(rd_rda_sent, 0x0f),
+ ARM_CSPMU_EVENT_ATTR(wr_wra_sent, 0x10),
+ ARM_CSPMU_EVENT_ATTR(raw_hazard, 0x11),
+ ARM_CSPMU_EVENT_ATTR(war_hazard, 0x12),
+ ARM_CSPMU_EVENT_ATTR(waw_hazard, 0x13),
+ ARM_CSPMU_EVENT_ATTR(rar_hazard, 0x14),
+ ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard, 0x15),
+ ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld, 0x16),
+ ARM_CSPMU_EVENT_ATTR(lprd_req_vld, 0x17),
+ ARM_CSPMU_EVENT_ATTR(hprd_req_vld, 0x18),
+ ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld, 0x19),
+ ARM_CSPMU_EVENT_ATTR(prefetch_tgt, 0x1a),
+ ARM_CSPMU_EVENT_ATTR(wr_req_vld, 0x1b),
+ ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld, 0x1c),
+ ARM_CSPMU_EVENT_ATTR(rd_retry, 0x1d),
+ ARM_CSPMU_EVENT_ATTR(wr_retry, 0x1e),
+ ARM_CSPMU_EVENT_ATTR(retry_gnt, 0x1f),
+ ARM_CSPMU_EVENT_ATTR(rank_change, 0x20),
+ ARM_CSPMU_EVENT_ATTR(dir_change, 0x21),
+ ARM_CSPMU_EVENT_ATTR(rank_dir_change, 0x22),
+ ARM_CSPMU_EVENT_ATTR(rank_active, 0x23),
+ ARM_CSPMU_EVENT_ATTR(rank_idle, 0x24),
+ ARM_CSPMU_EVENT_ATTR(rank_pd, 0x25),
+ ARM_CSPMU_EVENT_ATTR(rank_sref, 0x26),
+ ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh, 0x27),
+ ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh, 0x28),
+ ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh, 0x29),
+ ARM_CSPMU_EVENT_ATTR(phy_updt_complt, 0x2a),
+ ARM_CSPMU_EVENT_ATTR(tz_fail, 0x2b),
+ ARM_CSPMU_EVENT_ATTR(dram_errc, 0x2c),
+ ARM_CSPMU_EVENT_ATTR(dram_errd, 0x2d),
+ ARM_CSPMU_EVENT_ATTR(read_data_return, 0x32),
+ ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta, 0x33),
+ ARM_CSPMU_EVENT_ATTR(zq_start, 0x34),
+ ARM_CSPMU_EVENT_ATTR(zq_latch, 0x35),
+ ARM_CSPMU_EVENT_ATTR(wr_fifo_full, 0x36),
+ ARM_CSPMU_EVENT_ATTR(info_fifo_full, 0x37),
+ ARM_CSPMU_EVENT_ATTR(cmd_fifo_full, 0x38),
+ ARM_CSPMU_EVENT_ATTR(dfi_nop, 0x39),
+ ARM_CSPMU_EVENT_ATTR(dfi_cmd, 0x3a),
+ ARM_CSPMU_EVENT_ATTR(rd_run_len, 0x3b),
+ ARM_CSPMU_EVENT_ATTR(wr_run_len, 0x3c),
+
+ ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
+ NULL,
+};
+
+static struct attribute *ampereone_mcu_format_attrs[] = {
+ ARM_CSPMU_FORMAT_EVENT_ATTR,
+ ARM_CSPMU_FORMAT_ATTR(threshold, "config1:0-7"),
+ ARM_CSPMU_FORMAT_ATTR(rank, "config1:8-23"),
+ ARM_CSPMU_FORMAT_ATTR(bank, "config1:24-55"),
+ NULL,
+};
+
+static struct attribute **
+ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+ const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+ return ctx->event_attr;
+}
+
+static struct attribute **
+ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+ const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+ return ctx->format_attr;
+}
+
+static const char *
+ampere_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+ const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
+
+ return ctx->name;
+}
+
+static u32 ampere_cspmu_event_filter(const struct perf_event *event)
+{
+ /*
+ * PMEVFILTR or PMCCFILTR aren't used in Ampere SoC PMU but are marked
+ * as RES0. Make sure PMCCFILTR is written zero.
+ */
+ return 0;
+}
+
+static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+ struct hw_perf_event *hwc,
+ u32 filter)
+{
+ struct perf_event *event;
+ unsigned int idx;
+ u32 threshold, rank, bank;
+
+ /*
+ * At this point, all the events have the same filter settings.
+ * Therefore, take the first event and use its configuration.
+ */
+ idx = find_first_bit(cspmu->hw_events.used_ctrs,
+ cspmu->cycle_counter_logical_idx);
+
+ event = cspmu->hw_events.events[idx];
+
+ threshold = get_threshold(event);
+ rank = get_rank(event);
+ bank = get_bank(event);
+
+ writel(threshold, cspmu->base0 + PMAUXR0);
+ writel(rank, cspmu->base0 + PMAUXR1);
+ writel(bank, cspmu->base0 + PMAUXR2);
+}
+
+static int ampere_cspmu_validate_configs(struct perf_event *event,
+ struct perf_event *event2)
+{
+ if (get_threshold(event) != get_threshold(event2) ||
+ get_rank(event) != get_rank(event2) ||
+ get_bank(event) != get_bank(event2))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
+ struct perf_event *new)
+{
+ struct perf_event *curr, *leader = new->group_leader;
+ unsigned int idx;
+ int ret;
+
+ ret = ampere_cspmu_validate_configs(new, leader);
+ if (ret)
+ return ret;
+
+ /* We compare the global filter settings to the existing events */
+ idx = find_first_bit(cspmu->hw_events.used_ctrs,
+ cspmu->cycle_counter_logical_idx);
+
+ /* This is the first event, thus any configuration is fine */
+ if (idx == cspmu->cycle_counter_logical_idx)
+ return 0;
+
+ curr = cspmu->hw_events.events[idx];
+
+ return ampere_cspmu_validate_configs(curr, new);
+}
+
+static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
+ const char *name_pattern)
+{
+ struct device *dev = cspmu->dev;
+ int id;
+
+ id = ida_alloc(&mcu_pmu_ida, GFP_KERNEL);
+ if (id < 0)
+ return ERR_PTR(id);
+
+ return devm_kasprintf(dev, GFP_KERNEL, name_pattern, id);
+}
+
+static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
+{
+ struct device *dev = cspmu->dev;
+ struct ampere_cspmu_ctx *ctx;
+ struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
+
+ ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->event_attr = ampereone_mcu_pmu_event_attrs;
+ ctx->format_attr = ampereone_mcu_format_attrs;
+ ctx->name = ampere_cspmu_format_name(cspmu, "ampere_mcu_pmu_%d");
+ if (IS_ERR_OR_NULL(ctx->name))
+ return ctx->name ? PTR_ERR(ctx->name) : -ENOMEM;
+
+ cspmu->impl.ctx = ctx;
+
+ impl_ops->event_filter = ampere_cspmu_event_filter;
+ impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter;
+ impl_ops->validate_event = ampere_cspmu_validate_event;
+ impl_ops->get_name = ampere_cspmu_get_name;
+ impl_ops->get_event_attrs = ampere_cspmu_get_event_attrs;
+ impl_ops->get_format_attrs = ampere_cspmu_get_format_attrs;
+
+ return 0;
+}
+
+/* Match all Ampere Coresight PMU devices */
+static const struct arm_cspmu_impl_match ampere_cspmu_param = {
+ .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
+ .module = THIS_MODULE,
+ .impl_init_ops = ampere_cspmu_init_ops
+};
+
+static int __init ampere_cspmu_init(void)
+{
+ int ret;
+
+ ret = arm_cspmu_impl_register(&ampere_cspmu_param);
+ if (ret)
+ pr_err("ampere_cspmu backend registration error: %d\n", ret);
+
+ return ret;
+}
+
+static void __exit ampere_cspmu_exit(void)
+{
+ arm_cspmu_impl_unregister(&ampere_cspmu_param);
+}
+
+module_init(ampere_cspmu_init);
+module_exit(ampere_cspmu_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index e2b7827c4563..0e3fe00d741d 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -16,7 +16,7 @@
* The user should refer to the vendor technical documentation to get details
* about the supported events.
*
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
@@ -26,11 +26,11 @@
#include <linux/interrupt.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include "arm_cspmu.h"
-#include "nvidia_cspmu.h"
#define PMUNAME "arm_cspmu"
#define DRVNAME "arm-cs-arch-pmu"
@@ -112,11 +112,13 @@
*/
#define HILOHI_MAX_POLL 1000
-/* JEDEC-assigned JEP106 identification code */
-#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
-
static unsigned long arm_cspmu_cpuhp_state;
+static DEFINE_MUTEX(arm_cspmu_lock);
+
+static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+ struct hw_perf_event *hwc, u32 filter);
+
static struct acpi_apmt_node *arm_cspmu_apmt_node(struct device *dev)
{
return *(struct acpi_apmt_node **)dev_get_platdata(dev);
@@ -373,27 +375,45 @@ static struct attribute_group arm_cspmu_cpumask_attr_group = {
.attrs = arm_cspmu_cpumask_attrs,
};
-struct impl_match {
- u32 pmiidr;
- u32 mask;
- int (*impl_init_ops)(struct arm_cspmu *cspmu);
-};
-
-static const struct impl_match impl_match[] = {
+static struct arm_cspmu_impl_match impl_match[] = {
{
- .pmiidr = ARM_CSPMU_IMPL_ID_NVIDIA,
- .mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
- .impl_init_ops = nv_cspmu_init_ops
+ .module_name = "nvidia_cspmu",
+ .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA,
+ .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+ .module = NULL,
+ .impl_init_ops = NULL,
},
- {}
+ {
+ .module_name = "ampere_cspmu",
+ .pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
+ .pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+ .module = NULL,
+ .impl_init_ops = NULL,
+ },
+
+ {0}
};
+static struct arm_cspmu_impl_match *arm_cspmu_impl_match_get(u32 pmiidr)
+{
+ struct arm_cspmu_impl_match *match = impl_match;
+
+ for (; match->pmiidr_val; match++) {
+ u32 mask = match->pmiidr_mask;
+
+ if ((match->pmiidr_val & mask) == (pmiidr & mask))
+ return match;
+ }
+
+ return NULL;
+}
+
static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
{
- int ret;
+ int ret = 0;
struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
struct acpi_apmt_node *apmt_node = arm_cspmu_apmt_node(cspmu->dev);
- const struct impl_match *match = impl_match;
+ struct arm_cspmu_impl_match *match;
/*
* Get PMU implementer and product id from APMT node.
@@ -405,17 +425,36 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
readl(cspmu->base0 + PMIIDR);
/* Find implementer specific attribute ops. */
- for (; match->pmiidr; match++) {
- const u32 mask = match->mask;
+ match = arm_cspmu_impl_match_get(cspmu->impl.pmiidr);
+
+ /* Load implementer module and initialize the callbacks. */
+ if (match) {
+ mutex_lock(&arm_cspmu_lock);
+
+ if (match->impl_init_ops) {
+ /* Prevent unload until PMU registration is done. */
+ if (try_module_get(match->module)) {
+ cspmu->impl.module = match->module;
+ cspmu->impl.match = match;
+ ret = match->impl_init_ops(cspmu);
+ if (ret)
+ module_put(match->module);
+ } else {
+ WARN(1, "arm_cspmu failed to get module: %s\n",
+ match->module_name);
+ ret = -EINVAL;
+ }
+ } else {
+ request_module_nowait(match->module_name);
+ ret = -EPROBE_DEFER;
+ }
- if ((match->pmiidr & mask) == (cspmu->impl.pmiidr & mask)) {
- ret = match->impl_init_ops(cspmu);
- if (ret)
- return ret;
+ mutex_unlock(&arm_cspmu_lock);
- break;
- }
- }
+ if (ret)
+ return ret;
+ } else
+ cspmu->impl.module = THIS_MODULE;
/* Use default callbacks if implementer doesn't provide one. */
CHECK_DEFAULT_IMPL_OPS(impl_ops, get_event_attrs);
@@ -426,6 +465,7 @@ static int arm_cspmu_init_impl_ops(struct arm_cspmu *cspmu)
CHECK_DEFAULT_IMPL_OPS(impl_ops, event_type);
CHECK_DEFAULT_IMPL_OPS(impl_ops, event_filter);
CHECK_DEFAULT_IMPL_OPS(impl_ops, event_attr_is_visible);
+ CHECK_DEFAULT_IMPL_OPS(impl_ops, set_ev_filter);
return 0;
}
@@ -478,11 +518,6 @@ arm_cspmu_alloc_attr_group(struct arm_cspmu *cspmu)
struct attribute_group **attr_groups = NULL;
struct device *dev = cspmu->dev;
const struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
- int ret;
-
- ret = arm_cspmu_init_impl_ops(cspmu);
- if (ret)
- return NULL;
cspmu->identifier = impl_ops->get_identifier(cspmu);
cspmu->name = impl_ops->get_name(cspmu);
@@ -549,7 +584,7 @@ static void arm_cspmu_disable(struct pmu *pmu)
static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
struct perf_event *event)
{
- int idx;
+ int idx, ret;
struct arm_cspmu *cspmu = to_arm_cspmu(event->pmu);
if (supports_cycle_counter(cspmu)) {
@@ -583,6 +618,12 @@ static int arm_cspmu_get_event_idx(struct arm_cspmu_hw_events *hw_events,
if (idx >= cspmu->num_logical_ctrs)
return -EAGAIN;
+ if (cspmu->impl.ops.validate_event) {
+ ret = cspmu->impl.ops.validate_event(cspmu, event);
+ if (ret)
+ return ret;
+ }
+
set_bit(idx, hw_events->used_ctrs);
return idx;
@@ -696,7 +737,10 @@ static void arm_cspmu_write_counter(struct perf_event *event, u64 val)
if (use_64b_counter_reg(cspmu)) {
offset = counter_offset(sizeof(u64), event->hw.idx);
- writeq(val, cspmu->base1 + offset);
+ if (cspmu->has_atomic_dword)
+ writeq(val, cspmu->base1 + offset);
+ else
+ lo_hi_writeq(val, cspmu->base1 + offset);
} else {
offset = counter_offset(sizeof(u32), event->hw.idx);
@@ -789,9 +833,9 @@ static inline void arm_cspmu_set_event(struct arm_cspmu *cspmu,
writel(hwc->config, cspmu->base0 + offset);
}
-static inline void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
- struct hw_perf_event *hwc,
- u32 filter)
+static void arm_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+ struct hw_perf_event *hwc,
+ u32 filter)
{
u32 offset = PMEVFILTR + (4 * hwc->idx);
@@ -823,7 +867,7 @@ static void arm_cspmu_start(struct perf_event *event, int pmu_flags)
arm_cspmu_set_cc_filter(cspmu, filter);
} else {
arm_cspmu_set_event(cspmu, hwc);
- arm_cspmu_set_ev_filter(cspmu, hwc, filter);
+ cspmu->impl.ops.set_ev_filter(cspmu, hwc, filter);
}
hwc->state = 0;
@@ -1149,7 +1193,7 @@ static int arm_cspmu_register_pmu(struct arm_cspmu *cspmu)
cspmu->pmu = (struct pmu){
.task_ctx_nr = perf_invalid_context,
- .module = THIS_MODULE,
+ .module = cspmu->impl.module,
.pmu_enable = arm_cspmu_enable,
.pmu_disable = arm_cspmu_disable,
.event_init = arm_cspmu_event_init,
@@ -1196,11 +1240,17 @@ static int arm_cspmu_device_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = arm_cspmu_register_pmu(cspmu);
+ ret = arm_cspmu_init_impl_ops(cspmu);
if (ret)
return ret;
- return 0;
+ ret = arm_cspmu_register_pmu(cspmu);
+
+ /* Matches arm_cspmu_init_impl_ops() above. */
+ if (cspmu->impl.module != THIS_MODULE)
+ module_put(cspmu->impl.module);
+
+ return ret;
}
static int arm_cspmu_device_remove(struct platform_device *pdev)
@@ -1300,6 +1350,75 @@ static void __exit arm_cspmu_exit(void)
cpuhp_remove_multi_state(arm_cspmu_cpuhp_state);
}
+int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match)
+{
+ struct arm_cspmu_impl_match *match;
+ int ret = 0;
+
+ match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);
+
+ if (match) {
+ mutex_lock(&arm_cspmu_lock);
+
+ if (!match->impl_init_ops) {
+ match->module = impl_match->module;
+ match->impl_init_ops = impl_match->impl_init_ops;
+ } else {
+ /* Broken match table may contain non-unique entries */
+ WARN(1, "arm_cspmu backend already registered for module: %s, pmiidr: 0x%x, mask: 0x%x\n",
+ match->module_name,
+ match->pmiidr_val,
+ match->pmiidr_mask);
+
+ ret = -EINVAL;
+ }
+
+ mutex_unlock(&arm_cspmu_lock);
+
+ if (!ret)
+ ret = driver_attach(&arm_cspmu_driver.driver);
+ } else {
+ pr_err("arm_cspmu reg failed, unable to find a match for pmiidr: 0x%x\n",
+ impl_match->pmiidr_val);
+
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_register);
+
+static int arm_cspmu_match_device(struct device *dev, const void *match)
+{
+ struct arm_cspmu *cspmu = platform_get_drvdata(to_platform_device(dev));
+
+ return (cspmu && cspmu->impl.match == match) ? 1 : 0;
+}
+
+void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match)
+{
+ struct device *dev;
+ struct arm_cspmu_impl_match *match;
+
+ match = arm_cspmu_impl_match_get(impl_match->pmiidr_val);
+
+ if (WARN_ON(!match))
+ return;
+
+ /* Unbind the driver from all matching backend devices. */
+ while ((dev = driver_find_device(&arm_cspmu_driver.driver, NULL,
+ match, arm_cspmu_match_device)))
+ device_release_driver(dev);
+
+ mutex_lock(&arm_cspmu_lock);
+
+ match->module = NULL;
+ match->impl_init_ops = NULL;
+
+ mutex_unlock(&arm_cspmu_lock);
+}
+EXPORT_SYMBOL_GPL(arm_cspmu_impl_unregister);
+
module_init(arm_cspmu_init);
module_exit(arm_cspmu_exit);
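
The core now dispatches per-event filter programming through impl.ops.set_ev_filter (defaulting to arm_cspmu_set_ev_filter via CHECK_DEFAULT_IMPL_OPS) and offers an optional validate_event hook that runs before a counter index is claimed. Below is a minimal sketch of a backend overriding both in its impl_init_ops; the acme_* names and the register offset are hypothetical and only illustrate the hooks added here::

    /* Hypothetical backend: ACME_PMU_FILTER_REG is an implementation-defined
     * offset, not a register introduced by this series. */
    #define ACME_PMU_FILTER_REG    0xd80

    static void acme_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
                                         struct hw_perf_event *hwc, u32 filter)
    {
        /* One shared filter register per PMU instance, so hwc->idx is unused. */
        writel(filter, cspmu->base0 + ACME_PMU_FILTER_REG);
    }

    static int acme_cspmu_validate_event(struct arm_cspmu *cspmu,
                                         struct perf_event *event)
    {
        /* A real backend would reject events whose filter settings conflict
         * with counters already in use; this sketch accepts everything. */
        return 0;
    }

    static int acme_cspmu_init_ops(struct arm_cspmu *cspmu)
    {
        cspmu->impl.ops.set_ev_filter = acme_cspmu_set_ev_filter;
        cspmu->impl.ops.validate_event = acme_cspmu_validate_event;
        return 0;
    }
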
diff --git a/drivers/perf/arm_cspmu/arm_cspmu.h b/drivers/perf/arm_cspmu/arm_cspmu.h
index 83df53d1c132..2fe723555a6b 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.h
+++ b/drivers/perf/arm_cspmu/arm_cspmu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0
*
* ARM CoreSight Architecture PMU driver.
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
@@ -69,6 +69,10 @@
#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0)
#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20)
+/* JEDEC-assigned JEP106 identification code */
+#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
+#define ARM_CSPMU_IMPL_ID_AMPERE 0xA16
+
struct arm_cspmu;
/* This tracks the events assigned to each counter in the PMU. */
@@ -101,14 +105,34 @@ struct arm_cspmu_impl_ops {
u32 (*event_type)(const struct perf_event *event);
/* Decode filter value from configs */
u32 (*event_filter)(const struct perf_event *event);
+ /* Set event filter */
+ void (*set_ev_filter)(struct arm_cspmu *cspmu,
+ struct hw_perf_event *hwc, u32 filter);
+ /* Implementation specific event validation */
+ int (*validate_event)(struct arm_cspmu *cspmu,
+ struct perf_event *event);
/* Hide/show unsupported events */
umode_t (*event_attr_is_visible)(struct kobject *kobj,
struct attribute *attr, int unused);
};
+/* Vendor/implementer registration parameter. */
+struct arm_cspmu_impl_match {
+ /* Backend module. */
+ struct module *module;
+ const char *module_name;
+ /* PMIIDR value/mask. */
+ u32 pmiidr_val;
+ u32 pmiidr_mask;
+ /* Callback to vendor backend to init arm_cspmu_impl::ops. */
+ int (*impl_init_ops)(struct arm_cspmu *cspmu);
+};
+
/* Vendor/implementer descriptor. */
struct arm_cspmu_impl {
u32 pmiidr;
+ struct module *module;
+ struct arm_cspmu_impl_match *match;
struct arm_cspmu_impl_ops ops;
void *ctx;
};
@@ -147,4 +171,10 @@ ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
struct device_attribute *attr,
char *buf);
+/* Register vendor backend. */
+int arm_cspmu_impl_register(const struct arm_cspmu_impl_match *impl_match);
+
+/* Unregister vendor backend. */
+void arm_cspmu_impl_unregister(const struct arm_cspmu_impl_match *impl_match);
+
#endif /* __ARM_CSPMU_H__ */
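
With arm_cspmu_impl_register() and arm_cspmu_impl_unregister() exported, a vendor backend becomes a standalone module that hands the core its PMIIDR value and init callback, as the nvidia_cspmu conversion below does. Here is a minimal skeleton, assuming the core's impl_match[] table already lists the implementer (registration returns -EINVAL otherwise); the acme_* names and 0x123 JEP106 code are placeholders, and acme_cspmu_init_ops() is the sketch shown earlier::

    #include <linux/module.h>

    #include "arm_cspmu.h"

    #define ARM_CSPMU_IMPL_ID_ACME    0x123    /* hypothetical JEP106 code */

    static const struct arm_cspmu_impl_match acme_cspmu_param = {
        .pmiidr_val    = ARM_CSPMU_IMPL_ID_ACME,
        .module        = THIS_MODULE,
        .impl_init_ops = acme_cspmu_init_ops,    /* from the sketch above */
    };

    static int __init acme_cspmu_init(void)
    {
        int ret = arm_cspmu_impl_register(&acme_cspmu_param);

        if (ret)
            pr_err("acme_cspmu backend registration error: %d\n", ret);
        return ret;
    }

    static void __exit acme_cspmu_exit(void)
    {
        arm_cspmu_impl_unregister(&acme_cspmu_param);
    }

    module_init(acme_cspmu_init);
    module_exit(acme_cspmu_exit);
    MODULE_LICENSE("GPL");
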
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.c b/drivers/perf/arm_cspmu/nvidia_cspmu.c
index 72ef80caa3c8..0382b702f092 100644
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.c
+++ b/drivers/perf/arm_cspmu/nvidia_cspmu.c
@@ -1,14 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
/* Support for NVIDIA specific attributes. */
+#include <linux/module.h>
#include <linux/topology.h>
-#include "nvidia_cspmu.h"
+#include "arm_cspmu.h"
#define NV_PCIE_PORT_COUNT 10ULL
#define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
@@ -351,7 +352,7 @@ static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
return name;
}
-int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
+static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
{
u32 prodid;
struct nv_cspmu_ctx *ctx;
@@ -395,6 +396,31 @@ int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
return 0;
}
-EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
+
+/* Match all NVIDIA Coresight PMU devices */
+static const struct arm_cspmu_impl_match nv_cspmu_param = {
+ .pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA,
+ .module = THIS_MODULE,
+ .impl_init_ops = nv_cspmu_init_ops
+};
+
+static int __init nvidia_cspmu_init(void)
+{
+ int ret;
+
+ ret = arm_cspmu_impl_register(&nv_cspmu_param);
+ if (ret)
+ pr_err("nvidia_cspmu backend registration error: %d\n", ret);
+
+ return ret;
+}
+
+static void __exit nvidia_cspmu_exit(void)
+{
+ arm_cspmu_impl_unregister(&nv_cspmu_param);
+}
+
+module_init(nvidia_cspmu_init);
+module_exit(nvidia_cspmu_exit);
MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_cspmu/nvidia_cspmu.h b/drivers/perf/arm_cspmu/nvidia_cspmu.h
deleted file mode 100644
index 71e18f0dc50b..000000000000
--- a/drivers/perf/arm_cspmu/nvidia_cspmu.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
- *
- */
-
-/* Support for NVIDIA specific attributes. */
-
-#ifndef __NVIDIA_CSPMU_H__
-#define __NVIDIA_CSPMU_H__
-
-#include "arm_cspmu.h"
-
-/* Allocate NVIDIA descriptor. */
-int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
-
-#endif /* __NVIDIA_CSPMU_H__ */
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 8fcaa26f0f8a..4f6923ad4589 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -1126,7 +1126,7 @@ static void __armv8pmu_probe_pmu(void *info)
pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
/* store PMMIR register for sysfs */
- if (is_pmuv3p4(pmuver) && (pmceid_raw[1] & BIT(31)))
+ if (is_pmuv3p4(pmuver))
cpu_pmu->reg_pmmir = read_pmmir();
else
cpu_pmu->reg_pmmir = 0;
@@ -1187,10 +1187,7 @@ static void armv8_pmu_register_sysctl_table(void)
}
static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
- int (*map_event)(struct perf_event *event),
- const struct attribute_group *events,
- const struct attribute_group *format,
- const struct attribute_group *caps)
+ int (*map_event)(struct perf_event *event))
{
int ret = armv8pmu_probe_pmu(cpu_pmu);
if (ret)
@@ -1212,27 +1209,17 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->name = name;
cpu_pmu->map_event = map_event;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = events ?
- events : &armv8_pmuv3_events_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ?
- format : &armv8_pmuv3_format_attr_group;
- cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ?
- caps : &armv8_pmuv3_caps_attr_group;
-
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &armv8_pmuv3_events_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &armv8_pmuv3_format_attr_group;
+ cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = &armv8_pmuv3_caps_attr_group;
armv8_pmu_register_sysctl_table();
return 0;
}
-static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name,
- int (*map_event)(struct perf_event *event))
-{
- return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL);
-}
-
#define PMUV3_INIT_SIMPLE(name) \
static int name##_pmu_init(struct arm_pmu *cpu_pmu) \
{ \
- return armv8_pmu_init_nogroups(cpu_pmu, #name, armv8_pmuv3_map_event);\
+ return armv8_pmu_init(cpu_pmu, #name, armv8_pmuv3_map_event); \
}
PMUV3_INIT_SIMPLE(armv8_pmuv3)
@@ -1263,44 +1250,37 @@ PMUV3_INIT_SIMPLE(armv8_nvidia_denver)
static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
{
- return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35",
- armv8_a53_map_event);
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35", armv8_a53_map_event);
}
static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
{
- return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53",
- armv8_a53_map_event);
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53", armv8_a53_map_event);
}
static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
{
- return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57",
- armv8_a57_map_event);
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57", armv8_a57_map_event);
}
static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
{
- return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72",
- armv8_a57_map_event);
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72", armv8_a57_map_event);
}
static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
{
- return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73",
- armv8_a73_map_event);
+ return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73", armv8_a73_map_event);
}
static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
{
- return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder",
- armv8_thunder_map_event);
+ return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder", armv8_thunder_map_event);
}
static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
{
- return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan",
- armv8_vulcan_map_event);
+ return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan", armv8_vulcan_map_event);
}
static const struct of_device_id armv8_pmu_of_device_ids[] = {
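
Since every caller passed NULL for the optional attribute groups, armv8_pmu_init_nogroups() is folded away and armv8_pmu_init() always installs the standard events/format/caps groups. Declaring a simple PMUv3 variant remains a single macro invocation; armv8_acme below is a hypothetical name used only to show what PMUV3_INIT_SIMPLE() now expands to::

    /* PMUV3_INIT_SIMPLE(armv8_acme) expands to roughly: */
    static int armv8_acme_pmu_init(struct arm_pmu *cpu_pmu)
    {
        return armv8_pmu_init(cpu_pmu, "armv8_acme", armv8_pmuv3_map_event);
    }
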
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index 5a00adb2de8c..b90ba8aca3fa 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -353,16 +353,15 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event)
struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
- event->cpu = pcie_pmu->on_cpu;
+ /* Check the type first before going on, otherwise it's not our event */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
if (EXT_COUNTER_IS_USED(hisi_pcie_get_event(event)))
hwc->event_base = HISI_PCIE_EXT_CNT;
else
hwc->event_base = HISI_PCIE_CNT;
- if (event->attr.type != event->pmu->type)
- return -ENOENT;
-
/* Sampling is not supported. */
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EOPNOTSUPP;
@@ -373,6 +372,8 @@ static int hisi_pcie_pmu_event_init(struct perf_event *event)
if (!hisi_pcie_pmu_validate_event_group(event))
return -EINVAL;
+ event->cpu = pcie_pmu->on_cpu;
+
return 0;
}
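
perf core tries every registered PMU's event_init() until one accepts an event, so a driver must reject foreign events with -ENOENT before touching them; the reordering above also defers the event->cpu assignment until the event has passed all checks. A minimal sketch of that shape, with the example_* identifiers standing in for a real driver::

    static int example_pmu_event_init(struct perf_event *event)
    {
        struct example_pmu *epmu = to_example_pmu(event->pmu);

        if (event->attr.type != event->pmu->type)
            return -ENOENT;        /* not ours: leave the event untouched */

        if (is_sampling_event(event))
            return -EOPNOTSUPP;    /* validate before causing side effects */

        event->cpu = epmu->on_cpu; /* commit changes only once accepted */
        return 0;
    }
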
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index d941e746b424..797cf201996a 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -505,8 +505,8 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
ret = perf_pmu_register(&pa_pmu->pmu, name, -1);
if (ret) {
dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
- &pa_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+ &pa_pmu->node);
return ret;
}
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index 6fe534a665ed..e706ca567676 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -450,8 +450,8 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
if (ret) {
dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
- &sllc_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+ &sllc_pmu->node);
return ret;
}
diff --git a/drivers/perf/hisilicon/hns3_pmu.c b/drivers/perf/hisilicon/hns3_pmu.c
index e0457d84af6b..16869bf5bf4c 100644
--- a/drivers/perf/hisilicon/hns3_pmu.c
+++ b/drivers/perf/hisilicon/hns3_pmu.c
@@ -1556,8 +1556,8 @@ static int hns3_pmu_init_pmu(struct pci_dev *pdev, struct hns3_pmu *hns3_pmu)
ret = perf_pmu_register(&hns3_pmu->pmu, hns3_pmu->pmu.name, -1);
if (ret) {
pci_err(pdev, "failed to register perf PMU, ret = %d.\n", ret);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
- &hns3_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+ &hns3_pmu->node);
}
return ret;
@@ -1568,8 +1568,8 @@ static void hns3_pmu_uninit_pmu(struct pci_dev *pdev)
struct hns3_pmu *hns3_pmu = pci_get_drvdata(pdev);
perf_pmu_unregister(&hns3_pmu->pmu);
- cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
- &hns3_pmu->node);
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HNS3_PMU_ONLINE,
+ &hns3_pmu->node);
}
static int hns3_pmu_init_dev(struct pci_dev *pdev)
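
The three hisilicon changes above switch failure and teardown paths from cpuhp_state_remove_instance() to the _nocalls variant, which removes the hotplug instance without invoking the offline callback; running that callback against a PMU that failed (or has just left) perf_pmu_register() would try to migrate perf events off an unregistered PMU. A sketch of the probe-side pairing, with example_* and EXAMPLE_ONLINE as placeholders for a real driver and its multi-instance hotplug state::

    static int example_pmu_register(struct example_pmu *epmu)
    {
        int ret;

        /* The online callback runs here and picks an owning CPU. */
        ret = cpuhp_state_add_instance(EXAMPLE_ONLINE, &epmu->node);
        if (ret)
            return ret;

        ret = perf_pmu_register(&epmu->pmu, epmu->name, -1);
        if (ret)
            /* The PMU never became visible: drop the hotplug instance
             * without running the offline callback against it. */
            cpuhp_state_remove_instance_nocalls(EXAMPLE_ONLINE, &epmu->node);

        return ret;
    }
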
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 9972bfc11a5c..7ce344248dda 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -16,11 +16,9 @@
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_address.h>
-#include <linux/of_fdt.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/regmap.h>
#include <linux/slab.h>
@@ -1731,6 +1729,12 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = {
.id = PCP_PMU_V2,
};
+#ifdef CONFIG_ACPI
+static const struct xgene_pmu_data xgene_pmu_v3_data = {
+ .id = PCP_PMU_V3,
+};
+#endif
+
static const struct xgene_pmu_ops xgene_pmu_ops = {
.mask_int = xgene_pmu_mask_int,
.unmask_int = xgene_pmu_unmask_int,
@@ -1773,9 +1777,9 @@ static const struct of_device_id xgene_pmu_of_match[] = {
MODULE_DEVICE_TABLE(of, xgene_pmu_of_match);
#ifdef CONFIG_ACPI
static const struct acpi_device_id xgene_pmu_acpi_match[] = {
- {"APMC0D5B", PCP_PMU_V1},
- {"APMC0D5C", PCP_PMU_V2},
- {"APMC0D83", PCP_PMU_V3},
+ {"APMC0D5B", (kernel_ulong_t)&xgene_pmu_data},
+ {"APMC0D5C", (kernel_ulong_t)&xgene_pmu_v2_data},
+ {"APMC0D83", (kernel_ulong_t)&xgene_pmu_v3_data},
{},
};
MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
@@ -1831,7 +1835,6 @@ static int xgene_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
static int xgene_pmu_probe(struct platform_device *pdev)
{
const struct xgene_pmu_data *dev_data;
- const struct of_device_id *of_id;
struct xgene_pmu *xgene_pmu;
int irq, rc;
int version;
@@ -1850,24 +1853,10 @@ static int xgene_pmu_probe(struct platform_device *pdev)
xgene_pmu->dev = &pdev->dev;
platform_set_drvdata(pdev, xgene_pmu);
- version = -EINVAL;
- of_id = of_match_device(xgene_pmu_of_match, &pdev->dev);
- if (of_id) {
- dev_data = (const struct xgene_pmu_data *) of_id->data;
- version = dev_data->id;
- }
-
-#ifdef CONFIG_ACPI
- if (ACPI_COMPANION(&pdev->dev)) {
- const struct acpi_device_id *acpi_id;
-
- acpi_id = acpi_match_device(xgene_pmu_acpi_match, &pdev->dev);
- if (acpi_id)
- version = (int) acpi_id->driver_data;
- }
-#endif
- if (version < 0)
+ dev_data = device_get_match_data(&pdev->dev);
+ if (!dev_data)
return -ENODEV;
+ version = dev_data->id;
if (version == PCP_PMU_V3)
xgene_pmu->ops = &xgene_pmu_v3_ops;
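
device_get_match_data() resolves match data for OF and ACPI alike, provided both tables point at the same per-version structures; that is why the ACPI entries above now store struct xgene_pmu_data pointers (cast through kernel_ulong_t) instead of bare version numbers. A compact sketch of the pattern, with example_* identifiers as placeholders::

    struct example_data {
        int id;
    };

    static const struct example_data example_v2_data = { .id = 2 };

    static const struct of_device_id example_of_match[] = {
        { .compatible = "vendor,example-v2", .data = &example_v2_data },
        { }
    };

    static const struct acpi_device_id example_acpi_match[] = {
        { "EXAM0002", (kernel_ulong_t)&example_v2_data },
        { }
    };

    static int example_probe(struct platform_device *pdev)
    {
        const struct example_data *data = device_get_match_data(&pdev->dev);

        if (!data)
            return -ENODEV;    /* no OF/ACPI match data for this device */

        return data->id == 2 ? 0 : -EINVAL;
    }
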
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index a73246c3c35e..fae2aa0b5e8b 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -256,6 +256,11 @@ acpi_table_parse_cedt(enum acpi_cedt_type id,
int acpi_parse_mcfg (struct acpi_table_header *header);
void acpi_table_print_madt_entry (struct acpi_subtable_header *madt);
+static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc)
+{
+ return gicc->flags & ACPI_MADT_ENABLED;
+}
+
/* the following numa functions are architecture-dependent */
void acpi_numa_slit_init (struct acpi_table_slit *slit);
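
acpi_gicc_is_usable() gives MADT GICC consumers a single place to test the enabled flag instead of open-coding the ACPI_MADT_ENABLED check. A hypothetical subtable handler using it::

    static int __init example_parse_gicc(union acpi_subtable_headers *header,
                                         const unsigned long end)
    {
        struct acpi_madt_generic_interrupt *gicc =
            (struct acpi_madt_generic_interrupt *)header;

        if (!acpi_gicc_is_usable(gicc))
            return 0;    /* skip GICC entries whose enabled flag is clear */

        /* ... register the CPU interface described by this entry ... */
        return 0;
    }
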
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index 4328895dfc87..547d077e3517 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -473,6 +473,13 @@ function _start
// mov x8, #__NR_sched_yield // Encourage preemption
// svc #0
+#ifdef SSVE
+ mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=0,SM=1
+ and x1, x0, #3
+ cmp x1, #1
+ b.ne svcr_barf
+#endif
+
mov x21, #0
0: mov x0, x21
bl check_zreg
@@ -553,3 +560,15 @@ function vl_barf
mov x1, #1
svc #0
endfunction
+
+function svcr_barf
+ mov x10, x0
+
+ puts "Bad SVCR: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction