Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r--  arch/powerpc/perf/8xx-pmu.c              |  23
-rw-r--r--  arch/powerpc/perf/Makefile               |   7
-rw-r--r--  arch/powerpc/perf/bhrb.S                 |   2
-rw-r--r--  arch/powerpc/perf/callchain.c            | 382
-rw-r--r--  arch/powerpc/perf/callchain.h            |  35
-rw-r--r--  arch/powerpc/perf/callchain_32.c         | 178
-rw-r--r--  arch/powerpc/perf/callchain_64.c         | 120
-rw-r--r--  arch/powerpc/perf/core-book3s.c          | 531
-rw-r--r--  arch/powerpc/perf/core-fsl-emb.c         |  33
-rw-r--r--  arch/powerpc/perf/e500-pmu.c             |   9
-rw-r--r--  arch/powerpc/perf/e6500-pmu.c            |   5
-rw-r--r--  arch/powerpc/perf/generic-compat-pmu.c   | 190
-rw-r--r--  arch/powerpc/perf/hv-24x7.c              | 228
-rw-r--r--  arch/powerpc/perf/hv-gpci-requests.h     |  10
-rw-r--r--  arch/powerpc/perf/hv-gpci.c              | 781
-rw-r--r--  arch/powerpc/perf/hv-gpci.h              |  28
-rw-r--r--  arch/powerpc/perf/imc-pmu.c              | 342
-rw-r--r--  arch/powerpc/perf/internal.h             |  18
-rw-r--r--  arch/powerpc/perf/isa207-common.c        | 408
-rw-r--r--  arch/powerpc/perf/isa207-common.h        |  86
-rw-r--r--  arch/powerpc/perf/mpc7450-pmu.c          |  33
-rw-r--r--  arch/powerpc/perf/perf_regs.c            |  56
-rw-r--r--  arch/powerpc/perf/power10-events-list.h  |  79
-rw-r--r--  arch/powerpc/perf/power10-pmu.c          | 663
-rw-r--r--  arch/powerpc/perf/power5+-pmu.c          |  32
-rw-r--r--  arch/powerpc/perf/power5-pmu.c           |  31
-rw-r--r--  arch/powerpc/perf/power6-pmu.c           |  76
-rw-r--r--  arch/powerpc/perf/power7-pmu.c           |  37
-rw-r--r--  arch/powerpc/perf/power8-pmu.c           |  23
-rw-r--r--  arch/powerpc/perf/power9-pmu.c           |  55
-rw-r--r--  arch/powerpc/perf/ppc970-pmu.c           |  40
-rw-r--r--  arch/powerpc/perf/req-gen/perf.h         |  20
32 files changed, 3579 insertions(+), 982 deletions(-)
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 19124b0b171a..308a2e40d7be 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -15,6 +15,7 @@
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/code-patching.h>
+#include <asm/inst.h>
#define PERF_8xx_ID_CPU_CYCLES 1
#define PERF_8xx_ID_HW_INSTRUCTIONS 2
@@ -99,9 +100,6 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
unsigned long target = patch_site_addr(&patch__itlbmiss_perf);
patch_branch_site(&patch__itlbmiss_exit_1, target, 0);
-#ifndef CONFIG_PIN_TLB_TEXT
- patch_branch_site(&patch__itlbmiss_exit_2, target, 0);
-#endif
}
val = itlb_miss_counter;
break;
@@ -110,8 +108,6 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
unsigned long target = patch_site_addr(&patch__dtlbmiss_perf);
patch_branch_site(&patch__dtlbmiss_exit_1, target, 0);
- patch_branch_site(&patch__dtlbmiss_exit_2, target, 0);
- patch_branch_site(&patch__dtlbmiss_exit_3, target, 0);
}
val = dtlb_miss_counter;
break;
@@ -157,13 +153,11 @@ static void mpc8xx_pmu_read(struct perf_event *event)
static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{
- /* mfspr r10, SPRN_SPRG_SCRATCH0 */
- unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
- __PPC_SPR(SPRN_SPRG_SCRATCH0);
+ ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));
mpc8xx_pmu_read(event);
- /* If it was the last user, stop counting to avoid useles overhead */
+ /* If it was the last user, stop counting to avoid useless overhead */
switch (event_type(event)) {
case PERF_8xx_ID_CPU_CYCLES:
break;
@@ -172,19 +166,12 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
mtspr(SPRN_ICTRL, 7);
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
- if (atomic_dec_return(&itlb_miss_ref) == 0) {
+ if (atomic_dec_return(&itlb_miss_ref) == 0)
patch_instruction_site(&patch__itlbmiss_exit_1, insn);
-#ifndef CONFIG_PIN_TLB_TEXT
- patch_instruction_site(&patch__itlbmiss_exit_2, insn);
-#endif
- }
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
- if (atomic_dec_return(&dtlb_miss_ref) == 0) {
+ if (atomic_dec_return(&dtlb_miss_ref) == 0)
patch_instruction_site(&patch__dtlbmiss_exit_1, insn);
- patch_instruction_site(&patch__dtlbmiss_exit_2, insn);
- patch_instruction_site(&patch__dtlbmiss_exit_3, insn);
- }
break;
}
}
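
The 8xx changes above all revolve around one pattern: TLB-miss events are counted by live-patching the miss-handler exit path, with an atomic reference count deciding when to divert the code and when to restore the original mfspr. A minimal sketch of that pattern, reusing the patch-site helpers visible in the diff (the wrapper function names here are invented for illustration, not kernel API):

/* Sketch only: patch_site_addr()/patch_branch_site()/
 * patch_instruction_site() are the helpers used in the diff above;
 * the wrapper functions are illustrative. */
static atomic_t itlb_miss_ref = ATOMIC_INIT(0);

static void itlb_miss_count_start(void)
{
	/* First user: branch the miss-handler exit into the counting stub. */
	if (atomic_inc_return(&itlb_miss_ref) == 1) {
		unsigned long target = patch_site_addr(&patch__itlbmiss_perf);

		patch_branch_site(&patch__itlbmiss_exit_1, target, 0);
	}
}

static void itlb_miss_count_stop(void)
{
	/* Last user: put the original mfspr back to avoid useless overhead. */
	ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));

	if (atomic_dec_return(&itlb_miss_ref) == 0)
		patch_instruction_site(&patch__itlbmiss_exit_1, insn);
}
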
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index c155dcbb8691..4f53d0b97539 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -1,12 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
+obj-y += callchain.o callchain_$(BITS).o perf_regs.o
+obj-$(CONFIG_COMPAT) += callchain_32.o
-obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
+obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o
obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
isa207-common.o power8-pmu.o power9-pmu.o \
- generic-compat-pmu.o
+ generic-compat-pmu.o power10-pmu.o bhrb.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
diff --git a/arch/powerpc/perf/bhrb.S b/arch/powerpc/perf/bhrb.S
index 1aa3259716b8..47ba05d5ae76 100644
--- a/arch/powerpc/perf/bhrb.S
+++ b/arch/powerpc/perf/bhrb.S
@@ -21,7 +21,7 @@
_GLOBAL(read_bhrb)
cmpldi r3,31
bgt 1f
- ld r4,bhrb_table@got(r2)
+ LOAD_REG_ADDR(r4, bhrb_table)
sldi r3,r3,3
add r3,r4,r3
mtctr r3
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 35d542515faf..6b4434dd0ff3 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -11,15 +11,12 @@
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <asm/ptrace.h>
-#include <asm/pgtable.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
-#ifdef CONFIG_PPC64
-#include "../kernel/ppc32.h"
-#endif
#include <asm/pte-walk.h>
+#include "callchain.h"
/*
* Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -30,7 +27,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
{
if (sp & 0xf)
return 0; /* must be 16-byte aligned */
- if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, current))
return 0;
if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
return 1;
@@ -43,7 +40,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
return 0;
}
-void
+void __no_sanitize_address
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
unsigned long sp, next_sp;
@@ -56,7 +53,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
sp = regs->gpr[1];
perf_callchain_store(entry, perf_instruction_pointer(regs));
- if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, current))
return;
for (;;) {
@@ -64,12 +61,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
next_sp = fp[0];
if (next_sp == sp + STACK_INT_FRAME_SIZE &&
- fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ validate_sp_size(sp, current, STACK_INT_FRAME_SIZE) &&
+ fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
/*
* This looks like an interrupt frame for an
* interrupt that occurred in the kernel
*/
- regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
+ regs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
next_ip = regs->nip;
lr = regs->link;
level = 0;
@@ -102,372 +100,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
}
}
-#ifdef CONFIG_PPC64
-/*
- * On 64-bit we don't want to invoke hash_page on user addresses from
- * interrupt context, so if the access faults, we read the page tables
- * to find which page (if any) is mapped and access it directly.
- */
-static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
-{
- int ret = -EFAULT;
- pgd_t *pgdir;
- pte_t *ptep, pte;
- unsigned shift;
- unsigned long addr = (unsigned long) ptr;
- unsigned long offset;
- unsigned long pfn, flags;
- void *kaddr;
-
- pgdir = current->mm->pgd;
- if (!pgdir)
- return -EFAULT;
-
- local_irq_save(flags);
- ptep = find_current_mm_pte(pgdir, addr, NULL, &shift);
- if (!ptep)
- goto err_out;
- if (!shift)
- shift = PAGE_SHIFT;
-
- /* align address to page boundary */
- offset = addr & ((1UL << shift) - 1);
-
- pte = READ_ONCE(*ptep);
- if (!pte_present(pte) || !pte_user(pte))
- goto err_out;
- pfn = pte_pfn(pte);
- if (!page_is_ram(pfn))
- goto err_out;
-
- /* no highmem to worry about here */
- kaddr = pfn_to_kaddr(pfn);
- memcpy(buf, kaddr + offset, nb);
- ret = 0;
-err_out:
- local_irq_restore(flags);
- return ret;
-}
-
-static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
-{
- if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
- ((unsigned long)ptr & 7))
- return -EFAULT;
-
- pagefault_disable();
- if (!__get_user_inatomic(*ret, ptr)) {
- pagefault_enable();
- return 0;
- }
- pagefault_enable();
-
- return read_user_stack_slow(ptr, ret, 8);
-}
-
-static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
-{
- if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
- ((unsigned long)ptr & 3))
- return -EFAULT;
-
- pagefault_disable();
- if (!__get_user_inatomic(*ret, ptr)) {
- pagefault_enable();
- return 0;
- }
- pagefault_enable();
-
- return read_user_stack_slow(ptr, ret, 4);
-}
-
-static inline int valid_user_sp(unsigned long sp, int is_64)
-{
- if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
- return 0;
- return 1;
-}
-
-/*
- * 64-bit user processes use the same stack frame for RT and non-RT signals.
- */
-struct signal_frame_64 {
- char dummy[__SIGNAL_FRAMESIZE];
- struct ucontext uc;
- unsigned long unused[2];
- unsigned int tramp[6];
- struct siginfo *pinfo;
- void *puc;
- struct siginfo info;
- char abigap[288];
-};
-
-static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
-{
- if (nip == fp + offsetof(struct signal_frame_64, tramp))
- return 1;
- if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
- return 1;
- return 0;
-}
-
-/*
- * Do some sanity checking on the signal frame pointed to by sp.
- * We check the pinfo and puc pointers in the frame.
- */
-static int sane_signal_64_frame(unsigned long sp)
-{
- struct signal_frame_64 __user *sf;
- unsigned long pinfo, puc;
-
- sf = (struct signal_frame_64 __user *) sp;
- if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
- read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
- return 0;
- return pinfo == (unsigned long) &sf->info &&
- puc == (unsigned long) &sf->uc;
-}
-
-static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
- struct pt_regs *regs)
-{
- unsigned long sp, next_sp;
- unsigned long next_ip;
- unsigned long lr;
- long level = 0;
- struct signal_frame_64 __user *sigframe;
- unsigned long __user *fp, *uregs;
-
- next_ip = perf_instruction_pointer(regs);
- lr = regs->link;
- sp = regs->gpr[1];
- perf_callchain_store(entry, next_ip);
-
- while (entry->nr < entry->max_stack) {
- fp = (unsigned long __user *) sp;
- if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
- return;
- if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
- return;
-
- /*
- * Note: the next_sp - sp >= signal frame size check
- * is true when next_sp < sp, which can happen when
- * transitioning from an alternate signal stack to the
- * normal stack.
- */
- if (next_sp - sp >= sizeof(struct signal_frame_64) &&
- (is_sigreturn_64_address(next_ip, sp) ||
- (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
- sane_signal_64_frame(sp)) {
- /*
- * This looks like an signal frame
- */
- sigframe = (struct signal_frame_64 __user *) sp;
- uregs = sigframe->uc.uc_mcontext.gp_regs;
- if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
- read_user_stack_64(&uregs[PT_LNK], &lr) ||
- read_user_stack_64(&uregs[PT_R1], &sp))
- return;
- level = 0;
- perf_callchain_store_context(entry, PERF_CONTEXT_USER);
- perf_callchain_store(entry, next_ip);
- continue;
- }
-
- if (level == 0)
- next_ip = lr;
- perf_callchain_store(entry, next_ip);
- ++level;
- sp = next_sp;
- }
-}
-
-#else /* CONFIG_PPC64 */
-/*
- * On 32-bit we just access the address and let hash_page create a
- * HPTE if necessary, so there is no need to fall back to reading
- * the page tables. Since this is called at interrupt level,
- * do_page_fault() won't treat a DSI as a page fault.
- */
-static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
-{
- int rc;
-
- if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
- ((unsigned long)ptr & 3))
- return -EFAULT;
-
- pagefault_disable();
- rc = __get_user_inatomic(*ret, ptr);
- pagefault_enable();
-
- return rc;
-}
-
-static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
- struct pt_regs *regs)
-{
-}
-
-static inline int valid_user_sp(unsigned long sp, int is_64)
-{
- if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
- return 0;
- return 1;
-}
-
-#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
-#define sigcontext32 sigcontext
-#define mcontext32 mcontext
-#define ucontext32 ucontext
-#define compat_siginfo_t struct siginfo
-
-#endif /* CONFIG_PPC64 */
-
-/*
- * Layout for non-RT signal frames
- */
-struct signal_frame_32 {
- char dummy[__SIGNAL_FRAMESIZE32];
- struct sigcontext32 sctx;
- struct mcontext32 mctx;
- int abigap[56];
-};
-
-/*
- * Layout for RT signal frames
- */
-struct rt_signal_frame_32 {
- char dummy[__SIGNAL_FRAMESIZE32 + 16];
- compat_siginfo_t info;
- struct ucontext32 uc;
- int abigap[56];
-};
-
-static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
-{
- if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
- return 1;
- if (vdso32_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso32_sigtramp)
- return 1;
- return 0;
-}
-
-static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
-{
- if (nip == fp + offsetof(struct rt_signal_frame_32,
- uc.uc_mcontext.mc_pad))
- return 1;
- if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
- return 1;
- return 0;
-}
-
-static int sane_signal_32_frame(unsigned int sp)
-{
- struct signal_frame_32 __user *sf;
- unsigned int regs;
-
- sf = (struct signal_frame_32 __user *) (unsigned long) sp;
- if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
- return 0;
- return regs == (unsigned long) &sf->mctx;
-}
-
-static int sane_rt_signal_32_frame(unsigned int sp)
-{
- struct rt_signal_frame_32 __user *sf;
- unsigned int regs;
-
- sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
- if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
- return 0;
- return regs == (unsigned long) &sf->uc.uc_mcontext;
-}
-
-static unsigned int __user *signal_frame_32_regs(unsigned int sp,
- unsigned int next_sp, unsigned int next_ip)
-{
- struct mcontext32 __user *mctx = NULL;
- struct signal_frame_32 __user *sf;
- struct rt_signal_frame_32 __user *rt_sf;
-
- /*
- * Note: the next_sp - sp >= signal frame size check
- * is true when next_sp < sp, for example, when
- * transitioning from an alternate signal stack to the
- * normal stack.
- */
- if (next_sp - sp >= sizeof(struct signal_frame_32) &&
- is_sigreturn_32_address(next_ip, sp) &&
- sane_signal_32_frame(sp)) {
- sf = (struct signal_frame_32 __user *) (unsigned long) sp;
- mctx = &sf->mctx;
- }
-
- if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
- is_rt_sigreturn_32_address(next_ip, sp) &&
- sane_rt_signal_32_frame(sp)) {
- rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
- mctx = &rt_sf->uc.uc_mcontext;
- }
-
- if (!mctx)
- return NULL;
- return mctx->mc_gregs;
-}
-
-static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
- struct pt_regs *regs)
-{
- unsigned int sp, next_sp;
- unsigned int next_ip;
- unsigned int lr;
- long level = 0;
- unsigned int __user *fp, *uregs;
-
- next_ip = perf_instruction_pointer(regs);
- lr = regs->link;
- sp = regs->gpr[1];
- perf_callchain_store(entry, next_ip);
-
- while (entry->nr < entry->max_stack) {
- fp = (unsigned int __user *) (unsigned long) sp;
- if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
- return;
- if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
- return;
-
- uregs = signal_frame_32_regs(sp, next_sp, next_ip);
- if (!uregs && level <= 1)
- uregs = signal_frame_32_regs(sp, next_sp, lr);
- if (uregs) {
- /*
- * This looks like an signal frame, so restart
- * the stack trace with the values in it.
- */
- if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
- read_user_stack_32(&uregs[PT_LNK], &lr) ||
- read_user_stack_32(&uregs[PT_R1], &sp))
- return;
- level = 0;
- perf_callchain_store_context(entry, PERF_CONTEXT_USER);
- perf_callchain_store(entry, next_ip);
- continue;
- }
-
- if (level == 0)
- next_ip = lr;
- perf_callchain_store(entry, next_ip);
- ++level;
- sp = next_sp;
- }
-}
-
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
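
The body of perf_callchain_user() is cut off by this excerpt. With the 32-bit and 64-bit walkers split out by this patch, it plausibly reduces to a dispatch on the task's ABI; a hedged reconstruction, not verbatim patch content:

/* Reconstruction under the assumption that the split files export the
 * two walkers declared in callchain.h below. */
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
			 struct pt_regs *regs)
{
	if (!is_32bit_task())
		perf_callchain_user_64(entry, regs);
	else
		perf_callchain_user_32(entry, regs);
}
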
diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
new file mode 100644
index 000000000000..19a8d051ddf1
--- /dev/null
+++ b/arch/powerpc/perf/callchain.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_PERF_CALLCHAIN_H
+#define _POWERPC_PERF_CALLCHAIN_H
+
+void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs);
+void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs);
+
+static inline bool invalid_user_sp(unsigned long sp)
+{
+ unsigned long mask = is_32bit_task() ? 3 : 7;
+ unsigned long top = STACK_TOP - (is_32bit_task() ? 16 : 32);
+
+ return (!sp || (sp & mask) || (sp > top));
+}
+
+/*
+ * On 32-bit we just access the address and let hash_page create a
+ * HPTE if necessary, so there is no need to fall back to reading
+ * the page tables. Since this is called at interrupt level,
+ * do_page_fault() won't treat a DSI as a page fault.
+ */
+static inline int __read_user_stack(const void __user *ptr, void *ret,
+ size_t size)
+{
+ unsigned long addr = (unsigned long)ptr;
+
+ if (addr > TASK_SIZE - size || (addr & (size - 1)))
+ return -EFAULT;
+
+ return copy_from_user_nofault(ret, ptr, size);
+}
+
+#endif /* _POWERPC_PERF_CALLCHAIN_H */
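
invalid_user_sp() and __read_user_stack() above are the guards shared by both walkers: the former rejects a stack pointer that is NULL, misaligned (word alignment for 32-bit tasks, doubleword for 64-bit), or too close to STACK_TOP to hold a minimal frame (16 bytes on 32-bit, 32 on 64-bit); the latter bounds- and alignment-checks the address before a non-faulting copy. A short usage sketch under those assumptions, for a 64-bit task:

/* Illustrative caller; for a 64-bit task mask = 7 and top = STACK_TOP - 32. */
unsigned long sp = regs->gpr[1];
unsigned long next_sp;

if (invalid_user_sp(sp))
	return;		/* NULL, unaligned, or beyond the usable stack top */
if (__read_user_stack((const void __user *)sp, &next_sp, sizeof(next_sp)))
	return;		/* -EFAULT: out of range, or the copy faulted */
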
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c
new file mode 100644
index 000000000000..ea8cfe3806dc
--- /dev/null
+++ b/arch/powerpc/perf/callchain_32.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#include <asm/pte-walk.h>
+
+#include "callchain.h"
+
+#ifdef CONFIG_PPC64
+#include <asm/syscalls_32.h>
+#else /* CONFIG_PPC64 */
+
+#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
+#define sigcontext32 sigcontext
+#define mcontext32 mcontext
+#define ucontext32 ucontext
+#define compat_siginfo_t struct siginfo
+
+#endif /* CONFIG_PPC64 */
+
+static int read_user_stack_32(const unsigned int __user *ptr, unsigned int *ret)
+{
+ return __read_user_stack(ptr, ret, sizeof(*ret));
+}
+
+/*
+ * Layout for non-RT signal frames
+ */
+struct signal_frame_32 {
+ char dummy[__SIGNAL_FRAMESIZE32];
+ struct sigcontext32 sctx;
+ struct mcontext32 mctx;
+ int abigap[56];
+};
+
+/*
+ * Layout for RT signal frames
+ */
+struct rt_signal_frame_32 {
+ char dummy[__SIGNAL_FRAMESIZE32 + 16];
+ compat_siginfo_t info;
+ struct ucontext32 uc;
+ int abigap[56];
+};
+
+static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+ if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
+ return 1;
+ if (current->mm->context.vdso &&
+ nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp32))
+ return 1;
+ return 0;
+}
+
+static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+ if (nip == fp + offsetof(struct rt_signal_frame_32,
+ uc.uc_mcontext.mc_pad))
+ return 1;
+ if (current->mm->context.vdso &&
+ nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp_rt32))
+ return 1;
+ return 0;
+}
+
+static int sane_signal_32_frame(unsigned int sp)
+{
+ struct signal_frame_32 __user *sf;
+ unsigned int regs;
+
+ sf = (struct signal_frame_32 __user *) (unsigned long) sp;
+ if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
+ return 0;
+ return regs == (unsigned long) &sf->mctx;
+}
+
+static int sane_rt_signal_32_frame(unsigned int sp)
+{
+ struct rt_signal_frame_32 __user *sf;
+ unsigned int regs;
+
+ sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
+ if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
+ return 0;
+ return regs == (unsigned long) &sf->uc.uc_mcontext;
+}
+
+static unsigned int __user *signal_frame_32_regs(unsigned int sp,
+ unsigned int next_sp, unsigned int next_ip)
+{
+ struct mcontext32 __user *mctx = NULL;
+ struct signal_frame_32 __user *sf;
+ struct rt_signal_frame_32 __user *rt_sf;
+
+ /*
+ * Note: the next_sp - sp >= signal frame size check
+ * is true when next_sp < sp, for example, when
+ * transitioning from an alternate signal stack to the
+ * normal stack.
+ */
+ if (next_sp - sp >= sizeof(struct signal_frame_32) &&
+ is_sigreturn_32_address(next_ip, sp) &&
+ sane_signal_32_frame(sp)) {
+ sf = (struct signal_frame_32 __user *) (unsigned long) sp;
+ mctx = &sf->mctx;
+ }
+
+ if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
+ is_rt_sigreturn_32_address(next_ip, sp) &&
+ sane_rt_signal_32_frame(sp)) {
+ rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
+ mctx = &rt_sf->uc.uc_mcontext;
+ }
+
+ if (!mctx)
+ return NULL;
+ return mctx->mc_gregs;
+}
+
+void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ unsigned int sp, next_sp;
+ unsigned int next_ip;
+ unsigned int lr;
+ long level = 0;
+ unsigned int __user *fp, *uregs;
+
+ next_ip = perf_instruction_pointer(regs);
+ lr = regs->link;
+ sp = regs->gpr[1];
+ perf_callchain_store(entry, next_ip);
+
+ while (entry->nr < entry->max_stack) {
+ fp = (unsigned int __user *) (unsigned long) sp;
+ if (invalid_user_sp(sp) || read_user_stack_32(fp, &next_sp))
+ return;
+ if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
+ return;
+
+ uregs = signal_frame_32_regs(sp, next_sp, next_ip);
+ if (!uregs && level <= 1)
+ uregs = signal_frame_32_regs(sp, next_sp, lr);
+ if (uregs) {
+ /*
+ * This looks like an signal frame, so restart
+ * the stack trace with the values in it.
+ */
+ if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
+ read_user_stack_32(&uregs[PT_LNK], &lr) ||
+ read_user_stack_32(&uregs[PT_R1], &sp))
+ return;
+ level = 0;
+ perf_callchain_store_context(entry, PERF_CONTEXT_USER);
+ perf_callchain_store(entry, next_ip);
+ continue;
+ }
+
+ if (level == 0)
+ next_ip = lr;
+ perf_callchain_store(entry, next_ip);
+ ++level;
+ sp = next_sp;
+ }
+}
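
One subtlety in signal_frame_32_regs() is worth spelling out: sp and next_sp are unsigned, so the next_sp - sp >= sizeof(...) test also passes when next_sp < sp, because the difference wraps; that is exactly the alternate-signal-stack transition the in-code comment mentions. A worked example with made-up addresses:

/* 32-bit unsigned arithmetic, illustrative values only. */
unsigned int sp      = 0xbf001000;	/* frame on an alternate signal stack */
unsigned int next_sp = 0x7f802000;	/* back chain points into the normal stack */

/* next_sp - sp wraps to 0xc0801000, far above sizeof(struct
 * signal_frame_32), so the signal-frame heuristic is still tried. */
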
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
new file mode 100644
index 000000000000..488e8a21a11e
--- /dev/null
+++ b/arch/powerpc/perf/callchain_64.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#include <asm/pte-walk.h>
+
+#include "callchain.h"
+
+static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret)
+{
+ return __read_user_stack(ptr, ret, sizeof(*ret));
+}
+
+/*
+ * 64-bit user processes use the same stack frame for RT and non-RT signals.
+ */
+struct signal_frame_64 {
+ char dummy[__SIGNAL_FRAMESIZE];
+ struct ucontext uc;
+ unsigned long unused[2];
+ unsigned int tramp[6];
+ struct siginfo *pinfo;
+ void *puc;
+ struct siginfo info;
+ char abigap[288];
+};
+
+static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
+{
+ if (nip == fp + offsetof(struct signal_frame_64, tramp))
+ return 1;
+ if (current->mm->context.vdso &&
+ nip == VDSO64_SYMBOL(current->mm->context.vdso, sigtramp_rt64))
+ return 1;
+ return 0;
+}
+
+/*
+ * Do some sanity checking on the signal frame pointed to by sp.
+ * We check the pinfo and puc pointers in the frame.
+ */
+static int sane_signal_64_frame(unsigned long sp)
+{
+ struct signal_frame_64 __user *sf;
+ unsigned long pinfo, puc;
+
+ sf = (struct signal_frame_64 __user *) sp;
+ if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
+ read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
+ return 0;
+ return pinfo == (unsigned long) &sf->info &&
+ puc == (unsigned long) &sf->uc;
+}
+
+void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ unsigned long sp, next_sp;
+ unsigned long next_ip;
+ unsigned long lr;
+ long level = 0;
+ struct signal_frame_64 __user *sigframe;
+ unsigned long __user *fp, *uregs;
+
+ next_ip = perf_instruction_pointer(regs);
+ lr = regs->link;
+ sp = regs->gpr[1];
+ perf_callchain_store(entry, next_ip);
+
+ while (entry->nr < entry->max_stack) {
+ fp = (unsigned long __user *) sp;
+ if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp))
+ return;
+ if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
+ return;
+
+ /*
+ * Note: the next_sp - sp >= signal frame size check
+ * is true when next_sp < sp, which can happen when
+ * transitioning from an alternate signal stack to the
+ * normal stack.
+ */
+ if (next_sp - sp >= sizeof(struct signal_frame_64) &&
+ (is_sigreturn_64_address(next_ip, sp) ||
+ (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
+ sane_signal_64_frame(sp)) {
+ /*
+ * This looks like an signal frame
+ */
+ sigframe = (struct signal_frame_64 __user *) sp;
+ uregs = sigframe->uc.uc_mcontext.gp_regs;
+ if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
+ read_user_stack_64(&uregs[PT_LNK], &lr) ||
+ read_user_stack_64(&uregs[PT_R1], &sp))
+ return;
+ level = 0;
+ perf_callchain_store_context(entry, PERF_CONTEXT_USER);
+ perf_callchain_store(entry, next_ip);
+ continue;
+ }
+
+ if (level == 0)
+ next_ip = lr;
+ perf_callchain_store(entry, next_ip);
+ ++level;
+ sp = next_sp;
+ }
+}
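
The two walkers read the saved link register from different frame slots, fp[2] here versus fp[1] in callchain_32.c, which follows from the stack-frame layouts of the two ABIs; in outline:

/*
 * 64-bit ELF ABI frame                  32-bit SysV ABI frame
 *   fp[0]  back chain   -> next_sp        fp[0]  back chain   -> next_sp
 *   fp[1]  CR save area                   fp[1]  LR save word -> next_ip
 *   fp[2]  LR save area -> next_ip
 */
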
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 48604625ab31..6b5f8a94e7d8 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -17,6 +17,8 @@
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/code-patching.h>
+#include <asm/hw_irq.h>
+#include <asm/interrupt.h>
#ifdef CONFIG_PPC64
#include "internal.h"
@@ -37,12 +39,7 @@ struct cpu_hw_events {
struct perf_event *event[MAX_HWEVENTS];
u64 events[MAX_HWEVENTS];
unsigned int flags[MAX_HWEVENTS];
- /*
- * The order of the MMCR array is:
- * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2
- * - 32-bit, MMCR0, MMCR1, MMCR2
- */
- unsigned long mmcr[4];
+ struct mmcr_regs mmcr;
struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
@@ -59,6 +56,9 @@ struct cpu_hw_events {
struct perf_branch_stack bhrb_stack;
struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
u64 ic_init;
+
+ /* Store the PMC values */
+ unsigned long pmcs[MAX_HWEVENTS];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -77,6 +77,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
/*
* 32-bit doesn't have MMCRA but does have an MMCR2,
* and a few other names are different.
+ * Also 32-bit doesn't have MMCR3, SIER2 and SIER3.
+ * Define them as zero knowing that any code path accessing
+ * these registers (via mtspr/mfspr) are done under ppmu flag
+ * check for PPMU_ARCH_31 and we will not enter that code path
+ * for 32-bit.
*/
#ifdef CONFIG_PPC32
@@ -90,7 +95,12 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
#define MMCR0_PMCC_U6 0
#define SPRN_MMCRA SPRN_MMCR2
+#define SPRN_MMCR3 0
+#define SPRN_SIER2 0
+#define SPRN_SIER3 0
#define MMCRA_SAMPLE_ENABLE 0
+#define MMCRA_BHRB_DISABLE 0
+#define MMCR0_PMCCEXT 0
static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
@@ -105,10 +115,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
{
regs->result = 0;
}
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
- return 0;
-}
static inline int siar_valid(struct pt_regs *regs)
{
@@ -121,24 +127,38 @@ static void ebb_event_add(struct perf_event *event) { }
static void ebb_switch_out(unsigned long mmcr0) { }
static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
{
- return cpuhw->mmcr[0];
+ return cpuhw->mmcr.mmcr0;
}
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) {}
static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
bool is_sier_available(void)
{
+ if (!ppmu)
+ return false;
+
if (ppmu->flags & PPMU_HAS_SIER)
return true;
return false;
}
+/*
+ * Return PMC value corresponding to the
+ * index passed.
+ */
+unsigned long get_pmcs_ext_regs(int idx)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ return cpuhw->pmcs[idx];
+}
+
static bool regs_use_siar(struct pt_regs *regs)
{
/*
@@ -150,7 +170,7 @@ static bool regs_use_siar(struct pt_regs *regs)
* they have not been setup using perf_read_regs() and so regs->result
* is something random.
*/
- return ((TRAP(regs) == 0xf00) && regs->result);
+ return ((TRAP(regs) == INTERRUPT_PERFMON) && regs->result);
}
/*
@@ -204,7 +224,7 @@ static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
- if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
+ if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel)
*addrp = 0;
}
@@ -236,7 +256,7 @@ static bool regs_sipr(struct pt_regs *regs)
static inline u32 perf_flags_from_msr(struct pt_regs *regs)
{
- if (regs->msr & MSR_PR)
+ if (user_mode(regs))
return PERF_RECORD_MISC_USER;
if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV)
return PERF_RECORD_MISC_HYPERVISOR;
@@ -246,11 +266,32 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs)
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
bool use_siar = regs_use_siar(regs);
+ unsigned long mmcra = regs->dsisr;
+ int marked = mmcra & MMCRA_SAMPLE_ENABLE;
if (!use_siar)
return perf_flags_from_msr(regs);
/*
+ * Check the address in SIAR to identify the
+ * privilege levels since the SIER[MSR_HV, MSR_PR]
+ * bits are not set for marked events in power10
+ * DD1.
+ */
+ if (marked && (ppmu->flags & PPMU_P10_DD1)) {
+ unsigned long siar = mfspr(SPRN_SIAR);
+ if (siar) {
+ if (is_kernel_addr(siar))
+ return PERF_RECORD_MISC_KERNEL;
+ return PERF_RECORD_MISC_USER;
+ } else {
+ if (is_kernel_addr(regs->nip))
+ return PERF_RECORD_MISC_KERNEL;
+ return PERF_RECORD_MISC_USER;
+ }
+ }
+
+ /*
* If we don't have flags in MMCRA, rather than using
* the MSR, we intuit the flags from the address in
* SIAR which should give slightly more reliable
@@ -300,6 +341,13 @@ static inline void perf_read_regs(struct pt_regs *regs)
* If the PMU doesn't update the SIAR for non marked events use
* pt_regs.
*
+ * If regs is a kernel interrupt, always use SIAR. Some PMUs have an
+ * issue with regs_sipr not being in synch with SIAR in interrupt entry
+ * and return sequences, which can result in regs_sipr being true for
+ * kernel interrupts and SIAR, which has the effect of causing samples
+ * to pile up at mtmsrd MSR[EE] 0->1 or pending irq replay around
+ * interrupt entry/exit.
+ *
* If the PMU has HV/PR flags then check to see if they
* place the exception in userspace. If so, use pt_regs. In
* continuous sampling mode the SIAR and the PMU exception are
@@ -308,7 +356,7 @@ static inline void perf_read_regs(struct pt_regs *regs)
* hypervisor samples as well as samples in the kernel with
* interrupts off hence the userspace check.
*/
- if (TRAP(regs) != 0xf00)
+ if (TRAP(regs) != INTERRUPT_PERFMON)
use_siar = 0;
else if ((ppmu->flags & PPMU_NO_SIAR))
use_siar = 0;
@@ -316,6 +364,8 @@ static inline void perf_read_regs(struct pt_regs *regs)
use_siar = 1;
else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
use_siar = 0;
+ else if (!user_mode(regs))
+ use_siar = 1;
else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs))
use_siar = 0;
else
@@ -325,15 +375,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
}
/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
- return (regs->softe & IRQS_DISABLED);
-}
-
-/*
* On processors like P7+ that have the SIAR-Valid bit, marked instructions
* must be sampled only if the SIAR-valid bit is set.
*
@@ -346,7 +387,14 @@ static inline int siar_valid(struct pt_regs *regs)
int marked = mmcra & MMCRA_SAMPLE_ENABLE;
if (marked) {
- if (ppmu->flags & PPMU_HAS_SIER)
+ /*
+ * SIER[SIAR_VALID] is not set for some
+ * marked events on power10 DD1, so drop
+ * the check for SIER[SIAR_VALID] and return true.
+ */
+ if (ppmu->flags & PPMU_P10_DD1)
+ return 0x1;
+ else if (ppmu->flags & PPMU_HAS_SIER)
return regs->dar & SIER_SIAR_VALID;
if (ppmu->flags & PPMU_SIAR_VALID)
@@ -376,7 +424,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
cpuhw->bhrb_context = event->ctx;
}
cpuhw->bhrb_users++;
- perf_sched_cb_inc(event->ctx->pmu);
+ perf_sched_cb_inc(event->pmu);
}
static void power_pmu_bhrb_disable(struct perf_event *event)
@@ -388,7 +436,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
WARN_ON_ONCE(!cpuhw->bhrb_users);
cpuhw->bhrb_users--;
- perf_sched_cb_dec(event->ctx->pmu);
+ perf_sched_cb_dec(event->pmu);
if (!cpuhw->disabled && !cpuhw->bhrb_users) {
/* BHRB cannot be turned off when other
@@ -403,7 +451,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
/* Called from ctxsw to prevent one process's branch entries to
* mingle with the other process's entries during context switch.
*/
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
if (!ppmu->bhrb_nr)
return;
@@ -415,24 +463,20 @@ static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
static __u64 power_pmu_bhrb_to(u64 addr)
{
unsigned int instr;
- int ret;
__u64 target;
if (is_kernel_addr(addr)) {
- if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+ if (copy_from_kernel_nofault(&instr, (void *)addr,
+ sizeof(instr)))
return 0;
return branch_target(&instr);
}
/* Userspace: need copy instruction here then translate it */
- pagefault_disable();
- ret = __get_user_inatomic(instr, (unsigned int __user *)addr);
- if (ret) {
- pagefault_enable();
+ if (copy_from_user_nofault(&instr, (unsigned int __user *)addr,
+ sizeof(instr)))
return 0;
- }
- pagefault_enable();
target = branch_target(&instr);
if ((!target) || (instr & BRANCH_ABSOLUTE))
@@ -470,8 +514,11 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
* addresses at this point. Check the privileges before
* exporting it to userspace (avoid exposure of regions
* where we could have speculative execution)
+ * Incase of ISA v3.1, BHRB will capture only user-space
+ * addresses, hence include a check before filtering code
*/
- if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
+ if (!(ppmu->flags & PPMU_ARCH_31) &&
+ is_kernel_addr(addr) && event->attr.exclude_kernel)
continue;
/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -524,6 +571,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
}
}
cpuhw->bhrb_stack.nr = u_index;
+ cpuhw->bhrb_stack.hw_idx = -1ULL;
return;
}
@@ -589,11 +637,16 @@ static void ebb_switch_out(unsigned long mmcr0)
current->thread.sdar = mfspr(SPRN_SDAR);
current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK;
current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
+ if (ppmu->flags & PPMU_ARCH_31) {
+ current->thread.mmcr3 = mfspr(SPRN_MMCR3);
+ current->thread.sier2 = mfspr(SPRN_SIER2);
+ current->thread.sier3 = mfspr(SPRN_SIER3);
+ }
}
static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
{
- unsigned long mmcr0 = cpuhw->mmcr[0];
+ unsigned long mmcr0 = cpuhw->mmcr.mmcr0;
if (!ebb)
goto out;
@@ -627,7 +680,13 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
* unfreeze counters, it should not set exclude_xxx in its events and
* instead manage the MMCR2 entirely by itself.
*/
- mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2);
+ mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2);
+
+ if (ppmu->flags & PPMU_ARCH_31) {
+ mtspr(SPRN_MMCR3, current->thread.mmcr3);
+ mtspr(SPRN_SIER2, current->thread.sier2);
+ mtspr(SPRN_SIER3, current->thread.sier3);
+ }
out:
return mmcr0;
}
@@ -717,6 +776,34 @@ static void pmao_restore_workaround(bool ebb)
mtspr(SPRN_PMC6, pmcs[5]);
}
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers) that allows PMI interrupts
+ * through to improve accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+ struct cpu_hw_events *cpuhw;
+
+ /*
+ * This could simply test local_paca->pmcregs_in_use if that were not
+ * under ifdef KVM.
+ */
+ if (!ppmu)
+ return false;
+
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+ return cpuhw->n_events;
+}
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
@@ -799,6 +886,19 @@ static void write_pmc(int idx, unsigned long val)
}
}
+static int any_pmc_overflown(struct cpu_hw_events *cpuhw)
+{
+ int i, idx;
+
+ for (i = 0; i < cpuhw->n_events; i++) {
+ idx = cpuhw->event[i]->hw.idx;
+ if ((idx) && ((int)read_pmc(idx) < 0))
+ return idx;
+ }
+
+ return 0;
+}
+
/* Called from sysrq_handle_showregs() */
void perf_event_print_debug(void)
{
@@ -848,6 +948,11 @@ void perf_event_print_debug(void)
pr_info("EBBRR: %016lx BESCR: %016lx\n",
mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
}
+
+ if (ppmu->flags & PPMU_ARCH_31) {
+ pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n",
+ mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3));
+ }
#endif
pr_info("SIAR: %016lx SDAR: %016lx SIER: %016lx\n",
mfspr(SPRN_SIAR), sdar, sier);
@@ -863,7 +968,7 @@ void perf_event_print_debug(void)
*/
static int power_check_constraints(struct cpu_hw_events *cpuhw,
u64 event_id[], unsigned int cflags[],
- int n_ev)
+ int n_ev, struct perf_event **event)
{
unsigned long mask, value, nv;
unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
@@ -886,7 +991,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
event_id[i] = cpuhw->alternatives[i][0];
}
if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
- &cpuhw->avalues[i][0]))
+ &cpuhw->avalues[i][0], event[i]->attr.config1))
return -1;
}
value = mask = 0;
@@ -921,7 +1026,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
for (j = 1; j < n_alt[i]; ++j)
ppmu->get_constraint(cpuhw->alternatives[i][j],
&cpuhw->amasks[i][j],
- &cpuhw->avalues[i][j]);
+ &cpuhw->avalues[i][j],
+ event[i]->attr.config1);
}
/* enumerate all possibilities and see if any will work */
@@ -1036,7 +1142,7 @@ static u64 check_and_compute_delta(u64 prev, u64 val)
/*
* POWER7 can roll back counter values, if the new value is smaller
* than the previous value it will cause the delta and the counter to
- * have bogus values unless we rolled a counter over. If a coutner is
+ * have bogus values unless we rolled a counter over. If a counter is
* rolled back, it will be smaller, but within 256, which is the maximum
* number of events to rollback at once. If we detect a rollback
* return 0. This can lead to a small lack of precision in the
@@ -1199,7 +1305,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
static void power_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
- unsigned long flags, mmcr0, val;
+ unsigned long flags, mmcr0, val, mmcra;
if (!ppmu)
return;
@@ -1217,11 +1323,16 @@ static void power_pmu_disable(struct pmu *pmu)
/*
* Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56
+ * Also clear PMXE to disable PMI's getting triggered in some
+ * corner cases during PMU disable.
*/
val = mmcr0 = mfspr(SPRN_MMCR0);
val |= MMCR0_FC;
val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
- MMCR0_FC56);
+ MMCR0_PMXE | MMCR0_FC56);
+ /* Set mmcr0 PMCCEXT for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+ val |= MMCR0_PMCCEXT;
/*
* The barrier is to make sure the mtspr has been
@@ -1233,11 +1344,45 @@ static void power_pmu_disable(struct pmu *pmu)
isync();
/*
+ * Some corner cases could clear the PMU counter overflow
+ * while a masked PMI is pending. One such case is when
+ * a PMI happens during interrupt replay and perf counter
+ * values are cleared by PMU callbacks before replay.
+ *
+ * Disable the interrupt by clearing the paca bit for PMI
+ * since we are disabling the PMU now. Otherwise provide a
+ * warning if there is PMI pending, but no counter is found
+ * overflown.
+ *
+ * Since power_pmu_disable runs under local_irq_save, it
+ * could happen that code hits a PMC overflow without PMI
+ * pending in paca. Hence only clear PMI pending if it was
+ * set.
+ *
+ * If a PMI is pending, then MSR[EE] must be disabled (because
+ * the masked PMI handler disabling EE). So it is safe to
+ * call clear_pmi_irq_pending().
+ */
+ if (pmi_irq_pending())
+ clear_pmi_irq_pending();
+
+ val = mmcra = cpuhw->mmcr.mmcra;
+
+ /*
* Disable instruction sampling if it was enabled
*/
- if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
- mtspr(SPRN_MMCRA,
- cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+ val &= ~MMCRA_SAMPLE_ENABLE;
+
+ /* Disable BHRB via mmcra (BHRBRD) for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+ val |= MMCRA_BHRB_DISABLE;
+
+ /*
+ * Write SPRN_MMCRA if mmcra has either disabled
+ * instruction sampling or BHRB.
+ */
+ if (val != mmcra) {
+ mtspr(SPRN_MMCRA, val);
mb();
isync();
}
@@ -1311,18 +1456,29 @@ static void power_pmu_enable(struct pmu *pmu)
* (possibly updated for removal of events).
*/
if (!cpuhw->n_added) {
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
- mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+ /*
+ * If there is any active event with an overflown PMC
+ * value, set back PACA_IRQ_PMI which would have been
+ * cleared in power_pmu_disable().
+ */
+ hard_irq_disable();
+ if (any_pmc_overflown(cpuhw))
+ set_pmi_irq_pending();
+
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+ if (ppmu->flags & PPMU_ARCH_31)
+ mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
goto out_enable;
}
/*
* Clear all MMCR settings and recompute them for the new set of events.
*/
- memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
+ memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
- cpuhw->mmcr, cpuhw->event)) {
+ &cpuhw->mmcr, cpuhw->event, ppmu->flags)) {
/* shouldn't ever get here */
printk(KERN_ERR "oops compute_mmcr failed\n");
goto out;
@@ -1336,11 +1492,11 @@ static void power_pmu_enable(struct pmu *pmu)
*/
event = cpuhw->event[0];
if (event->attr.exclude_user)
- cpuhw->mmcr[0] |= MMCR0_FCP;
+ cpuhw->mmcr.mmcr0 |= MMCR0_FCP;
if (event->attr.exclude_kernel)
- cpuhw->mmcr[0] |= freeze_events_kernel;
+ cpuhw->mmcr.mmcr0 |= freeze_events_kernel;
if (event->attr.exclude_hv)
- cpuhw->mmcr[0] |= MMCR0_FCHV;
+ cpuhw->mmcr.mmcr0 |= MMCR0_FCHV;
}
/*
@@ -1349,12 +1505,15 @@ static void power_pmu_enable(struct pmu *pmu)
* Then unfreeze the events.
*/
ppc_set_pmu_inuse(1);
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
- mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
- mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+ mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
| MMCR0_FC);
if (ppmu->flags & PPMU_ARCH_207S)
- mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
+ mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2);
+
+ if (ppmu->flags & PPMU_ARCH_31)
+ mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
/*
* Read off any pre-existing events that need to move
@@ -1405,7 +1564,7 @@ static void power_pmu_enable(struct pmu *pmu)
perf_event_update_userpage(event);
}
cpuhw->n_limited = n_lim;
- cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+ cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE;
out_enable:
pmao_restore_workaround(ebb);
@@ -1421,9 +1580,9 @@ static void power_pmu_enable(struct pmu *pmu)
/*
* Enable instruction sampling if necessary
*/
- if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+ if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) {
mb();
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra);
}
out:
@@ -1507,7 +1666,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
goto out;
- if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
+ if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event))
goto out;
event->hw.config = cpuhw->events[n0];
@@ -1520,9 +1679,16 @@ nocheck:
ret = 0;
out:
if (has_branch_stack(event)) {
- power_pmu_bhrb_enable(event);
- cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
- event->attr.branch_sample_type);
+ u64 bhrb_filter = -1;
+
+ if (ppmu->bhrb_filter_map)
+ bhrb_filter = ppmu->bhrb_filter_map(
+ event->attr.branch_sample_type);
+
+ if (bhrb_filter != -1) {
+ cpuhw->bhrb_filter = bhrb_filter;
+ power_pmu_bhrb_enable(event);
+ }
}
perf_pmu_enable(event->pmu);
@@ -1553,7 +1719,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
cpuhw->flags[i-1] = cpuhw->flags[i];
}
--cpuhw->n_events;
- ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
+ ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr);
if (event->hw.idx) {
write_pmc(event->hw.idx, 0);
event->hw.idx = 0;
@@ -1574,7 +1740,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
}
if (cpuhw->n_events == 0) {
/* disable exceptions if no events are running */
- cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+ cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE);
}
if (has_branch_stack(event))
@@ -1710,7 +1876,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
n = cpuhw->n_events;
if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
return -EAGAIN;
- i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
+ i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event);
if (i < 0)
return -EAGAIN;
@@ -1798,7 +1964,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
static int hw_perf_cache_event(u64 config, u64 *eventp)
{
unsigned long type, op, result;
- int ev;
+ u64 ev;
if (!ppmu->cache_events)
return -EINVAL;
@@ -1837,14 +2003,13 @@ static bool is_event_blacklisted(u64 ev)
static int power_pmu_event_init(struct perf_event *event)
{
u64 ev;
- unsigned long flags;
+ unsigned long flags, irq_flags;
struct perf_event *ctrs[MAX_HWEVENTS];
u64 events[MAX_HWEVENTS];
unsigned int cflags[MAX_HWEVENTS];
int n;
int err;
struct cpu_hw_events *cpuhw;
- u64 bhrb_filter;
if (!ppmu)
return -ENOENT;
@@ -1883,6 +2048,17 @@ static int power_pmu_event_init(struct perf_event *event)
return -ENOENT;
}
+ /*
+ * PMU config registers have fields that are
+ * reserved and some specific values for bit fields are reserved.
+ * For ex., MMCRA[61:62] is Random Sampling Mode (SM)
+ * and value of 0b11 to this field is reserved.
+ * Check for invalid values in attr.config.
+ */
+ if (ppmu->check_attr_config &&
+ ppmu->check_attr_config(event))
+ return -EINVAL;
+
event->hw.config_base = ev;
event->hw.idx = 0;
@@ -1946,21 +2122,43 @@ static int power_pmu_event_init(struct perf_event *event)
if (check_excludes(ctrs, cflags, n, 1))
return -EINVAL;
- cpuhw = &get_cpu_var(cpu_hw_events);
- err = power_check_constraints(cpuhw, events, cflags, n + 1);
+ local_irq_save(irq_flags);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs);
if (has_branch_stack(event)) {
- bhrb_filter = ppmu->bhrb_filter_map(
+ u64 bhrb_filter = -1;
+
+ /*
+ * Currently no PMU supports having multiple branch filters
+ * at the same time. Branch filters are set via MMCRA IFM[32:33]
+ * bits for Power8 and above. Return EOPNOTSUPP when multiple
+ * branch filters are requested in the event attr.
+ *
+ * When opening event via perf_event_open(), branch_sample_type
+ * gets adjusted in perf_copy_attr(). Kernel will automatically
+ * adjust the branch_sample_type based on the event modifier
+ * settings to include PERF_SAMPLE_BRANCH_PLM_ALL. Hence drop
+ * the check for PERF_SAMPLE_BRANCH_PLM_ALL.
+ */
+ if (hweight64(event->attr.branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) > 1) {
+ local_irq_restore(irq_flags);
+ return -EOPNOTSUPP;
+ }
+
+ if (ppmu->bhrb_filter_map)
+ bhrb_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
if (bhrb_filter == -1) {
- put_cpu_var(cpu_hw_events);
+ local_irq_restore(irq_flags);
return -EOPNOTSUPP;
}
cpuhw->bhrb_filter = bhrb_filter;
}
- put_cpu_var(cpu_hw_events);
+ local_irq_restore(irq_flags);
if (err)
return -EINVAL;
@@ -2028,6 +2226,9 @@ static struct pmu power_pmu = {
.sched_task = power_pmu_sched_task,
};
+#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
+ PERF_SAMPLE_PHYS_ADDR | \
+ PERF_SAMPLE_DATA_PAGE_SIZE)
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
@@ -2063,7 +2264,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
left += period;
if (left <= 0)
left = period;
- record = siar_valid(regs);
+
+ /*
+ * If address is not requested in the sample via
+ * PERF_SAMPLE_IP, just record that sample irrespective
+ * of SIAR valid check.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_IP)
+ record = siar_valid(regs);
+ else
+ record = 1;
+
event->hw.last_period = event->hw.sample_period;
}
if (left < 0x80000000LL)
@@ -2076,6 +2287,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
perf_event_update_userpage(event);
/*
+ * Due to hardware limitation, sometimes SIAR could sample a kernel
+ * address even when freeze on supervisor state (kernel) is set in
+ * MMCR2. Check attr.exclude_kernel and address to drop the sample in
+ * these cases.
+ */
+ if (event->attr.exclude_kernel &&
+ (event->attr.sample_type & PERF_SAMPLE_IP) &&
+ is_kernel_addr(mfspr(SPRN_SIAR)))
+ record = 0;
+
+ /*
* Finally record data if requested.
*/
if (record) {
@@ -2083,27 +2305,33 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
- if (event->attr.sample_type &
- (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
+ if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE)
perf_get_data_addr(event, regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(event, cpuhw);
- data.br_stack = &cpuhw->bhrb_stack;
+ perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
}
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
- ppmu->get_mem_data_src)
+ ppmu->get_mem_data_src) {
ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
+ data.sample_flags |= PERF_SAMPLE_DATA_SRC;
+ }
- if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
- ppmu->get_mem_weight)
- ppmu->get_mem_weight(&data.weight);
-
+ if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
+ ppmu->get_mem_weight) {
+ ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
+ data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+ }
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
+ } else if (period) {
+ /* Account for interrupt in case of invalid SIAR */
+ if (perf_event_account_interrupt(event))
+ power_pmu_stop(event, 0);
}
}
@@ -2127,12 +2355,10 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
*/
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
- bool use_siar = regs_use_siar(regs);
+ unsigned long siar = mfspr(SPRN_SIAR);
- if (use_siar && siar_valid(regs))
- return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
- else if (use_siar)
- return 0; // no valid instruction pointer
+ if (regs_use_siar(regs) && siar_valid(regs) && siar)
+ return siar + perf_ip_adjust(regs);
else
return regs->nip;
}
@@ -2172,9 +2398,7 @@ static void __perf_event_interrupt(struct pt_regs *regs)
int i, j;
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
- unsigned long val[8];
int found, active;
- int nmi;
if (cpuhw->n_limited)
freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
@@ -2182,20 +2406,14 @@ static void __perf_event_interrupt(struct pt_regs *regs)
perf_read_regs(regs);
- nmi = perf_intr_is_nmi(regs);
- if (nmi)
- nmi_enter();
- else
- irq_enter();
-
/* Read all the PMCs since we'll need them a bunch of times */
for (i = 0; i < ppmu->n_counter; ++i)
- val[i] = read_pmc(i + 1);
+ cpuhw->pmcs[i] = read_pmc(i + 1);
/* Try to find what caused the IRQ */
found = 0;
for (i = 0; i < ppmu->n_counter; ++i) {
- if (!pmc_overflow(val[i]))
+ if (!pmc_overflow(cpuhw->pmcs[i]))
continue;
if (is_limited_pmc(i + 1))
continue; /* these won't generate IRQs */
@@ -2210,10 +2428,18 @@ static void __perf_event_interrupt(struct pt_regs *regs)
event = cpuhw->event[j];
if (event->hw.idx == (i + 1)) {
active = 1;
- record_and_restart(event, val[i], regs);
+ record_and_restart(event, cpuhw->pmcs[i], regs);
break;
}
}
+
+ /*
+ * Clear PACA_IRQ_PMI in case it was set by
+ * set_pmi_irq_pending() when PMU was enabled
+ * after accounting for interrupts.
+ */
+ clear_pmi_irq_pending();
+
if (!active)
/* reset non active counters that have overflowed */
write_pmc(i + 1, 0);
@@ -2224,17 +2450,24 @@ static void __perf_event_interrupt(struct pt_regs *regs)
event = cpuhw->event[i];
if (!event->hw.idx || is_limited_pmc(event->hw.idx))
continue;
- if (pmc_overflow_power7(val[event->hw.idx - 1])) {
+ if (pmc_overflow_power7(cpuhw->pmcs[event->hw.idx - 1])) {
/* event has overflowed in a buggy way*/
found = 1;
record_and_restart(event,
- val[event->hw.idx - 1],
+ cpuhw->pmcs[event->hw.idx - 1],
regs);
}
}
}
- if (!found && !nmi && printk_ratelimit())
- printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
+
+ /*
+ * During system wide profiling or while specific CPU is monitored for an
+ * event, some corner cases could cause PMC to overflow in idle path. This
+ * will trigger a PMI after waking up from idle. Since counter values are _not_
+ * saved/restored in idle path, can lead to below "Can't find PMC" message.
+ */
+ if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+ printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
/*
* Reset MMCR0 to its normal value. This will set PMXE and
@@ -2243,12 +2476,11 @@ static void __perf_event_interrupt(struct pt_regs *regs)
* XXX might want to use MSR.PM to keep the events frozen until
* we get back out of this interrupt.
*/
- write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+ write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
+
+ /* Clear the cpuhw->pmcs */
+ memset(&cpuhw->pmcs, 0, sizeof(cpuhw->pmcs));
- if (nmi)
- nmi_exit();
- else
- irq_exit();
}
static void perf_event_interrupt(struct pt_regs *regs)
@@ -2265,12 +2497,39 @@ static int power_pmu_prepare_cpu(unsigned int cpu)
if (ppmu) {
memset(cpuhw, 0, sizeof(*cpuhw));
- cpuhw->mmcr[0] = MMCR0_FC;
+ cpuhw->mmcr.mmcr0 = MMCR0_FC;
}
return 0;
}
-int register_power_pmu(struct power_pmu *pmu)
+static ssize_t pmu_name_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (ppmu)
+ return sysfs_emit(buf, "%s", ppmu->name);
+
+ return 0;
+}
+
+static DEVICE_ATTR_RO(pmu_name);
+
+static struct attribute *pmu_caps_attrs[] = {
+ &dev_attr_pmu_name.attr,
+ NULL
+};
+
+static const struct attribute_group pmu_caps_group = {
+ .name = "caps",
+ .attrs = pmu_caps_attrs,
+};
+
+static const struct attribute_group *pmu_caps_groups[] = {
+ &pmu_caps_group,
+ NULL,
+};
+
+int __init register_power_pmu(struct power_pmu *pmu)
{
if (ppmu)
return -EBUSY; /* something's already registered */
@@ -2281,6 +2540,11 @@ int register_power_pmu(struct power_pmu *pmu)
power_pmu.attr_groups = ppmu->attr_groups;
+ if (ppmu->flags & PPMU_ARCH_207S)
+ power_pmu.attr_update = pmu_caps_groups;
+
+ power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS);
+
#ifdef MSR_HV
/*
* Use FCHV to ignore kernel events if MSR.HV is set.
@@ -2296,8 +2560,24 @@ int register_power_pmu(struct power_pmu *pmu)
}
#ifdef CONFIG_PPC64
+static bool pmu_override = false;
+static unsigned long pmu_override_val;
+static void do_pmu_override(void *data)
+{
+ ppc_set_pmu_inuse(1);
+ if (pmu_override_val)
+ mtspr(SPRN_MMCR1, pmu_override_val);
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+}
+
static int __init init_ppc64_pmu(void)
{
+ if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
+ pr_warn("disabling perf due to pmu_override= command line option.\n");
+ on_each_cpu(do_pmu_override, NULL, 1);
+ return 0;
+ }
+
/* run through all the pmu drivers one at a time */
if (!init_power5_pmu())
return 0;
@@ -2311,10 +2591,33 @@ static int __init init_ppc64_pmu(void)
return 0;
else if (!init_power9_pmu())
return 0;
+ else if (!init_power10_pmu())
+ return 0;
+ else if (!init_power11_pmu())
+ return 0;
else if (!init_ppc970_pmu())
return 0;
else
return init_generic_compat_pmu();
}
early_initcall(init_ppc64_pmu);
+
+static int __init pmu_setup(char *str)
+{
+ unsigned long val;
+
+ if (!early_cpu_has_feature(CPU_FTR_HVMODE))
+ return 0;
+
+ pmu_override = true;
+
+ if (kstrtoul(str, 0, &val))
+ val = 0;
+
+ pmu_override_val = val;
+
+ return 1;
+}
+__setup("pmu_override=", pmu_setup);
+
#endif
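
The pmu_override= parsing above relies on kstrtoul() with base 0,
which auto-detects decimal, hex (0x...) and octal (leading 0) forms,
and the caller falls back to 0 on a parse failure. A userspace sketch
of the same behaviour, using strtoul() as a stand-in for kstrtoul():

#include <stdio.h>
#include <stdlib.h>

/* Roughly mimics kstrtoul(str, 0, &val): returns nonzero on bad input. */
static int parse_override(const char *str, unsigned long *val)
{
	char *end;

	*val = strtoul(str, &end, 0);
	return *str == '\0' || *end != '\0';
}

int main(void)
{
	unsigned long v;

	/* pmu_override=0x1234 on the command line arrives here as "0x1234" */
	if (parse_override("0x1234", &v))
		v = 0;		/* same fallback the kernel code applies */

	printf("MMCR1 override value: 0x%lx\n", v);
	return 0;
}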
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index e0e7e276bfd2..1a53ab08447c 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -31,19 +31,6 @@ static atomic_t num_events;
/* Used to avoid races in calling reserve/release_pmc_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);
-/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-#ifdef __powerpc64__
- return (regs->softe & IRQS_DISABLED);
-#else
- return 0;
-#endif
-}
-
static void perf_event_interrupt(struct pt_regs *regs);
/*
@@ -658,14 +645,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
unsigned long val;
- int found = 0;
- int nmi;
-
- nmi = perf_intr_is_nmi(regs);
- if (nmi)
- nmi_enter();
- else
- irq_enter();
for (i = 0; i < ppmu->n_counter; ++i) {
event = cpuhw->event[i];
@@ -674,7 +653,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
if ((int)val < 0) {
if (event) {
/* event has overflowed */
- found = 1;
record_and_restart(event, val, regs);
} else {
/*
@@ -690,18 +668,15 @@ static void perf_event_interrupt(struct pt_regs *regs)
mtmsr(mfmsr() | MSR_PMM);
mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
isync();
-
- if (nmi)
- nmi_exit();
- else
- irq_exit();
}
-void hw_perf_event_setup(int cpu)
+static int fsl_emb_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
memset(cpuhw, 0, sizeof(*cpuhw));
+
+ return 0;
}
int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
@@ -714,6 +689,8 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
pmu->name);
perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
+ cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
+ fsl_emb_pmu_prepare_cpu, NULL);
return 0;
}
diff --git a/arch/powerpc/perf/e500-pmu.c b/arch/powerpc/perf/e500-pmu.c
index a59c33bed32a..e3e1a68eb1d5 100644
--- a/arch/powerpc/perf/e500-pmu.c
+++ b/arch/powerpc/perf/e500-pmu.c
@@ -118,12 +118,13 @@ static struct fsl_emb_pmu e500_pmu = {
static int init_e500_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type)
- return -ENODEV;
+ unsigned int pvr = mfspr(SPRN_PVR);
- if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc"))
+	/* e500mc / e5500 */
+ if (PVR_VER(pvr) == PVR_VER_E500MC || PVR_VER(pvr) == PVR_VER_E5500)
num_events = 256;
- else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500"))
+ /* e500 */
+ else if (PVR_VER(pvr) != PVR_VER_E500V1 && PVR_VER(pvr) != PVR_VER_E500V2)
return -ENODEV;
return register_fsl_emb_pmu(&e500_pmu);
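
The PVR dispatch above replaces the old oprofile_cpu_type string
match. A sketch of the same idea; the PVR version constants here are
assumptions for illustration only — the authoritative values live in
arch/powerpc/include/asm/reg.h:

#include <stdint.h>
#include <stdio.h>

/* PVR_VER() extracts the version field: the top 16 bits of the PVR. */
#define PVR_VER(pvr)	(((pvr) >> 16) & 0xffff)

#define PVR_VER_E500V2	0x8021	/* assumed value, for illustration */
#define PVR_VER_E500MC	0x8023	/* assumed value, for illustration */

int main(void)
{
	uint32_t pvr = 0x80230030;	/* a hypothetical e500mc PVR */

	printf("version = 0x%04x, e500mc? %d, e500v2? %d\n",
	       PVR_VER(pvr),
	       PVR_VER(pvr) == PVR_VER_E500MC,
	       PVR_VER(pvr) == PVR_VER_E500V2);
	return 0;
}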
diff --git a/arch/powerpc/perf/e6500-pmu.c b/arch/powerpc/perf/e6500-pmu.c
index 44ad65da82ed..bd779a2338f8 100644
--- a/arch/powerpc/perf/e6500-pmu.c
+++ b/arch/powerpc/perf/e6500-pmu.c
@@ -107,8 +107,9 @@ static struct fsl_emb_pmu e6500_pmu = {
static int init_e6500_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e6500"))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_VER_E6500)
return -ENODEV;
return register_fsl_emb_pmu(&e6500_pmu);
diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c
index 5e5a54d5588e..b5c414876ed5 100644
--- a/arch/powerpc/perf/generic-compat-pmu.c
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -14,84 +14,165 @@
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * [ pmc ] [unit ] [ ] m [ pmcxsel ]
- * | |
- * | *- mark
- * |
- * |
- * *- combine
- *
- * Below uses IBM bit numbering.
- *
- * MMCR1[x:y] = unit (PMCxUNIT)
- * MMCR1[24] = pmc1combine[0]
- * MMCR1[25] = pmc1combine[1]
- * MMCR1[26] = pmc2combine[0]
- * MMCR1[27] = pmc2combine[1]
- * MMCR1[28] = pmc3combine[0]
- * MMCR1[29] = pmc3combine[1]
- * MMCR1[30] = pmc4combine[0]
- * MMCR1[31] = pmc4combine[1]
- *
+ * [ pmc ] [ pmcxsel ]
*/
/*
- * Some power9 event codes.
+ * Event codes defined in ISA v3.0B
*/
#define EVENT(_name, _code) _name = _code,
enum {
-EVENT(PM_CYC, 0x0001e)
-EVENT(PM_INST_CMPL, 0x00002)
+ /* Cycles, alternate code */
+ EVENT(PM_CYC_ALT, 0x100f0)
+ /* One or more instructions completed in a cycle */
+ EVENT(PM_CYC_INST_CMPL, 0x100f2)
+ /* Floating-point instruction completed */
+ EVENT(PM_FLOP_CMPL, 0x100f4)
+ /* Instruction ERAT/L1-TLB miss */
+ EVENT(PM_L1_ITLB_MISS, 0x100f6)
+ /* All instructions completed and none available */
+ EVENT(PM_NO_INST_AVAIL, 0x100f8)
+ /* A load-type instruction completed (ISA v3.0+) */
+ EVENT(PM_LD_CMPL, 0x100fc)
+ /* Instruction completed, alternate code (ISA v3.0+) */
+ EVENT(PM_INST_CMPL_ALT, 0x100fe)
+ /* A store-type instruction completed */
+ EVENT(PM_ST_CMPL, 0x200f0)
+ /* Instruction Dispatched */
+ EVENT(PM_INST_DISP, 0x200f2)
+ /* Run_cycles */
+ EVENT(PM_RUN_CYC, 0x200f4)
+ /* Data ERAT/L1-TLB miss/reload */
+ EVENT(PM_L1_DTLB_RELOAD, 0x200f6)
+ /* Taken branch completed */
+ EVENT(PM_BR_TAKEN_CMPL, 0x200fa)
+ /* Demand iCache Miss */
+ EVENT(PM_L1_ICACHE_MISS, 0x200fc)
+ /* L1 Dcache reload from memory */
+ EVENT(PM_L1_RELOAD_FROM_MEM, 0x200fe)
+ /* L1 Dcache store miss */
+ EVENT(PM_ST_MISS_L1, 0x300f0)
+ /* Alternate code for PM_INST_DISP */
+ EVENT(PM_INST_DISP_ALT, 0x300f2)
+ /* Branch direction or target mispredicted */
+ EVENT(PM_BR_MISPREDICT, 0x300f6)
+ /* Data TLB miss/reload */
+ EVENT(PM_DTLB_MISS, 0x300fc)
+ /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+ EVENT(PM_DATA_FROM_L3MISS, 0x300fe)
+ /* L1 Dcache load miss */
+ EVENT(PM_LD_MISS_L1, 0x400f0)
+ /* Cycle when instruction(s) dispatched */
+ EVENT(PM_CYC_INST_DISP, 0x400f2)
+ /* Branch or branch target mispredicted */
+ EVENT(PM_BR_MPRED_CMPL, 0x400f6)
+ /* Instructions completed with run latch set */
+ EVENT(PM_RUN_INST_CMPL, 0x400fa)
+ /* Instruction TLB miss/reload */
+ EVENT(PM_ITLB_MISS, 0x400fc)
+ /* Load data not cached */
+ EVENT(PM_LD_NOT_CACHED, 0x400fe)
+ /* Instructions */
+ EVENT(PM_INST_CMPL, 0x500fa)
+ /* Cycles */
+ EVENT(PM_CYC, 0x600f4)
};
#undef EVENT
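
Per the layout comment and format attributes in this file (pmcxsel in
config bits 0-7, pmc in bits 16-19), an event code decomposes into a
counter selector and a counter number. A small decoding sketch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t event = 0x400f0;	/* PM_LD_MISS_L1 from the enum above */
	unsigned int pmcxsel = event & 0xff;		/* config:0-7 */
	unsigned int pmc = (event >> 16) & 0xf;		/* config:16-19 */

	printf("event 0x%llx -> pmc=%u pmcxsel=0x%02x\n",
	       (unsigned long long)event, pmc, pmcxsel);
	return 0;
}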
+/* Table of alternatives, sorted in increasing order of column 0 */
+/* Note that in each row, column 0 must be the smallest */
+static const unsigned int generic_event_alternatives[][MAX_ALT] = {
+ { PM_CYC_ALT, PM_CYC },
+ { PM_INST_CMPL_ALT, PM_INST_CMPL },
+ { PM_INST_DISP, PM_INST_DISP_ALT },
+};
+
+static int generic_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int num_alt = 0;
+
+ num_alt = isa207_get_alternatives(event, alt,
+ ARRAY_SIZE(generic_event_alternatives), flags,
+ generic_event_alternatives);
+
+ return num_alt;
+}
+
GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_NO_INST_AVAIL);
+GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
static struct attribute *generic_compat_events_attr[] = {
GENERIC_EVENT_PTR(PM_CYC),
GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_NO_INST_AVAIL),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
NULL
};
-static struct attribute_group generic_compat_pmu_events_group = {
+static const struct attribute_group generic_compat_pmu_events_group = {
.name = "events",
.attrs = generic_compat_events_attr,
};
PMU_FORMAT_ATTR(event, "config:0-19");
PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
-PMU_FORMAT_ATTR(mark, "config:8");
-PMU_FORMAT_ATTR(combine, "config:10-11");
-PMU_FORMAT_ATTR(unit, "config:12-15");
PMU_FORMAT_ATTR(pmc, "config:16-19");
static struct attribute *generic_compat_pmu_format_attr[] = {
&format_attr_event.attr,
&format_attr_pmcxsel.attr,
- &format_attr_mark.attr,
- &format_attr_combine.attr,
- &format_attr_unit.attr,
&format_attr_pmc.attr,
NULL,
};
-static struct attribute_group generic_compat_pmu_format_group = {
+static const struct attribute_group generic_compat_pmu_format_group = {
.name = "format",
.attrs = generic_compat_pmu_format_attr,
};
+static struct attribute *generic_compat_pmu_caps_attrs[] = {
+ NULL
+};
+
+static struct attribute_group generic_compat_pmu_caps_group = {
+ .name = "caps",
+ .attrs = generic_compat_pmu_caps_attrs,
+};
+
static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
&generic_compat_pmu_format_group,
&generic_compat_pmu_events_group,
+ &generic_compat_pmu_caps_group,
NULL,
};
static int compat_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
[PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_NO_INST_AVAIL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
};
#define C(x) PERF_COUNT_HW_CACHE_##x
@@ -101,15 +182,15 @@ static int compat_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_LD_MISS_L1,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ST_MISS_L1,
},
[ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -119,7 +200,7 @@ static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(L1I) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -133,7 +214,7 @@ static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(LL) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0,
@@ -147,7 +228,7 @@ static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_DTLB_MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -161,7 +242,7 @@ static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ITLB_MISS,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -175,7 +256,7 @@ static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(BPU) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0,
- [ C(RESULT_MISS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL,
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -204,13 +285,30 @@ static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
#undef C
+/*
+ * We set MMCR0[CC5-6RUN] so we can use counters 5 and 6 for
+ * PM_INST_CMPL and PM_CYC.
+ */
+static int generic_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[], u32 flags)
+{
+ int ret;
+
+ ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+ if (!ret)
+ mmcr->mmcr0 |= MMCR0_C56RUN;
+ return ret;
+}
+
static struct power_pmu generic_compat_pmu = {
- .name = "GENERIC_COMPAT",
+ .name = "ISAv3",
.n_counter = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
.test_adder = ISA207_TEST_ADDER,
- .compute_mmcr = isa207_compute_mmcr,
+ .compute_mmcr = generic_compute_mmcr,
.get_constraint = isa207_get_constraint,
+ .get_alternatives = generic_get_alternatives,
.disable_pmc = isa207_disable_pmc,
.flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
.n_generic = ARRAY_SIZE(compat_generic_events),
@@ -219,10 +317,20 @@ static struct power_pmu generic_compat_pmu = {
.attr_groups = generic_compat_pmu_attr_groups,
};
-int init_generic_compat_pmu(void)
+int __init init_generic_compat_pmu(void)
{
int rc = 0;
+ /*
+ * From ISA v2.07 on, PMU features are architected;
+ * we require >= v3.0 because (a) that has PM_LD_CMPL and
+ * PM_INST_CMPL_ALT, which v2.07 doesn't have, and
+ * (b) we don't expect any non-IBM Power ISA
+ * implementations that conform to v2.07 but not v3.0.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+
rc = register_power_pmu(&generic_compat_pmu);
if (rc)
return rc;
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 573e0b309c0c..057ec2e3451d 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -18,8 +18,10 @@
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/io.h>
+#include <asm/papr-sysparm.h>
#include <linux/byteorder/generic.h>
+#include <asm/rtas.h>
#include "hv-24x7.h"
#include "hv-24x7-catalog.h"
#include "hv-common.h"
@@ -30,7 +32,9 @@ static int interface_version;
/* Whether we have to aggregate result data for some domains. */
static bool aggregate_result_elements;
-static bool domain_is_valid(unsigned domain)
+static cpumask_t hv_24x7_cpumask;
+
+static bool domain_is_valid(unsigned int domain)
{
switch (domain) {
#define DOMAIN(n, v, x, c) \
@@ -44,7 +48,7 @@ static bool domain_is_valid(unsigned domain)
}
}
-static bool is_physical_domain(unsigned domain)
+static bool is_physical_domain(unsigned int domain)
{
switch (domain) {
#define DOMAIN(n, v, x, c) \
@@ -57,6 +61,51 @@ static bool is_physical_domain(unsigned domain)
}
}
+/*
+ * The Processor Module Information system parameter allows the platform
+ * to pass certain processor module information to the OS.
+ * Refer to the PAPR+ document; the parameter token value is 43.
+ */
+
+static u32 phys_sockets; /* Physical sockets */
+static u32 phys_chipspersocket; /* Physical chips per socket */
+static u32 phys_coresperchip; /* Physical cores per chip */
+
+/*
+ * read_24x7_sys_info()
+ * Retrieve the number of sockets and chips per socket and cores per
+ * chip details through the get-system-parameter rtas call.
+ */
+void read_24x7_sys_info(void)
+{
+ struct papr_sysparm_buf *buf;
+
+ /*
+	 * Default the system parameters (sockets, chips per socket,
+	 * cores per chip) to 1.
+ */
+ phys_sockets = 1;
+ phys_chipspersocket = 1;
+ phys_coresperchip = 1;
+
+ buf = papr_sysparm_buf_alloc();
+ if (!buf)
+ return;
+
+ if (!papr_sysparm_get(PAPR_SYSPARM_PROC_MODULE_INFO, buf)) {
+ int ntypes = be16_to_cpup((__be16 *)&buf->val[0]);
+ int len = be16_to_cpu(buf->len);
+
+ if (len >= 8 && ntypes != 0) {
+ phys_sockets = be16_to_cpup((__be16 *)&buf->val[2]);
+ phys_chipspersocket = be16_to_cpup((__be16 *)&buf->val[4]);
+ phys_coresperchip = be16_to_cpup((__be16 *)&buf->val[6]);
+ }
+ }
+
+ papr_sysparm_buf_free(buf);
+}
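
The sysparm buffer is parsed as big-endian 16-bit fields at fixed
offsets: a type count at offset 0, then sockets, chips per socket and
cores per chip. A userspace sketch of that extraction, assuming the
same layout read_24x7_sys_info() uses above:

#include <stdint.h>
#include <stdio.h>

/* be16_to_cpup() analogue for a raw byte pointer. */
static uint16_t be16_at(const uint8_t *p)
{
	return (uint16_t)(p[0] << 8 | p[1]);
}

int main(void)
{
	/* ntypes=1, sockets=2, chips/socket=1, cores/chip=12 */
	uint8_t val[] = { 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x0c };

	printf("sockets=%u chipspersocket=%u coresperchip=%u\n",
	       be16_at(&val[2]), be16_at(&val[4]), be16_at(&val[6]));
	return 0;
}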
+
/* Domains for which more than one result element are returned for each event. */
static bool domain_needs_aggregation(unsigned int domain)
{
@@ -66,7 +115,7 @@ static bool domain_needs_aggregation(unsigned int domain)
domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
}
-static const char *domain_name(unsigned domain)
+static const char *domain_name(unsigned int domain)
{
if (!domain_is_valid(domain))
return NULL;
@@ -84,7 +133,7 @@ static const char *domain_name(unsigned domain)
return NULL;
}
-static bool catalog_entry_domain_is_valid(unsigned domain)
+static bool catalog_entry_domain_is_valid(unsigned int domain)
{
/* POWER8 doesn't support virtual domains. */
if (interface_version == 1)
@@ -142,7 +191,7 @@ static struct attribute *format_attrs[] = {
NULL,
};
-static struct attribute_group format_group = {
+static const struct attribute_group format_group = {
.name = "format",
.attrs = format_attrs,
};
@@ -164,14 +213,14 @@ static struct attribute_group event_long_desc_group = {
static struct kmem_cache *hv_page_cache;
-DEFINE_PER_CPU(int, hv_24x7_txn_flags);
-DEFINE_PER_CPU(int, hv_24x7_txn_err);
+static DEFINE_PER_CPU(int, hv_24x7_txn_flags);
+static DEFINE_PER_CPU(int, hv_24x7_txn_err);
struct hv_24x7_hw {
struct perf_event *events[255];
};
-DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
+static DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
/*
* request_buffer and result_buffer are not required to be 4k aligned,
@@ -179,8 +228,8 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
* the simplest way to ensure that.
*/
#define H24x7_DATA_BUFFER_SIZE 4096
-DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
-DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
static unsigned int max_num_requests(int interface_version)
{
@@ -196,7 +245,7 @@ static char *event_name(struct hv_24x7_event_data *ev, int *len)
static char *event_desc(struct hv_24x7_event_data *ev, int *len)
{
- unsigned nl = be16_to_cpu(ev->event_name_len);
+ unsigned int nl = be16_to_cpu(ev->event_name_len);
__be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
*len = be16_to_cpu(*desc_len) - 2;
@@ -205,9 +254,9 @@ static char *event_desc(struct hv_24x7_event_data *ev, int *len)
static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
{
- unsigned nl = be16_to_cpu(ev->event_name_len);
+ unsigned int nl = be16_to_cpu(ev->event_name_len);
__be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
- unsigned desc_len = be16_to_cpu(*desc_len_);
+ unsigned int desc_len = be16_to_cpu(*desc_len_);
__be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
*len = be16_to_cpu(*long_desc_len) - 2;
@@ -234,8 +283,8 @@ static void *event_end(struct hv_24x7_event_data *ev, void *end)
{
void *start = ev;
__be16 *dl_, *ldl_;
- unsigned dl, ldl;
- unsigned nl = be16_to_cpu(ev->event_name_len);
+ unsigned int dl, ldl;
+ unsigned int nl = be16_to_cpu(ev->event_name_len);
if (nl < 2) {
pr_debug("%s: name length too short: %d", __func__, nl);
@@ -336,7 +385,7 @@ static long h_get_24x7_catalog_page(char page[], u64 version, u32 index)
* - Specifying (i.e overriding) values for other parameters
* is undefined.
*/
-static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
+static char *event_fmt(struct hv_24x7_event_data *event, unsigned int domain)
{
const char *sindex;
const char *lpar;
@@ -386,6 +435,30 @@ static ssize_t device_show_string(struct device *dev,
return sprintf(buf, "%s\n", (char *)d->var);
}
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask);
+}
+
+static ssize_t sockets_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", phys_sockets);
+}
+
+static ssize_t chipspersocket_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", phys_chipspersocket);
+}
+
+static ssize_t coresperchip_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", phys_coresperchip);
+}
+
static struct attribute *device_str_attr_create_(char *name, char *str)
{
struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
@@ -443,9 +516,9 @@ out_s:
return NULL;
}
-static struct attribute *event_to_attr(unsigned ix,
+static struct attribute *event_to_attr(unsigned int ix,
struct hv_24x7_event_data *event,
- unsigned domain,
+ unsigned int domain,
int nonce)
{
int event_name_len;
@@ -513,8 +586,8 @@ event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
return device_str_attr_create(name, nl, nonce, desc, dl);
}
-static int event_data_to_attrs(unsigned ix, struct attribute **attrs,
- struct hv_24x7_event_data *event, int nonce)
+static int event_data_to_attrs(unsigned int ix, struct attribute **attrs,
+ struct hv_24x7_event_data *event, int nonce)
{
*attrs = event_to_attr(ix, event, event->domain, nonce);
if (!*attrs)
@@ -528,8 +601,8 @@ struct event_uniq {
struct rb_node node;
const char *name;
int nl;
- unsigned ct;
- unsigned domain;
+ unsigned int ct;
+ unsigned int domain;
};
static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
@@ -542,8 +615,8 @@ static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
return memcmp(d1, d2, s1);
}
-static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
- size_t s2, unsigned d2)
+static int ev_uniq_ord(const void *v1, size_t s1, unsigned int d1,
+ const void *v2, size_t s2, unsigned int d2)
{
int r = memord(v1, s1, v2, s2);
@@ -557,7 +630,7 @@ static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
}
static int event_uniq_add(struct rb_root *root, const char *name, int nl,
- unsigned domain)
+ unsigned int domain)
{
struct rb_node **new = &(root->rb_node), *parent = NULL;
struct event_uniq *data;
@@ -670,7 +743,7 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
}
if (calc_ev_end > ev_end) {
- pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
+ pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
event_idx, event, ev_end, offset, calc_ev_end);
return -1;
}
@@ -678,6 +751,14 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
return ev_len;
}
+/*
+ * Return true for invalid or dummy events whose names begin with RESERVED
+ */
+static bool ignore_event(const char *name)
+{
+ return strncmp(name, "RESERVED", 8) == 0;
+}
+
#define MAX_4K (SIZE_MAX / 4096)
static int create_events_from_catalog(struct attribute ***events_,
@@ -808,6 +889,10 @@ static int create_events_from_catalog(struct attribute ***events_,
name = event_name(event, &nl);
+ if (ignore_event(name)) {
+ junk_events++;
+ continue;
+ }
if (event->event_group_record_len == 0) {
pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
event_idx, nl, name);
@@ -869,6 +954,9 @@ static int create_events_from_catalog(struct attribute ***events_,
continue;
name = event_name(event, &nl);
+ if (ignore_event(name))
+ continue;
+
nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
ct = event_data_to_attrs(event_idx, events + event_attr_ct,
event, nonce);
@@ -1032,20 +1120,36 @@ PAGE_0_ATTR(catalog_len, "%lld\n",
(unsigned long long)be32_to_cpu(page_0->length) * 4096);
static BIN_ATTR_RO(catalog, 0/* real length varies */);
static DEVICE_ATTR_RO(domains);
+static DEVICE_ATTR_RO(sockets);
+static DEVICE_ATTR_RO(chipspersocket);
+static DEVICE_ATTR_RO(coresperchip);
+static DEVICE_ATTR_RO(cpumask);
static struct bin_attribute *if_bin_attrs[] = {
&bin_attr_catalog,
NULL,
};
+static struct attribute *cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static const struct attribute_group cpumask_attr_group = {
+ .attrs = cpumask_attrs,
+};
+
static struct attribute *if_attrs[] = {
&dev_attr_catalog_len.attr,
&dev_attr_catalog_version.attr,
&dev_attr_domains.attr,
+ &dev_attr_sockets.attr,
+ &dev_attr_chipspersocket.attr,
+ &dev_attr_coresperchip.attr,
NULL,
};
-static struct attribute_group if_group = {
+static const struct attribute_group if_group = {
.name = "interface",
.bin_attrs = if_bin_attrs,
.attrs = if_attrs,
@@ -1057,6 +1161,7 @@ static const struct attribute_group *attr_groups[] = {
&event_desc_group,
&event_long_desc_group,
&if_group,
+ &cpumask_attr_group,
NULL,
};
@@ -1233,7 +1338,7 @@ static int get_count_from_result(struct perf_event *event,
for (i = count = 0, element_data = res->elements + data_offset;
i < num_elements;
i++, element_data += data_size + data_offset)
- count += be64_to_cpu(*((u64 *) element_data));
+ count += be64_to_cpu(*((__be64 *)element_data));
*countp = count;
@@ -1280,7 +1385,7 @@ out:
static int h_24x7_event_init(struct perf_event *event)
{
struct hv_perf_caps caps;
- unsigned domain;
+ unsigned int domain;
unsigned long hret;
u64 ct;
@@ -1313,7 +1418,7 @@ static int h_24x7_event_init(struct perf_event *event)
}
domain = event_get_domain(event);
- if (domain >= HV_PERF_DOMAIN_MAX) {
+ if (domain == 0 || domain >= HV_PERF_DOMAIN_MAX) {
pr_devel("invalid domain %d\n", domain);
return -EINVAL;
}
@@ -1400,16 +1505,6 @@ static void h_24x7_event_read(struct perf_event *event)
h24x7hw = &get_cpu_var(hv_24x7_hw);
h24x7hw->events[i] = event;
put_cpu_var(h24x7hw);
- /*
- * Clear the event count so we can compute the _change_
- * in the 24x7 raw counter value at the end of the txn.
- *
- * Note that we could alternatively read the 24x7 value
- * now and save its value in event->hw.prev_count. But
- * that would require issuing a hcall, which would then
- * defeat the purpose of using the txn interface.
- */
- local64_set(&event->count, 0);
}
put_cpu_var(hv_24x7_reqb);
@@ -1567,20 +1662,60 @@ static struct pmu h_24x7_pmu = {
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
+static int ppc_hv_24x7_cpu_online(unsigned int cpu)
+{
+ if (cpumask_empty(&hv_24x7_cpumask))
+ cpumask_set_cpu(cpu, &hv_24x7_cpumask);
+
+ return 0;
+}
+
+static int ppc_hv_24x7_cpu_offline(unsigned int cpu)
+{
+ int target;
+
+ /* Check if exiting cpu is used for collecting 24x7 events */
+ if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask))
+ return 0;
+
+ /* Find a new cpu to collect 24x7 events */
+ target = cpumask_last(cpu_active_mask);
+
+ if (target < 0 || target >= nr_cpu_ids) {
+ pr_err("hv_24x7: CPU hotplug init failed\n");
+ return -1;
+ }
+
+ /* Migrate 24x7 events to the new target */
+ cpumask_set_cpu(target, &hv_24x7_cpumask);
+ perf_pmu_migrate_context(&h_24x7_pmu, cpu, target);
+
+ return 0;
+}
+
+static int hv_24x7_cpu_hotplug_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
+ "perf/powerpc/hv_24x7:online",
+ ppc_hv_24x7_cpu_online,
+ ppc_hv_24x7_cpu_offline);
+}
+
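
The hotplug callbacks above maintain the invariant that exactly one
CPU collects 24x7 events: the first CPU to come online claims the
role, and when that CPU goes offline the role migrates to the last
active CPU. A toy model of that invariant, with plain ints standing
in for cpumasks and perf_pmu_migrate_context():

#include <stdio.h>

#define NR_CPUS 4

static int collector = -1;		/* models hv_24x7_cpumask */
static int active[NR_CPUS] = { 1, 1, 1, 1 };

static void cpu_online(int cpu)
{
	if (collector < 0)
		collector = cpu;	/* first online CPU collects */
}

static void cpu_offline(int cpu)
{
	active[cpu] = 0;
	if (collector != cpu)
		return;
	for (int c = NR_CPUS - 1; c >= 0; c--)	/* cpumask_last() analogue */
		if (active[c]) {
			collector = c;	/* migrate events here */
			return;
		}
	collector = -1;
}

int main(void)
{
	for (int c = 0; c < NR_CPUS; c++)
		cpu_online(c);
	cpu_offline(0);
	printf("collector moved to CPU %d\n", collector);	/* CPU 3 */
	return 0;
}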
static int hv_24x7_init(void)
{
int r;
unsigned long hret;
+ unsigned int pvr = mfspr(SPRN_PVR);
struct hv_perf_caps caps;
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
pr_debug("not a virtualized system, not enabling\n");
return -ENODEV;
- } else if (!cur_cpu_spec->oprofile_cpu_type)
- return -ENODEV;
+ }
/* POWER8 only supports v1, while POWER9 only supports v2. */
- if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
+ if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E ||
+ PVR_VER(pvr) == PVR_POWER8NVL)
interface_version = 1;
else {
interface_version = 2;
@@ -1611,10 +1746,17 @@ static int hv_24x7_init(void)
if (r)
return r;
+ /* init cpuhotplug */
+ r = hv_24x7_cpu_hotplug_init();
+ if (r)
+ return r;
+
r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
if (r)
return r;
+ read_24x7_sys_info();
+
return 0;
}
diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h
index e608f9db12dd..5e86371a20c7 100644
--- a/arch/powerpc/perf/hv-gpci-requests.h
+++ b/arch/powerpc/perf/hv-gpci-requests.h
@@ -79,6 +79,7 @@ REQUEST(__field(0, 8, partition_id)
)
#include I(REQUEST_END)
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
/*
* Not available for counter_info_version >= 0x8, use
* run_instruction_cycles_by_partition(0x100) instead.
@@ -92,10 +93,11 @@ REQUEST(__field(0, 8, partition_id)
__count(0x10, 8, cycles)
)
#include I(REQUEST_END)
+#endif
#define REQUEST_NAME system_performance_capabilities
#define REQUEST_NUM 0x40
-#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff"
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
#include I(REQUEST_BEGIN)
REQUEST(__field(0, 1, perf_collect_privileged)
__field(0x1, 1, capability_mask)
@@ -103,6 +105,7 @@ REQUEST(__field(0, 1, perf_collect_privileged)
)
#include I(REQUEST_END)
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
#define REQUEST_NAME processor_bus_utilization_abc_links
#define REQUEST_NUM 0x50
#define REQUEST_IDX_KIND "hw_chip_id=?"
@@ -194,6 +197,7 @@ REQUEST(__field(0, 4, phys_processor_idx)
__count(0x28, 8, instructions_completed)
)
#include I(REQUEST_END)
+#endif
/* Processor_core_power_mode (0x95) skipped, no counters */
/* Affinity_domain_information_by_virtual_processor (0xA0) skipped,
@@ -223,7 +227,7 @@ REQUEST(__field(0, 2, partition_id)
#define REQUEST_NAME system_hypervisor_times
#define REQUEST_NUM 0xF0
-#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff"
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
#include I(REQUEST_BEGIN)
REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors)
__count(0x8, 8, time_spent_processing_virtual_processor_timers)
@@ -234,7 +238,7 @@ REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors)
#define REQUEST_NAME system_tlbie_count_and_time
#define REQUEST_NUM 0xF4
-#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff"
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
#include I(REQUEST_BEGIN)
REQUEST(__count(0, 8, tlbie_instructions_issued)
/*
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 6884d16ec19b..241551d1282f 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -48,6 +48,8 @@ EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31);
/* u32, byte offset */
EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);
+static cpumask_t hv_gpci_cpumask;
+
static struct attribute *format_attrs[] = {
&format_attr_request.attr,
&format_attr_starting_index.attr,
@@ -63,14 +65,14 @@ static struct attribute *format_attrs[] = {
NULL,
};
-static struct attribute_group format_group = {
+static const struct attribute_group format_group = {
.name = "format",
.attrs = format_attrs,
};
static struct attribute_group event_group = {
.name = "events",
- .attrs = hv_gpci_event_attrs,
+ /* .attrs is set in init */
};
#define HV_CAPS_ATTR(_name, _format) \
@@ -94,7 +96,523 @@ static ssize_t kernel_version_show(struct device *dev,
return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
}
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask);
+}
+
+/* Interface attribute array index to store system information */
+#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6
+#define INTERFACE_PROCESSOR_CONFIG_ATTR 7
+#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8
+#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9
+#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10
+#define INTERFACE_NULL_ATTR 11
+
+/* Counter request value to retrieve system information */
+enum {
+ PROCESSOR_BUS_TOPOLOGY,
+ PROCESSOR_CONFIG,
+ AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
+ AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
+ AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */
+};
+
+static int sysinfo_counter_request[] = {
+ [PROCESSOR_BUS_TOPOLOGY] = 0xD0,
+ [PROCESSOR_CONFIG] = 0x90,
+ [AFFINITY_DOMAIN_VIA_VP] = 0xA0,
+ [AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
+ [AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
+};
+
+static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
+
+static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
+ u16 secondary_index, char *buf,
+ size_t *n, struct hv_gpci_request_buffer *arg)
+{
+ unsigned long ret;
+ size_t i, j;
+
+ arg->params.counter_request = cpu_to_be32(req);
+ arg->params.starting_index = cpu_to_be32(starting_index);
+ arg->params.secondary_index = cpu_to_be16(secondary_index);
+
+ ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+	 * A ret value of 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL':
+	 * the current buffer size cannot accommodate all the information,
+	 * and a partial buffer is returned.
+	 * The hcall fails for any ret value other than H_SUCCESS or H_PARAMETER.
+	 *
+	 * A ret value of H_AUTHORITY implies that the partition is not
+	 * permitted to retrieve performance information; the "Enable
+	 * Performance Information Collection" option must be set first.
+ */
+ if (ret == H_AUTHORITY)
+ return -EPERM;
+
+ /*
+	 * The hcall can also fail with other ret values such as
+	 * H_PRIVILEGE or H_HARDWARE, due to an invalid buffer length or
+	 * address, or to a hardware error.
+ */
+ if (ret && (ret != H_PARAMETER))
+ return -EIO;
+
+ /*
+	 * The H_GET_PERF_COUNTER_INFO hcall populates 'returned_values'
+	 * with the total number of counter_value array elements returned,
+	 * and 'cv_element_size' with the size of each individual
+	 * counter_value array element. The loop below walks all the
+	 * counter_value array elements, element by element, and appends
+	 * them to the output buffer.
+ */
+ for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) {
+ j = i * be16_to_cpu(arg->params.cv_element_size);
+
+ for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++)
+ *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]);
+ *n += sprintf(buf + *n, "\n");
+ }
+
+ if (*n >= PAGE_SIZE) {
+ pr_info("System information exceeds PAGE_SIZE\n");
+ return -EFBIG;
+ }
+
+ return ret;
+}
+
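
The hex-dump loop above prints returned_values elements of
cv_element_size bytes each, one element per line. A standalone sketch
of that formatting (the data and buffer size here are arbitrary):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t bytes[] = { 0xde, 0xad, 0xbe, 0xef, 0x01, 0x02, 0x03, 0x04 };
	size_t returned_values = 2, cv_element_size = 4, n = 0;
	char buf[64];

	for (size_t i = 0; i < returned_values; i++) {
		size_t j = i * cv_element_size;

		for (; j < (i + 1) * cv_element_size; j++)
			n += sprintf(buf + n, "%02x", bytes[j]);
		n += sprintf(buf + n, "\n");
	}
	fputs(buf, stdout);	/* "deadbeef\n01020304\n" */
	return 0;
}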
+static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hv_gpci_request_buffer *arg;
+ unsigned long ret;
+ size_t n = 0;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+	 * Pass counter request value 0xD0, which corresponds to request
+	 * type 'Processor_bus_topology', to retrieve the system topology
+	 * information. The starting_index value is the starting hardware
+	 * chip id.
+ */
+ ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+ 0, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+
+ /*
+	 * A ret value of 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL':
+	 * the buffer can't accommodate all the information, and a partial
+	 * buffer is returned. To handle that, make subsequent requests
+	 * with the next starting index to retrieve the missing data.
+	 * The loop below issues those hcalls and appends the results to
+	 * the buffer until we have all the information.
+ */
+ while (ret == H_PARAMETER) {
+ int returned_values = be16_to_cpu(arg->params.returned_values);
+ int elementsize = be16_to_cpu(arg->params.cv_element_size);
+ int last_element = (returned_values - 1) * elementsize;
+
+ /*
+ * Since the starting index value is part of counter_value
+ * buffer elements, use the starting index value in the last
+ * element and add 1 to make subsequent hcalls.
+ */
+ u32 starting_index = arg->bytes[last_element + 3] +
+ (arg->bytes[last_element + 2] << 8) +
+ (arg->bytes[last_element + 1] << 16) +
+ (arg->bytes[last_element] << 24) + 1;
+
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+ starting_index, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+ }
+
+ return n;
+
+out:
+ put_cpu_var(hv_gpci_reqb);
+ return ret;
+}
+
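
The continuation logic above recomputes the next starting_index from
the big-endian u32 at the start of the last returned element, plus 1.
A sketch of that computation under the same layout assumptions:

#include <stdint.h>
#include <stdio.h>

static uint32_t next_starting_index(const uint8_t *bytes,
				    int returned_values, int element_size)
{
	int last = (returned_values - 1) * element_size;

	return ((uint32_t)bytes[last] << 24 |
		(uint32_t)bytes[last + 1] << 16 |
		(uint32_t)bytes[last + 2] << 8 |
		bytes[last + 3]) + 1;
}

int main(void)
{
	/* one 8-byte element whose leading u32 is hardware chip id 5 */
	uint8_t bytes[] = { 0, 0, 0, 5, 0, 0, 0, 0 };

	printf("next starting_index = %u\n",
	       next_starting_index(bytes, 1, 8));	/* prints 6 */
	return 0;
}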
+static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hv_gpci_request_buffer *arg;
+ unsigned long ret;
+ size_t n = 0;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+	 * Pass counter request value 0x90, which corresponds to request
+	 * type 'Processor_config', to retrieve the system processor
+	 * information. The starting_index value is the starting hardware
+	 * processor index.
+ */
+ ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+ 0, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+
+ /*
+	 * A ret value of 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL':
+	 * the buffer can't accommodate all the information, and a partial
+	 * buffer is returned. To handle that, make subsequent requests
+	 * with the next starting index to retrieve the missing data.
+	 * The loop below issues those hcalls and appends the results to
+	 * the buffer until we have all the information.
+ */
+ while (ret == H_PARAMETER) {
+ int returned_values = be16_to_cpu(arg->params.returned_values);
+ int elementsize = be16_to_cpu(arg->params.cv_element_size);
+ int last_element = (returned_values - 1) * elementsize;
+
+ /*
+		 * Since the starting index is part of the counter_value
+		 * buffer elements, take the starting index value from the
+		 * last element and add 1 for subsequent hcalls.
+ */
+ u32 starting_index = arg->bytes[last_element + 3] +
+ (arg->bytes[last_element + 2] << 8) +
+ (arg->bytes[last_element + 1] << 16) +
+ (arg->bytes[last_element] << 24) + 1;
+
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+ starting_index, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+ }
+
+ return n;
+
+out:
+ put_cpu_var(hv_gpci_reqb);
+ return ret;
+}
+
+static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hv_gpci_request_buffer *arg;
+ unsigned long ret;
+ size_t n = 0;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+	 * Pass counter request 0xA0, which corresponds to request type
+	 * 'Affinity_domain_information_by_virtual_processor', to retrieve
+	 * the system affinity domain information. The starting_index
+	 * value refers to the starting hardware processor index.
+ */
+ ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+ 0, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+
+ /*
+	 * A ret value of 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL':
+	 * the buffer can't accommodate all the information, and a partial
+	 * buffer is returned. To handle that, make subsequent requests
+	 * with the next secondary index to retrieve the missing data.
+	 * The loop below issues those hcalls and appends the results to
+	 * the buffer until we have all the information.
+ */
+ while (ret == H_PARAMETER) {
+ int returned_values = be16_to_cpu(arg->params.returned_values);
+ int elementsize = be16_to_cpu(arg->params.cv_element_size);
+ int last_element = (returned_values - 1) * elementsize;
+
+ /*
+		 * Since the starting index and secondary index are part of
+		 * the counter_value buffer elements, use the starting index
+		 * value from the last array element as the next starting
+		 * index, and the secondary index value from the last array
+		 * element plus 1 as the next secondary index.
+		 * For counter request '0xA0', the starting index points to
+		 * the partition id and the secondary index points to the
+		 * corresponding virtual processor index.
+ */
+ u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
+ u16 secondary_index = arg->bytes[last_element + 3] +
+ (arg->bytes[last_element + 2] << 8) + 1;
+
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+ starting_index, secondary_index, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+ }
+
+ return n;
+
+out:
+ put_cpu_var(hv_gpci_reqb);
+ return ret;
+}
+
+static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hv_gpci_request_buffer *arg;
+ unsigned long ret;
+ size_t n = 0;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+	 * Pass counter request 0xB0, which corresponds to request type
+	 * 'Affinity_domain_information_by_domain', to retrieve the system
+	 * affinity domain information. The starting_index value refers to
+	 * the starting hardware processor index.
+ */
+ ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+ 0, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+
+ /*
+	 * A ret value of 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL':
+	 * the buffer can't accommodate all the information, and a partial
+	 * buffer is returned. To handle that, make subsequent requests
+	 * with the next starting index to retrieve the missing data.
+	 * The loop below issues those hcalls and appends the results to
+	 * the buffer until we have all the information.
+ */
+ while (ret == H_PARAMETER) {
+ int returned_values = be16_to_cpu(arg->params.returned_values);
+ int elementsize = be16_to_cpu(arg->params.cv_element_size);
+ int last_element = (returned_values - 1) * elementsize;
+
+ /*
+ * Since the starting index value is part of counter_value
+ * buffer elements, use the starting index value in the last
+ * element and add 1 to make subsequent hcalls.
+ */
+ u32 starting_index = arg->bytes[last_element + 1] +
+ (arg->bytes[last_element] << 8) + 1;
+
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+ starting_index, 0, buf, &n, arg);
+
+ if (!ret)
+ return n;
+
+ if (ret != H_PARAMETER)
+ goto out;
+ }
+
+ return n;
+
+out:
+ put_cpu_var(hv_gpci_reqb);
+ return ret;
+}
+
+static void affinity_domain_via_partition_result_parse(int returned_values,
+ int element_size, char *buf, size_t *last_element,
+ size_t *n, struct hv_gpci_request_buffer *arg)
+{
+ size_t i = 0, j = 0;
+ size_t k, l, m;
+ uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;
+
+ /*
+	 * The H_GET_PERF_COUNTER_INFO hcall populates 'returned_values'
+	 * with the total number of counter_value array elements returned.
+	 * Unlike other request types, the data structure returned by this
+	 * request is variable-size. For this counter request type,
+	 * 'cv_element_size' corresponds to the minimum size of the
+	 * structure returned, i.e. the size of the structure with no
+	 * domain information. The loop below walks the whole counter_value
+	 * array to determine the number and size of each domain array
+	 * element and appends them to the output buffer.
+ */
+ while (i < returned_values) {
+ k = j;
+ for (; k < j + element_size; k++)
+ *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]);
+ *n += sprintf(buf + *n, "\n");
+
+ total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
+ size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];
+
+ for (l = 0; l < total_affinity_domain_ele; l++) {
+ for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
+ *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]);
+ k++;
+ }
+ *n += sprintf(buf + *n, "\n");
+ }
+
+ *n += sprintf(buf + *n, "\n");
+ i++;
+ j = k;
+ }
+
+ *last_element = k;
+}
+
+static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hv_gpci_request_buffer *arg;
+ unsigned long ret;
+ size_t n = 0;
+ size_t last_element = 0;
+ u32 starting_index;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+	 * Pass counter request value 0xB1, which corresponds to counter
+	 * request type 'Affinity_domain_information_by_partition', to
+	 * retrieve the system affinity domain by partition information.
+	 * The starting_index value refers to the starting hardware
+	 * processor index.
+ */
+ arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+ arg->params.starting_index = cpu_to_be32(0);
+
+ ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+ if (!ret)
+ goto parse_result;
+
+ if (ret && (ret != H_PARAMETER))
+ goto out;
+
+ /*
+	 * A ret value of 'H_PARAMETER' implies that the current buffer
+	 * size can't accommodate all the information, and a partial
+	 * buffer is returned. To handle that, make subsequent requests
+	 * with the next starting index to retrieve the missing data.
+	 * The loop below issues those hcalls and appends the results to
+	 * the buffer until we have all the information.
+ */
+ while (ret == H_PARAMETER) {
+ affinity_domain_via_partition_result_parse(
+ be16_to_cpu(arg->params.returned_values) - 1,
+ be16_to_cpu(arg->params.cv_element_size), buf,
+ &last_element, &n, arg);
+
+ if (n >= PAGE_SIZE) {
+ put_cpu_var(hv_gpci_reqb);
+ pr_debug("System information exceeds PAGE_SIZE\n");
+ return -EFBIG;
+ }
+
+ /*
+ * Since the starting index value is part of counter_value
+ * buffer elements, use the starting_index value in the last
+ * element and add 1 to make subsequent hcalls.
+ */
+ starting_index = (u8)arg->bytes[last_element] << 8 |
+ (u8)arg->bytes[last_element + 1];
+
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+ arg->params.counter_request = cpu_to_be32(
+ sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+ arg->params.starting_index = cpu_to_be32(starting_index);
+
+ ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+ if (ret && (ret != H_PARAMETER))
+ goto out;
+ }
+
+parse_result:
+ affinity_domain_via_partition_result_parse(
+ be16_to_cpu(arg->params.returned_values),
+ be16_to_cpu(arg->params.cv_element_size),
+ buf, &last_element, &n, arg);
+
+ put_cpu_var(hv_gpci_reqb);
+ return n;
+
+out:
+ put_cpu_var(hv_gpci_reqb);
+
+ /*
+	 * A ret value of 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL':
+	 * the current buffer size cannot accommodate all the information,
+	 * and a partial buffer is returned.
+	 * The hcall fails for any ret value other than H_SUCCESS or H_PARAMETER.
+	 *
+	 * A ret value of H_AUTHORITY implies that the partition is not
+	 * permitted to retrieve performance information; the "Enable
+	 * Performance Information Collection" option must be set first.
+ */
+ if (ret == H_AUTHORITY)
+ return -EPERM;
+
+ /*
+	 * The hcall can also fail with other ret values such as
+	 * H_PRIVILEGE or H_HARDWARE, due to an invalid buffer length or
+	 * address, or to a hardware error.
+ */
+ return -EIO;
+}
+
static DEVICE_ATTR_RO(kernel_version);
+static DEVICE_ATTR_RO(cpumask);
+
HV_CAPS_ATTR(version, "0x%x\n");
HV_CAPS_ATTR(ga, "%d\n");
HV_CAPS_ATTR(expanded, "%d\n");
@@ -108,10 +626,44 @@ static struct attribute *interface_attrs[] = {
&hv_caps_attr_expanded.attr,
&hv_caps_attr_lab.attr,
&hv_caps_attr_collect_privileged.attr,
+	/*
+	 * This NULL is a placeholder for the processor_bus_topology
+	 * attribute, set in the init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the processor_config
+	 * attribute, set in the init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the affinity_domain_via_virtual_processor
+	 * attribute, set in the init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the affinity_domain_via_domain
+	 * attribute, set in the init function if applicable.
+	 */
+	NULL,
+	/*
+	 * This NULL is a placeholder for the affinity_domain_via_partition
+	 * attribute, set in the init function if applicable.
+	 */
+	NULL,
+ NULL,
+};
+
+static struct attribute *cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
NULL,
};
-static struct attribute_group interface_group = {
+static const struct attribute_group cpumask_attr_group = {
+ .attrs = cpumask_attrs,
+};
+
+static const struct attribute_group interface_group = {
.name = "interface",
.attrs = interface_attrs,
};
@@ -120,20 +672,10 @@ static const struct attribute_group *attr_groups[] = {
&format_group,
&event_group,
&interface_group,
+ &cpumask_attr_group,
NULL,
};
-#define HGPCI_REQ_BUFFER_SIZE 4096
-#define HGPCI_MAX_DATA_BYTES \
- (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params))
-
-static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
-
-struct hv_gpci_request_buffer {
- struct hv_get_perf_counter_info_params params;
- uint8_t bytes[HGPCI_MAX_DATA_BYTES];
-} __packed;
-
static unsigned long single_gpci_request(u32 req, u32 starting_index,
u16 secondary_index, u8 version_in, u32 offset, u8 length,
u64 *value)
@@ -153,6 +695,20 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+	 * A ret value of 'H_PARAMETER' with detail_rc 'GEN_BUF_TOO_SMALL'
+	 * indicates that the current buffer size cannot accommodate all
+	 * the information and a partial buffer is returned.
+	 * Since this function only accesses data for a given starting
+	 * index, we don't need the whole buffer and can get the required
+	 * count from the first entry.
+	 * Hence the hcall fails only if the ret value is other than
+	 * H_SUCCESS, or H_PARAMETER with detail_rc GEN_BUF_TOO_SMALL (0x1B).
+ */
+ if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B)
+ ret = 0;
+
if (ret) {
pr_devel("hcall failed: 0x%lx\n", ret);
goto out;
@@ -164,7 +720,7 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
*/
count = 0;
for (i = offset; i < offset + length; i++)
- count |= arg->bytes[i] << (i - offset);
+ count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8);
*value = count;
out:
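
The corrected byte-assembly above builds a big-endian integer: byte
i - offset contributes at shift (length - 1 - (i - offset)) * 8, so
the first byte is most significant. The old code shifted by the byte
index itself, which is wrong for any length greater than 1. A worked
sketch of the fixed loop:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t bytes[0x18] = { 0 };
	uint32_t offset = 0x10;
	uint8_t length = 8;
	uint64_t count = 0;

	/* counter value 0x0102030405060708 stored big-endian at offset */
	for (int i = 0; i < 8; i++)
		bytes[offset + i] = i + 1;

	for (uint32_t i = offset; i < offset + length; i++)
		count |= (uint64_t)bytes[i] << ((length - 1 - (i - offset)) * 8);

	printf("count = 0x%016llx\n", (unsigned long long)count);
	return 0;
}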
@@ -217,6 +773,7 @@ static int h_gpci_event_init(struct perf_event *event)
{
u64 count;
u8 length;
+ unsigned long ret;
/* Not our event */
if (event->attr.type != event->pmu->type)
@@ -247,13 +804,23 @@ static int h_gpci_event_init(struct perf_event *event)
}
/* check if the request works... */
- if (single_gpci_request(event_get_request(event),
+ ret = single_gpci_request(event_get_request(event),
event_get_starting_index(event),
event_get_secondary_index(event),
event_get_counter_info_version(event),
event_get_offset(event),
length,
- &count)) {
+ &count);
+
+ /*
+	 * A ret value of H_AUTHORITY implies that the partition is not
+	 * permitted to retrieve performance information; the "Enable
+	 * Performance Information Collection" option must be set first.
+ */
+ if (ret == H_AUTHORITY)
+ return -EPERM;
+
+ if (ret) {
pr_devel("gpci hcall failed\n");
return -EINVAL;
}
@@ -275,11 +842,152 @@ static struct pmu h_gpci_pmu = {
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
+static int ppc_hv_gpci_cpu_online(unsigned int cpu)
+{
+ if (cpumask_empty(&hv_gpci_cpumask))
+ cpumask_set_cpu(cpu, &hv_gpci_cpumask);
+
+ return 0;
+}
+
+static int ppc_hv_gpci_cpu_offline(unsigned int cpu)
+{
+ int target;
+
+ /* Check if exiting cpu is used for collecting gpci events */
+ if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask))
+ return 0;
+
+ /* Find a new cpu to collect gpci events */
+ target = cpumask_last(cpu_active_mask);
+
+ if (target < 0 || target >= nr_cpu_ids) {
+ pr_err("hv_gpci: CPU hotplug init failed\n");
+ return -1;
+ }
+
+ /* Migrate gpci events to the new target */
+ cpumask_set_cpu(target, &hv_gpci_cpumask);
+ perf_pmu_migrate_context(&h_gpci_pmu, cpu, target);
+
+ return 0;
+}
+
+static int hv_gpci_cpu_hotplug_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
+ "perf/powerpc/hv_gcpi:online",
+ ppc_hv_gpci_cpu_online,
+ ppc_hv_gpci_cpu_offline);
+}
+
+static struct device_attribute *sysinfo_device_attr_create(int
+ sysinfo_interface_group_index, u32 req)
+{
+ struct device_attribute *attr = NULL;
+ unsigned long ret;
+ struct hv_gpci_request_buffer *arg;
+
+ if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
+ sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
+ pr_info("Wrong interface group index for system information\n");
+ return NULL;
+ }
+
+ /* Check for given counter request value support */
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ arg->params.counter_request = cpu_to_be32(req);
+
+ ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+ put_cpu_var(hv_gpci_reqb);
+
+ /*
+	 * Add the attribute for the given counter request value to the
+	 * interface_attrs attribute array, but only for valid return codes.
+ */
+ if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) {
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ if (!attr)
+ return NULL;
+
+ sysfs_attr_init(&attr->attr);
+ attr->attr.mode = 0444;
+
+ switch (sysinfo_interface_group_index) {
+ case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR:
+ attr->attr.name = "processor_bus_topology";
+ attr->show = processor_bus_topology_show;
+ break;
+ case INTERFACE_PROCESSOR_CONFIG_ATTR:
+ attr->attr.name = "processor_config";
+ attr->show = processor_config_show;
+ break;
+ case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR:
+ attr->attr.name = "affinity_domain_via_virtual_processor";
+ attr->show = affinity_domain_via_virtual_processor_show;
+ break;
+ case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR:
+ attr->attr.name = "affinity_domain_via_domain";
+ attr->show = affinity_domain_via_domain_show;
+ break;
+ case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR:
+ attr->attr.name = "affinity_domain_via_partition";
+ attr->show = affinity_domain_via_partition_show;
+ break;
+ }
+ } else
+ pr_devel("hcall failed, with error: 0x%lx\n", ret);
+
+ return attr;
+}
+
+static void add_sysinfo_interface_files(void)
+{
+ int sysfs_count;
+ struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
+ int i;
+
+ sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
+
+ /* Get device attribute for a given counter request value */
+ for (i = 0; i < sysfs_count; i++) {
+ attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR,
+ sysinfo_counter_request[i]);
+
+ if (!attr[i])
+ goto out;
+ }
+
+ /* Add sysinfo interface attributes in the interface_attrs attribute array */
+ for (i = 0; i < sysfs_count; i++)
+ interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr;
+
+ return;
+
+out:
+ /*
+	 * The sysinfo interface attributes are added only if the hcall
+	 * succeeded for all the counter request values. Free the device
+	 * attribute array in case of any hcall failure.
+ */
+ if (i > 0) {
+ while (i >= 0) {
+ kfree(attr[i]);
+ i--;
+ }
+ }
+}
+
static int hv_gpci_init(void)
{
int r;
unsigned long hret;
struct hv_perf_caps caps;
+ struct hv_gpci_request_buffer *arg;
hv_gpci_assert_offsets_correct();
@@ -295,13 +1003,52 @@ static int hv_gpci_init(void)
return -ENODEV;
}
+ /* init cpuhotplug */
+ r = hv_gpci_cpu_hotplug_init();
+ if (r)
+ return r;
+
/* sampling not supported */
h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+	 * The H_GET_PERF_COUNTER_INFO hcall populates the output
+	 * counter_info_version value based on the system hypervisor.
+	 * Pass counter request 0x10, which corresponds to request type
+	 * 'Dispatch_timebase_by_processor', to get the supported
+	 * counter_info_version.
+ */
+ arg->params.counter_request = cpu_to_be32(0x10);
+
+ r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+ if (r) {
+ pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
+ arg->params.counter_info_version_out = 0x8;
+ }
+
+ /*
+	 * Use the counter_info_version_out value to select the
+	 * required hv-gpci event list.
+ */
+ if (arg->params.counter_info_version_out >= 0x8)
+ event_group.attrs = hv_gpci_event_attrs;
+ else
+ event_group.attrs = hv_gpci_event_attrs_v6;
+
+ put_cpu_var(hv_gpci_reqb);
+
r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
if (r)
return r;
+	/* sysinfo interface files are available only on Power10 and later platforms */
+ if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
+ add_sysinfo_interface_files();
+
return 0;
}
diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h
index a3053eda5dcc..c72020912dea 100644
--- a/arch/powerpc/perf/hv-gpci.h
+++ b/arch/powerpc/perf/hv-gpci.h
@@ -2,33 +2,6 @@
#ifndef LINUX_POWERPC_PERF_HV_GPCI_H_
#define LINUX_POWERPC_PERF_HV_GPCI_H_
-#include <linux/types.h>
-
-/* From the document "H_GetPerformanceCounterInfo Interface" v1.07 */
-
-/* H_GET_PERF_COUNTER_INFO argument */
-struct hv_get_perf_counter_info_params {
- __be32 counter_request; /* I */
- __be32 starting_index; /* IO */
- __be16 secondary_index; /* IO */
- __be16 returned_values; /* O */
- __be32 detail_rc; /* O, only needed when called via *_norets() */
-
- /*
- * O, size each of counter_value element in bytes, only set for version
- * >= 0x3
- */
- __be16 cv_element_size;
-
- /* I, 0 (zero) for versions < 0x3 */
- __u8 counter_info_version_in;
-
- /* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */
- __u8 counter_info_version_out;
- __u8 reserved[0xC];
- __u8 counter_value[];
-} __packed;
-
/*
* counter info version => fw version/reference (spec version)
*
@@ -53,6 +26,7 @@ enum {
#define REQUEST_FILE "../hv-gpci-requests.h"
#define NAME_LOWER hv_gpci
#define NAME_UPPER HV_GPCI
+#define ENABLE_EVENTS_COUNTERINFO_V6
#include "req-gen/perf.h"
#undef REQUEST_FILE
#undef NAME_LOWER
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index cb50a9e1fd2d..8664a7d297ad 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -6,6 +6,7 @@
* (C) 2017 Anju T Sudhakar, IBM Corporation.
* (C) 2017 Hemant K Shaw, IBM Corporation.
*/
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/slab.h>
#include <asm/opal.h>
@@ -13,6 +14,7 @@
#include <asm/cputhreads.h>
#include <asm/smp.h>
#include <linux/string.h>
+#include <linux/spinlock.h>
/* Nest IMC data structures and variables */
@@ -44,6 +46,16 @@ static DEFINE_PER_CPU(u64 *, trace_imc_mem);
static struct imc_pmu_ref *trace_imc_refc;
static int trace_imc_mem_size;
+/*
+ * Global data structure used to avoid races between thread,
+ * core and trace-imc events (see the reservation sketch after
+ * reset_global_refc() below)
+ */
+static struct imc_pmu_ref imc_global_refc = {
+ .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock),
+ .id = 0,
+ .refc = 0,
+};
+
static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct imc_pmu, pmu);
@@ -61,7 +73,7 @@ static struct attribute *imc_format_attrs[] = {
NULL,
};
-static struct attribute_group imc_format_group = {
+static const struct attribute_group imc_format_group = {
.name = "format",
.attrs = imc_format_attrs,
};
@@ -80,7 +92,7 @@ static struct attribute *trace_imc_format_attrs[] = {
NULL,
};
-static struct attribute_group trace_imc_format_group = {
+static const struct attribute_group trace_imc_format_group = {
.name = "format",
.attrs = trace_imc_format_attrs,
};
@@ -115,7 +127,7 @@ static struct attribute *imc_pmu_cpumask_attrs[] = {
NULL,
};
-static struct attribute_group imc_pmu_cpumask_attr_group = {
+static const struct attribute_group imc_pmu_cpumask_attr_group = {
.attrs = imc_pmu_cpumask_attrs,
};
@@ -229,8 +241,10 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
ct = of_get_child_count(pmu_events);
/* Get the event prefix */
- if (of_property_read_string(node, "events-prefix", &prefix))
+ if (of_property_read_string(node, "events-prefix", &prefix)) {
+ of_node_put(pmu_events);
return 0;
+ }
/* Get a global unit and scale data if available */
if (of_property_read_string(node, "scale", &g_scale))
@@ -244,8 +258,10 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
/* Allocate memory for the events */
pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL);
- if (!pmu->events)
+ if (!pmu->events) {
+ of_node_put(pmu_events);
return -ENOMEM;
+ }
ct = 0;
/* Parse the events and update the struct */
@@ -255,6 +271,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
ct++;
}
+ of_node_put(pmu_events);
+
/* Allocate memory for attribute group */
attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
if (!attr_group) {
@@ -281,6 +299,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
attr_group->attrs = attrs;
do {
ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
+ if (!ev_val_str)
+ continue;
dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
if (!dev_str)
continue;
@@ -288,6 +308,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
attrs[j++] = dev_str;
if (pmu->events[i].scale) {
ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
+ if (!ev_scale_str)
+ continue;
dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
if (!dev_str)
continue;
@@ -297,6 +319,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
if (pmu->events[i].unit) {
ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
+ if (!ev_unit_str)
+ continue;
dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
if (!dev_str)
continue;
@@ -383,7 +407,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
get_hard_smp_processor_id(cpu));
/*
* If this is the last cpu in this chip then, skip the reference
- * count mutex lock and make the reference count on this chip zero.
+ * count lock and make the reference count on this chip zero.
*/
ref = get_nest_pmu_ref(cpu);
if (!ref)
@@ -445,15 +469,15 @@ static void nest_imc_counters_release(struct perf_event *event)
/*
* See if we need to disable the nest PMU.
* If no events are currently in use, then we have to take a
- * mutex to ensure that we don't race with another task doing
+ * lock to ensure that we don't race with another task doing
* enable or disable the nest counters.
*/
ref = get_nest_pmu_ref(event->cpu);
if (!ref)
return;
- /* Take the mutex lock for this node and then decrement the reference count */
- mutex_lock(&ref->lock);
+ /* Take the lock for this node and then decrement the reference count */
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
/*
* The scenario where this is true is, when perf session is
@@ -465,7 +489,7 @@ static void nest_imc_counters_release(struct perf_event *event)
* an OPAL call to disable the engine in that node.
*
*/
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return;
}
ref->refc--;
@@ -473,7 +497,7 @@ static void nest_imc_counters_release(struct perf_event *event)
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id);
return;
}
@@ -481,7 +505,7 @@ static void nest_imc_counters_release(struct perf_event *event)
WARN(1, "nest-imc: Invalid event reference count\n");
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
}
static int nest_imc_event_init(struct perf_event *event)
@@ -511,7 +535,7 @@ static int nest_imc_event_init(struct perf_event *event)
/*
* Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
- * Get the base memory addresss for this cpu.
+ * Get the base memory address for this cpu.
*/
chip_id = cpu_to_chip_id(event->cpu);
@@ -526,7 +550,7 @@ static int nest_imc_event_init(struct perf_event *event)
break;
}
pcni++;
- } while (pcni->vbase != 0);
+ } while (pcni->vbase);
if (!flag)
return -ENODEV;
@@ -540,26 +564,25 @@ static int nest_imc_event_init(struct perf_event *event)
/*
* Get the imc_pmu_ref struct for this node.
- * Take the mutex lock and then increment the count of nest pmu events
- * inited.
+ * Take the lock and then increment the count of nest pmu events inited.
*/
ref = get_nest_pmu_ref(event->cpu);
if (!ref)
return -EINVAL;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("nest-imc: Unable to start the counters for node %d\n",
node_id);
return rc;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
event->destroy = nest_imc_counters_release;
return 0;
@@ -595,9 +618,8 @@ static int core_imc_mem_init(int cpu, int size)
return -ENOMEM;
mem_info->vbase = page_address(page);
- /* Init the mutex */
core_imc_refc[core_id].id = core_id;
- mutex_init(&core_imc_refc[core_id].lock);
+ spin_lock_init(&core_imc_refc[core_id].lock);
rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
__pa((void *)mem_info->vbase),
@@ -664,7 +686,7 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
/*
* Check whether core_imc is registered. We could end up here
* if the cpuhotplug callback registration fails. i.e, callback
- * invokes the offline path for all sucessfully registered cpus.
+ * invokes the offline path for all successfully registered cpus.
* At this stage, core_imc pmu will not be registered and we
* should return here.
*
@@ -686,9 +708,8 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
} else {
/*
- * If this is the last cpu in this core then, skip taking refernce
- * count mutex lock for this core and directly zero "refc" for
- * this core.
+ * If this is the last cpu in this core then skip taking reference
+ * count lock for this core and directly zero "refc" for this core.
*/
opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(cpu));
@@ -698,6 +719,16 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu)
return -EINVAL;
ref->refc = 0;
+ /*
+		 * Reduce the global reference count if this is the
+		 * last cpu in this core and a core-imc event was
+		 * running on this cpu.
+ */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == IMC_DOMAIN_CORE)
+ imc_global_refc.refc--;
+
+ spin_unlock(&imc_global_refc.lock);
}
return 0;
}
@@ -710,6 +741,23 @@ static int core_imc_pmu_cpumask_init(void)
ppc_core_imc_cpu_offline);
}
+static void reset_global_refc(struct perf_event *event)
+{
+ spin_lock(&imc_global_refc.lock);
+ imc_global_refc.refc--;
+
+ /*
+	 * If no other thread is running any event for this
+	 * domain (thread/core/trace), set the global id to zero.
+ */
+ if (imc_global_refc.refc <= 0) {
+ imc_global_refc.refc = 0;
+ imc_global_refc.id = 0;
+ }
+ spin_unlock(&imc_global_refc.lock);
+}
+
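The acquire side of this release helper, repeated in the core, thread and trace event_init paths below, reduces to a compare-and-claim under the global lock. A distilled sketch (illustrative only, not a drop-in kernel helper; the real paths differ in their error reporting):

	static int reserve_imc_domain(int domain)
	{
		int ret = 0;

		spin_lock(&imc_global_refc.lock);
		if (imc_global_refc.id == 0 || imc_global_refc.id == domain) {
			/* Free, or already claimed by this domain: take a ref. */
			imc_global_refc.id = domain;
			imc_global_refc.refc++;
		} else {
			/* Another IMC mode is active; refuse the new event. */
			ret = -EBUSY;
		}
		spin_unlock(&imc_global_refc.lock);

		return ret;
	}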
static void core_imc_counters_release(struct perf_event *event)
{
int rc, core_id;
@@ -720,17 +768,17 @@ static void core_imc_counters_release(struct perf_event *event)
/*
* See if we need to disable the IMC PMU.
* If no events are currently in use, then we have to take a
- * mutex to ensure that we don't race with another task doing
+ * lock to ensure that we don't race with another task doing
* enable or disable the core counters.
*/
core_id = event->cpu / threads_per_core;
- /* Take the mutex lock and decrement the refernce count for this core */
+	/* Take the lock and decrement the reference count for this core */
ref = &core_imc_refc[core_id];
if (!ref)
return;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
/*
* The scenario where this is true is, when perf session is
@@ -742,7 +790,7 @@ static void core_imc_counters_release(struct perf_event *event)
* an OPAL call to disable the engine in that core.
*
*/
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return;
}
ref->refc--;
@@ -750,7 +798,7 @@ static void core_imc_counters_release(struct perf_event *event)
rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
return;
}
@@ -758,7 +806,9 @@ static void core_imc_counters_release(struct perf_event *event)
WARN(1, "core-imc: Invalid event reference count\n");
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
+
+ reset_global_refc(event);
}
static int core_imc_event_init(struct perf_event *event)
@@ -794,7 +844,6 @@ static int core_imc_event_init(struct perf_event *event)
if ((!pcmi->vbase))
return -ENODEV;
- /* Get the core_imc mutex for this core */
ref = &core_imc_refc[core_id];
if (!ref)
return -EINVAL;
@@ -802,22 +851,45 @@ static int core_imc_event_init(struct perf_event *event)
/*
* Core pmu units are enabled only when it is used.
* See if this is triggered for the first time.
- * If yes, take the mutex lock and enable the core counters.
+ * If yes, take the lock and enable the core counters.
* If not, just increment the count in core_imc_refc struct.
*/
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(event->cpu));
if (rc) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("core-imc: Unable to start the counters for core %d\n",
core_id);
return rc;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
+
+ /*
+	 * Since the system can run IMC in either accumulation or trace
+	 * mode at a time, core-imc events are allowed only if no other
+	 * trace/thread imc events are enabled/monitored.
+	 *
+	 * Take the global lock and check refc.id to know
+	 * whether any other trace/thread imc events
+	 * are running.
+ */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
+ /*
+ * No other trace/thread imc events are running in
+ * the system, so set the refc.id to core-imc.
+ */
+ imc_global_refc.id = IMC_DOMAIN_CORE;
+ imc_global_refc.refc++;
+ } else {
+ spin_unlock(&imc_global_refc.lock);
+ return -EBUSY;
+ }
+ spin_unlock(&imc_global_refc.lock);
event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
event->destroy = core_imc_counters_release;
@@ -877,7 +949,23 @@ static int ppc_thread_imc_cpu_online(unsigned int cpu)
static int ppc_thread_imc_cpu_offline(unsigned int cpu)
{
- mtspr(SPRN_LDBAR, 0);
+ /*
+	 * Set bit 0 of LDBAR to zero.
+	 *
+	 * If bit 0 of LDBAR is unset, the counter data stops
+	 * being posted to memory.
+	 * For thread-imc, bit 0 of LDBAR is set to 1 in the
+	 * event_add function. So reset this bit here, to stop the updates
+	 * to memory in the cpu_offline path.
+ */
+ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+
+	/* Reduce the refc if a thread-imc event was running on this cpu */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == IMC_DOMAIN_THREAD)
+ imc_global_refc.refc--;
+ spin_unlock(&imc_global_refc.lock);
+
return 0;
}
@@ -898,7 +986,7 @@ static int thread_imc_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
/* Sampling not supported */
@@ -916,7 +1004,22 @@ static int thread_imc_event_init(struct perf_event *event)
if (!target)
return -EINVAL;
+ spin_lock(&imc_global_refc.lock);
+ /*
+	 * Check whether any other trace/core imc events are running in
+	 * the system; if not, set the global id to thread-imc.
+ */
+ if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) {
+ imc_global_refc.id = IMC_DOMAIN_THREAD;
+ imc_global_refc.refc++;
+ } else {
+ spin_unlock(&imc_global_refc.lock);
+ return -EBUSY;
+ }
+ spin_unlock(&imc_global_refc.lock);
+
event->pmu->task_ctx_nr = perf_sw_context;
+ event->destroy = reset_global_refc;
return 0;
}
@@ -928,16 +1031,16 @@ static bool is_thread_imc_pmu(struct perf_event *event)
return false;
}
-static u64 * get_event_base_addr(struct perf_event *event)
+static __be64 *get_event_base_addr(struct perf_event *event)
{
u64 addr;
if (is_thread_imc_pmu(event)) {
addr = (u64)per_cpu(thread_imc_mem, smp_processor_id());
- return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK));
+ return (__be64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK));
}
- return (u64 *)event->hw.event_base;
+ return (__be64 *)event->hw.event_base;
}
static void thread_imc_pmu_start_txn(struct pmu *pmu,
@@ -961,7 +1064,8 @@ static int thread_imc_pmu_commit_txn(struct pmu *pmu)
static u64 imc_read_counter(struct perf_event *event)
{
- u64 *addr, data;
+ __be64 *addr;
+ u64 data;
/*
* In-Memory Collection (IMC) counters are free flowing counters.
@@ -1035,25 +1139,25 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
/*
* imc pmus are enabled only when it is used.
* See if this is triggered for the first time.
- * If yes, take the mutex lock and enable the counters.
+ * If yes, take the lock and enable the counters.
* If not, just increment the count in ref count struct.
*/
ref = &core_imc_refc[core_id];
if (!ref)
return -EINVAL;
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("thread-imc: Unable to start the counter\
for core %d\n", core_id);
return -EINVAL;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
return 0;
}
@@ -1063,17 +1167,19 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
int core_id;
struct imc_pmu_ref *ref;
- mtspr(SPRN_LDBAR, 0);
-
core_id = smp_processor_id() / threads_per_core;
ref = &core_imc_refc[core_id];
+ if (!ref) {
+ pr_debug("imc: Failed to get event reference count\n");
+ return;
+ }
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
ref->refc--;
if (ref->refc == 0) {
if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("thread-imc: Unable to stop the counters\
for core %d\n", core_id);
return;
@@ -1081,7 +1187,11 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
} else if (ref->refc < 0) {
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
+
+ /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
+ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+
/*
* Take a snapshot and calculate the delta and update
* the event counter values.
@@ -1118,9 +1228,8 @@ static int trace_imc_mem_alloc(int cpu_id, int size)
}
}
- /* Init the mutex, if not already */
trace_imc_refc[core_id].id = core_id;
- mutex_init(&trace_imc_refc[core_id].lock);
+ spin_lock_init(&trace_imc_refc[core_id].lock);
mtspr(SPRN_LDBAR, 0);
return 0;
@@ -1133,7 +1242,18 @@ static int ppc_trace_imc_cpu_online(unsigned int cpu)
static int ppc_trace_imc_cpu_offline(unsigned int cpu)
{
- mtspr(SPRN_LDBAR, 0);
+ /*
+	 * No need to set bit 0 of LDBAR to zero, as it is
+	 * already zero in imc trace-mode.
+	 *
+	 * Reduce the refc if any trace-imc event was running
+	 * on this cpu.
+ */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == IMC_DOMAIN_TRACE)
+ imc_global_refc.refc--;
+ spin_unlock(&imc_global_refc.lock);
+
return 0;
}
@@ -1178,11 +1298,30 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem,
header->size = sizeof(*header) + event->header_size;
header->misc = 0;
- if (is_kernel_addr(data->ip))
- header->misc |= PERF_RECORD_MISC_KERNEL;
- else
- header->misc |= PERF_RECORD_MISC_USER;
-
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) {
+		case 0: /* MSR HV and PR are not set in the trace-record */
+ header->misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+ break;
+ case 1: /* MSR HV is 0 and PR is 1 */
+ header->misc |= PERF_RECORD_MISC_GUEST_USER;
+ break;
+ case 2: /* MSR HV is 1 and PR is 0 */
+ header->misc |= PERF_RECORD_MISC_KERNEL;
+ break;
+ case 3: /* MSR HV is 1 and PR is 1 */
+ header->misc |= PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_info("IMC: Unable to set the flag based on MSR bits\n");
+ break;
+ }
+ } else {
+ if (is_kernel_addr(data->ip))
+ header->misc |= PERF_RECORD_MISC_KERNEL;
+ else
+ header->misc |= PERF_RECORD_MISC_USER;
+ }
perf_event_header__init_id(header, data, event);
return 0;
@@ -1207,7 +1346,7 @@ static void dump_trace_imc_data(struct perf_event *event)
/* If this is a valid record, create the sample */
struct perf_output_handle handle;
- if (perf_output_begin(&handle, event, header.size))
+ if (perf_output_begin(&handle, &data, event, header.size))
return;
perf_output_sample(&handle, &header, &data, event);
@@ -1226,29 +1365,26 @@ static int trace_imc_event_add(struct perf_event *event, int flags)
local_mem = get_trace_imc_event_base_addr();
ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
- if (core_imc_refc)
- ref = &core_imc_refc[core_id];
+ /* trace-imc reference count */
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
if (!ref) {
- /* If core-imc is not enabled, use trace-imc reference count */
- if (trace_imc_refc)
- ref = &trace_imc_refc[core_id];
- if (!ref)
- return -EINVAL;
+ pr_debug("imc: Failed to get the event reference count\n");
+ return -EINVAL;
}
+
mtspr(SPRN_LDBAR, ldbar_value);
- mutex_lock(&ref->lock);
+ spin_lock(&ref->lock);
if (ref->refc == 0) {
if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
- mtspr(SPRN_LDBAR, 0);
return -EINVAL;
}
}
++ref->refc;
- mutex_unlock(&ref->lock);
-
+ spin_unlock(&ref->lock);
return 0;
}
@@ -1274,50 +1410,69 @@ static void trace_imc_event_del(struct perf_event *event, int flags)
int core_id = smp_processor_id() / threads_per_core;
struct imc_pmu_ref *ref = NULL;
- if (core_imc_refc)
- ref = &core_imc_refc[core_id];
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
if (!ref) {
- /* If core-imc is not enabled, use trace-imc reference count */
- if (trace_imc_refc)
- ref = &trace_imc_refc[core_id];
- if (!ref)
- return;
+ pr_debug("imc: Failed to get event reference count\n");
+ return;
}
- mtspr(SPRN_LDBAR, 0);
- mutex_lock(&ref->lock);
+
+ spin_lock(&ref->lock);
ref->refc--;
if (ref->refc == 0) {
if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
get_hard_smp_processor_id(smp_processor_id()))) {
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
return;
}
} else if (ref->refc < 0) {
ref->refc = 0;
}
- mutex_unlock(&ref->lock);
+ spin_unlock(&ref->lock);
+
trace_imc_event_stop(event, flags);
}
static int trace_imc_event_init(struct perf_event *event)
{
- struct task_struct *target;
-
if (event->attr.type != event->pmu->type)
return -ENOENT;
- if (!capable(CAP_SYS_ADMIN))
+ if (!perfmon_capable())
return -EACCES;
/* Return if this is a counting event */
if (event->attr.sample_period == 0)
return -ENOENT;
+ /*
+	 * Take the global lock and make sure no other thread
+	 * is running any core/thread imc events.
+ */
+ spin_lock(&imc_global_refc.lock);
+ if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) {
+ /*
+ * No core/thread imc events are running in the
+ * system, so set the refc.id to trace-imc.
+ */
+ imc_global_refc.id = IMC_DOMAIN_TRACE;
+ imc_global_refc.refc++;
+ } else {
+ spin_unlock(&imc_global_refc.lock);
+ return -EBUSY;
+ }
+ spin_unlock(&imc_global_refc.lock);
+
event->hw.idx = -1;
- target = event->hw.target;
- event->pmu->task_ctx_nr = perf_hw_context;
+ /*
+	 * There can only be a single PMU for perf_hw_context events, and that
+	 * slot is taken by the core PMU. Hence use "perf_sw_context" for trace_imc.
+ */
+ event->pmu->task_ctx_nr = perf_sw_context;
+ event->destroy = reset_global_refc;
return 0;
}
@@ -1359,6 +1514,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.stop = trace_imc_event_stop;
pmu->pmu.read = trace_imc_event_read;
pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
+ break;
default:
break;
}
@@ -1380,10 +1536,10 @@ static int init_nest_pmu_ref(void)
i = 0;
for_each_node(nid) {
/*
- * Mutex lock to avoid races while tracking the number of
+ * Take the lock to avoid races while tracking the number of
* sessions using the chip's nest pmu units.
*/
- mutex_init(&nest_imc_refc[i].lock);
+ spin_lock_init(&nest_imc_refc[i].lock);
/*
* Loop to init the "id" with the node_id. Variable "i" initialized to
@@ -1429,10 +1585,10 @@ static void cleanup_all_core_imc_memory(void)
static void thread_imc_ldbar_disable(void *dummy)
{
/*
- * By Zeroing LDBAR, we disable thread-imc
- * updates.
+	 * By setting bit 0 of LDBAR to zero, we disable thread-imc
+ * updates to memory.
*/
- mtspr(SPRN_LDBAR, 0);
+ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
}
void thread_imc_disable(void)
diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h
index f755c64da137..a70ac471a5a5 100644
--- a/arch/powerpc/perf/internal.h
+++ b/arch/powerpc/perf/internal.h
@@ -2,11 +2,13 @@
//
// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
-extern int init_ppc970_pmu(void);
-extern int init_power5_pmu(void);
-extern int init_power5p_pmu(void);
-extern int init_power6_pmu(void);
-extern int init_power7_pmu(void);
-extern int init_power8_pmu(void);
-extern int init_power9_pmu(void);
-extern int init_generic_compat_pmu(void);
+int __init init_ppc970_pmu(void);
+int __init init_power5_pmu(void);
+int __init init_power5p_pmu(void);
+int __init init_power6_pmu(void);
+int __init init_power7_pmu(void);
+int __init init_power8_pmu(void);
+int __init init_power9_pmu(void);
+int __init init_power10_pmu(void);
+int __init init_power11_pmu(void);
+int __init init_generic_compat_pmu(void);
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 4c86da5eb28a..56301b2bc8ae 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -21,7 +21,7 @@ PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
PMU_FORMAT_ATTR(thresh_start, "config:36-39");
PMU_FORMAT_ATTR(thresh_cmp, "config:40-49");
-struct attribute *isa207_pmu_format_attr[] = {
+static struct attribute *isa207_pmu_format_attr[] = {
&format_attr_event.attr,
&format_attr_pmcxsel.attr,
&format_attr_mark.attr,
@@ -37,7 +37,7 @@ struct attribute *isa207_pmu_format_attr[] = {
NULL,
};
-struct attribute_group isa207_pmu_format_group = {
+const struct attribute_group isa207_pmu_format_group = {
.name = "format",
.attrs = isa207_pmu_format_attr,
};
@@ -55,7 +55,9 @@ static bool is_event_valid(u64 event)
{
u64 valid_mask = EVENT_VALID_MASK;
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ valid_mask = p10_EVENT_VALID_MASK;
+ else if (cpu_has_feature(CPU_FTR_ARCH_300))
valid_mask = p9_EVENT_VALID_MASK;
return !(event & ~valid_mask);
@@ -69,17 +71,25 @@ static inline bool is_event_marked(u64 event)
return false;
}
+static unsigned long sdar_mod_val(u64 event)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ return p10_SDAR_MODE(event);
+
+ return p9_SDAR_MODE(event);
+}
+
static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
{
/*
- * MMCRA[SDAR_MODE] specifices how the SDAR should be updated in
- * continous sampling mode.
+ * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in
+ * continuous sampling mode.
*
* In case of Power8:
- * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling
+ * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous sampling
* mode and will be un-changed when setting MMCRA[63] (Marked events).
*
- * Incase of Power9:
+	 * In case of Power9/Power10:
* Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
* or if group already have any marked events.
* For rest
@@ -90,20 +100,65 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
- else if (p9_SDAR_MODE(event))
- *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+ else if (sdar_mod_val(event))
+ *mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT;
else
*mmcra |= MMCRA_SDAR_MODE_DCACHE;
} else
*mmcra |= MMCRA_SDAR_MODE_TLB;
}
+static int p10_thresh_cmp_val(u64 value)
+{
+ int exp = 0;
+ u64 result = value;
+
+ if (!value)
+ return value;
+
+ /*
+	 * In case of P10, the thresh_cmp value is not part of the raw event
+	 * code and is provided via the attr.config1 parameter. To program the
+	 * threshold in MMCRA, take an 18-bit number N, shift it right 2 places
+	 * and increment the exponent E by 1 until the upper 10 bits of N are
+	 * zero. Write E to the threshold exponent and write the lower 8 bits
+	 * of N to the threshold mantissa.
+ * The max threshold that can be written is 261120.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (value > 261120)
+ value = 261120;
+ while ((64 - __builtin_clzl(value)) > 8) {
+ exp++;
+ value >>= 2;
+ }
+
+ /*
+ * Note that it is invalid to write a mantissa with the
+ * upper 2 bits of mantissa being zero, unless the
+ * exponent is also zero.
+ */
+ if (!(value & 0xC0) && exp)
+ result = -1;
+ else
+ result = (exp << 8) | value;
+ }
+ return result;
+}
+
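A worked example of the encoding above, checked with a small standalone sketch (userspace C mirroring the loop; __builtin_clzl requires a non-zero input): N = 0x20000 is 18 bits wide, so the loop shifts five times, leaving value = 0x80 and exp = 5; the mantissa's upper two bits are non-zero, so the result is (5 << 8) | 0x80 = 0x580. Decoding reverses it: 0x80 << (2 * 5) = 0x20000.

	#include <assert.h>

	/* Sketch of p10_thresh_cmp_val() for a valid, non-zero 18-bit input. */
	static unsigned long p10_thresh_encode(unsigned long value)
	{
		unsigned long exp = 0;

		while ((64 - __builtin_clzl(value)) > 8) {
			exp++;
			value >>= 2;
		}
		return (exp << 8) | value;
	}

	int main(void)
	{
		assert(p10_thresh_encode(0x20000) == 0x580);
		/* Max threshold 261120: mantissa 0xff, exponent 5. */
		assert(p10_thresh_encode(261120) == 0x5ff);
		return 0;
	}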
static u64 thresh_cmp_val(u64 value)
{
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ value = p10_thresh_cmp_val(value);
+
+ /*
+	 * Since the location of the threshold compare bits in MMCRA
+	 * is different for p8, use a different shift value there.
+ */
if (cpu_has_feature(CPU_FTR_ARCH_300))
return value << p9_MMCRA_THR_CMP_SHIFT;
-
- return value << MMCRA_THR_CMP_SHIFT;
+ else
+ return value << MMCRA_THR_CMP_SHIFT;
}
static unsigned long combine_from_event(u64 event)
@@ -131,10 +186,14 @@ static bool is_thresh_cmp_valid(u64 event)
{
unsigned int cmp, exp;
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ return p10_thresh_cmp_val(event) >= 0;
+
/*
* Check the mantissa upper two bits are not zero, unless the
* exponent is also zero. See the THRESH_CMP_MANTISSA doc.
*/
+
cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
exp = cmp >> 7;
@@ -161,36 +220,82 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
/* Nothing to do */
break;
case 1:
- ret = PH(LVL, L1);
+ ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT);
break;
case 2:
- ret = PH(LVL, L2);
+ ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
break;
case 3:
- ret = PH(LVL, L3);
+ ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
break;
case 4:
- if (sub_idx <= 1)
- ret = PH(LVL, LOC_RAM);
- else if (sub_idx > 1 && sub_idx <= 2)
- ret = PH(LVL, REM_RAM1);
- else
- ret = PH(LVL, REM_RAM2);
- ret |= P(SNOOP, HIT);
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ ret = P(SNOOP, HIT);
+
+ if (sub_idx == 1)
+ ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
+ else if (sub_idx == 2 || sub_idx == 3)
+ ret |= P(LVL, HIT) | LEVEL(PMEM);
+ else if (sub_idx == 4)
+ ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2);
+ else if (sub_idx == 5 || sub_idx == 7)
+ ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
+ else if (sub_idx == 6)
+ ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3);
+ } else {
+ if (sub_idx <= 1)
+ ret = PH(LVL, LOC_RAM);
+ else if (sub_idx > 1 && sub_idx <= 2)
+ ret = PH(LVL, REM_RAM1);
+ else
+ ret = PH(LVL, REM_RAM2);
+ ret |= P(SNOOP, HIT);
+ }
break;
case 5:
- ret = PH(LVL, REM_CCE1);
- if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
- ret |= P(SNOOP, HIT);
- else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
- ret |= P(SNOOP, HITM);
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ ret = REM | P(HOPS, 0);
+
+ if (sub_idx == 0 || sub_idx == 4)
+ ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
+ else if (sub_idx == 1 || sub_idx == 5)
+ ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM);
+ else if (sub_idx == 2 || sub_idx == 6)
+ ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+ else if (sub_idx == 3 || sub_idx == 7)
+ ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+ } else {
+ if (sub_idx == 0)
+ ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+ else if (sub_idx == 1)
+ ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+ else if (sub_idx == 2 || sub_idx == 4)
+ ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+ else if (sub_idx == 3 || sub_idx == 5)
+ ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+ }
break;
case 6:
- ret = PH(LVL, REM_CCE2);
- if ((sub_idx == 0) || (sub_idx == 2))
- ret |= P(SNOOP, HIT);
- else if ((sub_idx == 1) || (sub_idx == 3))
- ret |= P(SNOOP, HITM);
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (sub_idx == 0)
+ ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HIT) | P(HOPS, 2);
+ else if (sub_idx == 1)
+ ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HITM) | P(HOPS, 2);
+ else if (sub_idx == 2)
+ ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HIT) | P(HOPS, 3);
+ else if (sub_idx == 3)
+ ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HITM) | P(HOPS, 3);
+ } else {
+ ret = PH(LVL, REM_CCE2);
+ if (sub_idx == 0 || sub_idx == 2)
+ ret |= P(SNOOP, HIT);
+ else if (sub_idx == 1 || sub_idx == 3)
+ ret |= P(SNOOP, HITM);
+ }
break;
case 7:
ret = PM(LVL, L1);
@@ -216,30 +321,84 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
sier = mfspr(SPRN_SIER);
val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
- if (val == 1 || val == 2) {
- idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
- sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
+ if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31)))
+ return;
- dsrc->val = isa207_find_source(idx, sub_idx);
+ idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
+ sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
+
+ dsrc->val = isa207_find_source(idx, sub_idx);
+ if (val == 7) {
+ u64 mmcra;
+ u32 op_type;
+
+ /*
+		 * Type 0b111 denotes either a larx or a stcx instruction. Use
+		 * the MMCRA sampling bits [57:59] along with the type value
+		 * to determine the exact instruction type. If the sampling
+		 * criteria is neither load nor store, set the type to the
+		 * default, NA.
+ */
+ mmcra = mfspr(SPRN_MMCRA);
+
+ op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK;
+ switch (op_type) {
+ case 5:
+ dsrc->val |= P(OP, LOAD);
+ break;
+ case 7:
+ dsrc->val |= P(OP, STORE);
+ break;
+ default:
+ dsrc->val |= P(OP, NA);
+ break;
+ }
+ } else {
dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE);
}
}
-void isa207_get_mem_weight(u64 *weight)
+void isa207_get_mem_weight(u64 *weight, u64 type)
{
+ union perf_sample_weight *weight_fields;
+ u64 weight_lat;
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
u64 sier = mfspr(SPRN_SIER);
u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
- if (val == 0 || val == 7)
- *weight = 0;
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
+
+ if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31)))
+ weight_lat = 0;
else
- *weight = mantissa << (2 * exp);
+ weight_lat = mantissa << (2 * exp);
+
+ /*
+	 * Use the 64-bit weight field (full) if the sample type is
+	 * WEIGHT.
+	 *
+	 * If the sample type is WEIGHT_STRUCT:
+	 * - store the memory latency in the lower 32 bits.
+	 * - For ISA v3.1, use the remaining two 16-bit fields of
+	 *   perf_sample_weight to store the cycle counter values
+	 *   from sier2.
+ */
+ weight_fields = (union perf_sample_weight *)weight;
+ if (type & PERF_SAMPLE_WEIGHT)
+ weight_fields->full = weight_lat;
+ else {
+ weight_fields->var1_dw = (u32)weight_lat;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
+ weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
+ }
+ }
}
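On the consumer side, a hedged sketch of unpacking a PERF_SAMPLE_WEIGHT_STRUCT value produced above. The union layout is from the uapi <linux/perf_event.h> (little-endian shown); the field meanings follow the comment in the code:

	#include <linux/perf_event.h>
	#include <stdio.h>

	static void print_p10_weight(__u64 raw)
	{
		union perf_sample_weight w = { .full = raw };

		printf("memory latency : %u cycles\n", w.var1_dw);
		printf("finish cycles  : %u\n", (unsigned int)w.var2_w);	/* from SIER2 */
		printf("dispatch cycles: %u\n", (unsigned int)w.var3_w);	/* from SIER2 */
	}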
-int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
{
unsigned int unit, pmc, cache, ebb;
unsigned long mask, value;
@@ -251,7 +410,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
- cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK;
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+ p10_EVENT_CACHE_SEL_MASK;
+ else
+ cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+ EVENT_CACHE_SEL_MASK;
ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK;
if (pmc) {
@@ -269,6 +433,15 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
mask |= CNST_PMC_MASK(pmc);
value |= CNST_PMC_VAL(pmc);
+
+ /*
+ * PMC5 and PMC6 are used to count cycles and instructions and
+ * they do not support most of the constraint bits. Add a check
+ * to exclude PMC5/6 from most of the constraints except for
+ * EBB/BHRB.
+ */
+ if (pmc >= 5)
+ goto ebb_bhrb;
}
if (pmc <= 4) {
@@ -283,7 +456,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
if (unit >= 6 && unit <= 9) {
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (unit == 6) {
+ mask |= CNST_L2L3_GROUP_MASK;
+ value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
+ }
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
mask |= CNST_CACHE_GROUP_MASK;
value |= CNST_CACHE_GROUP_VAL(event & 0xff);
@@ -308,16 +486,30 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
value |= CNST_L1_QUAL_VAL(cache);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mask |= CNST_RADIX_SCOPE_GROUP_MASK;
+ value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT);
+ }
+
if (is_event_marked(event)) {
mask |= CNST_SAMPLE_MASK;
value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
}
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) {
+ mask |= CNST_THRESH_CTL_SEL_MASK;
+ value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+ mask |= p10_CNST_THRESH_CMP_MASK;
+ value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1));
+ } else if (event_is_threshold(event))
+ return -1;
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
mask |= CNST_THRESH_MASK;
value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
- }
+ } else if (event_is_threshold(event))
+ return -1;
} else {
/*
* Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
@@ -335,6 +527,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
}
+ebb_bhrb:
if (!pmc && ebb)
/* EBB events must specify the PMC */
return -1;
@@ -353,8 +546,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
* EBB events are pinned & exclusive, so this should never actually
* hit, but we leave it as a fallback in case.
*/
- mask |= CNST_EBB_VAL(ebb);
- value |= CNST_EBB_MASK;
+ mask |= CNST_EBB_MASK;
+ value |= CNST_EBB_VAL(ebb);
*maskp = mask;
*valp = value;
@@ -363,10 +556,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
int isa207_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[],
- struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[], u32 flags)
{
unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
+ unsigned long mmcr3;
unsigned int pmc, pmc_inuse;
int i;
@@ -379,7 +573,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
pmc_inuse |= 1 << pmc;
}
- mmcra = mmcr1 = mmcr2 = 0;
+ mmcra = mmcr1 = mmcr2 = mmcr3 = 0;
+
+ /*
+	 * Disable BHRB unless explicitly requested,
+	 * by setting the MMCRA[BHRBRD] bit.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ mmcra |= MMCRA_BHRB_DISABLE;
/* Second pass: assign PMCs, set all MMCR1 fields */
for (i = 0; i < n_ev; ++i) {
@@ -416,6 +617,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
}
}
+ /* Set RADIX_SCOPE_QUAL bit */
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) &
+ p10_EVENT_RADIX_SCOPE_QUAL_MASK;
+ mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT;
+ }
+
if (is_event_marked(event[i])) {
mmcra |= MMCRA_SAMPLE_ENABLE;
@@ -438,8 +646,21 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
mmcra |= val << MMCRA_THR_CTL_SHIFT;
val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
mmcra |= val << MMCRA_THR_SEL_SHIFT;
- val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
- mmcra |= thresh_cmp_val(val);
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+ val = (event[i] >> EVENT_THR_CMP_SHIFT) &
+ EVENT_THR_CMP_MASK;
+ mmcra |= thresh_cmp_val(val);
+ } else if (flags & PPMU_HAS_ATTR_CONFIG1) {
+ val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) &
+ p10_EVENT_THR_CMP_MASK;
+ mmcra |= thresh_cmp_val(val);
+ }
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
+ val = (event[i] >> p10_L2L3_EVENT_SHIFT) &
+ p10_EVENT_L2L3_SEL_MASK;
+ mmcr2 |= val << p10_L2L3_SEL_SHIFT;
}
if (event[i] & EVENT_WANTS_BHRB) {
@@ -447,6 +668,11 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
mmcra |= val << MMCRA_IFM_SHIFT;
}
+		/* Clear MMCRA[BHRBRD] if there is a user request for BHRB */
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ (has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB)))
+ mmcra &= ~MMCRA_BHRB_DISABLE;
+
if (pevents[i]->attr.exclude_user)
mmcr2 |= MMCR2_FCP(pmc);
@@ -460,34 +686,54 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
mmcr2 |= MMCR2_FCS(pmc);
}
+ if (pevents[i]->attr.exclude_idle)
+ mmcr2 |= MMCR2_FCWAIT(pmc);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (pmc <= 4) {
+ val = (event[i] >> p10_EVENT_MMCR3_SHIFT) &
+ p10_EVENT_MMCR3_MASK;
+ mmcr3 |= val << MMCR3_SHIFT(pmc);
+ }
+ }
+
hwc[i] = pmc - 1;
}
/* Return MMCRx values */
- mmcr[0] = 0;
+ mmcr->mmcr0 = 0;
/* pmc_inuse is 1-based */
if (pmc_inuse & 2)
- mmcr[0] = MMCR0_PMC1CE;
+ mmcr->mmcr0 = MMCR0_PMC1CE;
if (pmc_inuse & 0x7c)
- mmcr[0] |= MMCR0_PMCjCE;
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
/* If we're not using PMC 5 or 6, freeze them */
if (!(pmc_inuse & 0x60))
- mmcr[0] |= MMCR0_FC56;
+ mmcr->mmcr0 |= MMCR0_FC56;
+
+ /*
+	 * Set MMCR0[PMCCEXT] for p10, which restricts
+	 * access to group B registers when
+	 * MMCR0[PMCC] = 0b00.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ mmcr->mmcr0 |= MMCR0_PMCCEXT;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
- mmcr[3] = mmcr2;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
+ mmcr->mmcr2 = mmcr2;
+ mmcr->mmcr3 = mmcr3;
return 0;
}
-void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
if (pmc <= 3)
- mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
+ mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
}
static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size)
@@ -550,3 +796,45 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
return num_alt;
}
+
+int isa3XX_check_attr_config(struct perf_event *ev)
+{
+ u64 val, sample_mode;
+ u64 event = ev->attr.config;
+
+ val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+ sample_mode = val & 0x3;
+
+ /*
+ * MMCRA[61:62] is Random Sampling Mode (SM).
+	 * The value 0b11 is reserved.
+ */
+ if (sample_mode == 0x3)
+ return -EINVAL;
+
+ /*
+	 * Check for all reserved values.
+ * Source: Performance Monitoring Unit User Guide
+ */
+ switch (val) {
+ case 0x5:
+ case 0x9:
+ case 0xD:
+ case 0x19:
+ case 0x1D:
+ case 0x1A:
+ case 0x1E:
+ return -EINVAL;
+ }
+
+ /*
+	 * MMCRA[48:51]/[52:55] is Threshold Start/Stop
+	 * Event Selection.
+	 * 0b11110000/0b00001111 are reserved.
+ */
+ val = (event >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
+ if (((val & 0xF0) == 0xF0) || ((val & 0xF) == 0xF))
+ return -EINVAL;
+
+ return 0;
+}
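Worked examples of the checks above (field positions per the masks in isa207-common.h; a sketch, not new kernel logic):

	/*
	 * - sample field 0b00111: sample_mode = 0b11, the reserved SM
	 *   value, so the event is rejected with -EINVAL.
	 * - sample field 0x19: in the reserved-value list, rejected.
	 * - thresh_ctl 0xF2: (0xF2 & 0xF0) == 0xF0, a reserved
	 *   start/stop selection, rejected.
	 */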
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 63fd4f3f6013..f594fa6580d1 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -13,6 +13,8 @@
#include <asm/firmware.h>
#include <asm/cputable.h>
+#include "internal.h"
+
#define EVENT_EBB_MASK 1ull
#define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT
#define EVENT_BHRB_MASK 1ull
@@ -87,20 +89,53 @@
EVENT_LINUX_MASK | \
EVENT_PSEL_MASK))
+/* Constants to support the power10 raw encoding format */
+#define p10_SDAR_MODE_SHIFT 22
+#define p10_SDAR_MODE_MASK 0x3ull
+#define p10_SDAR_MODE(v) (((v) >> p10_SDAR_MODE_SHIFT) & \
+ p10_SDAR_MODE_MASK)
+#define p10_EVENT_L2L3_SEL_MASK 0x1f
+#define p10_L2L3_SEL_SHIFT 3
+#define p10_L2L3_EVENT_SHIFT 40
+#define p10_EVENT_THRESH_MASK 0xffffull
+#define p10_EVENT_CACHE_SEL_MASK 0x3ull
+#define p10_EVENT_MMCR3_MASK 0x7fffull
+#define p10_EVENT_MMCR3_SHIFT 45
+#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT 9
+#define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1
+#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45
+
+/* Event Threshold Compare bit constants for power10 in the config1 attribute */
+#define p10_EVENT_THR_CMP_SHIFT 0
+#define p10_EVENT_THR_CMP_MASK 0x3FFFFull
+
+#define p10_EVENT_VALID_MASK \
+ ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \
+ (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
+ (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
+ (p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \
+ (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \
+ (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
+ (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \
+ (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \
+ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
+ (p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) | \
+ EVENT_LINUX_MASK | \
+ EVENT_PSEL_MASK))
/*
* Layout of constraint bits:
*
* 60 56 52 48 44 40 36 32
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
* [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ]
- * |
- * thresh_sel -*
+ * | |
+ * [ thresh_cmp bits for p10] thresh_sel -*
*
* 28 24 20 16 12 8 4 0
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1]
- * | | | |
- * BHRB IFM -* | | | Count of events for each PMC.
+ * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1]
+ * | | | | |
+ * BHRB IFM -* | | |*radix_scope | Count of events for each PMC.
* EBB -* | | p1, p2, p3, p4, p5, p6.
* L1 I/D qualifier -* |
* nc - number of counters -*
@@ -118,6 +153,12 @@
#define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32)
#define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK)
+#define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32)
+#define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff)
+
+#define p10_CNST_THRESH_CMP_VAL(v) (((v) & 0x7ffull) << 43)
+#define p10_CNST_THRESH_CMP_MASK p10_CNST_THRESH_CMP_VAL(0x7ff)
+
#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24)
#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK)
@@ -135,6 +176,12 @@
#define CNST_CACHE_PMC4_VAL (1ull << 54)
#define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL
+#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55)
+#define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f)
+
+#define CNST_RADIX_SCOPE_GROUP_VAL(v) (((v) & 0x1ull) << 21)
+#define CNST_RADIX_SCOPE_GROUP_MASK CNST_RADIX_SCOPE_GROUP_VAL(1)
+
/*
* For NC we are counting up to 4 events. This requires three bits, and we need
* the fifth event to overflow and set the 4th bit. To achieve that we bias the
@@ -173,6 +220,7 @@
/* Bits in MMCRA for PowerISA v2.07 */
#define MMCRA_SAMP_MODE_SHIFT 1
#define MMCRA_SAMP_ELIG_SHIFT 4
+#define MMCRA_SAMP_ELIG_MASK 7
#define MMCRA_THR_CTL_SHIFT 8
#define MMCRA_THR_SEL_SHIFT 16
#define MMCRA_THR_CMP_SHIFT 32
@@ -191,17 +239,25 @@
#define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\
MMCRA_THR_CTR_EXP_MASK)
-/* MMCR1 Threshold Compare bit constant for power9 */
+#define P10_MMCRA_THR_CTR_MANT_MASK 0xFFul
+#define P10_MMCRA_THR_CTR_MANT(v) (((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\
+ P10_MMCRA_THR_CTR_MANT_MASK)
+
+/* MMCRA Threshold Compare bit constant for power9 */
#define p9_MMCRA_THR_CMP_SHIFT 45
/* Bits in MMCR2 for PowerISA v2.07 */
#define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9)))
#define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9)))
+#define MMCR2_FCWAIT(pmc) (1ull << (58 - (((pmc) - 1) * 9)))
#define MMCR2_FCH(pmc) (1ull << (57 - (((pmc) - 1) * 9)))
#define MAX_ALT 2
#define MAX_PMU_COUNTERS 6
+/* Bits in MMCR3 for PowerISA v3.10 */
+#define MMCR3_SHIFT(pmc) (49 - (15 * ((pmc) - 1)))
+
#define ISA207_SIER_TYPE_SHIFT 15
#define ISA207_SIER_TYPE_MASK (0x7ull << ISA207_SIER_TYPE_SHIFT)
@@ -211,19 +267,27 @@
#define ISA207_SIER_DATA_SRC_SHIFT 53
#define ISA207_SIER_DATA_SRC_MASK (0x7ull << ISA207_SIER_DATA_SRC_SHIFT)
+/* Bits in SIER2/SIER3 for Power10 */
+#define P10_SIER2_FINISH_CYC(sier2) (((sier2) >> (63 - 37)) & 0x7fful)
+#define P10_SIER2_DISPATCH_CYC(sier2) (((sier2) >> (63 - 13)) & 0x7fful)
+
#define P(a, b) PERF_MEM_S(a, b)
#define PH(a, b) (P(LVL, HIT) | P(a, b))
#define PM(a, b) (P(LVL, MISS) | P(a, b))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
-int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp);
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
int isa207_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[],
- struct perf_event *pevents[]);
-void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]);
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[], u32 flags);
+void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr);
int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
const unsigned int ev_alt[][MAX_ALT]);
void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
struct pt_regs *regs);
-void isa207_get_mem_weight(u64 *weight);
+void isa207_get_mem_weight(u64 *weight, u64 type);
+
+int isa3XX_check_attr_config(struct perf_event *ev);
#endif
diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
index 4d5ef92511d1..db451b9aac35 100644
--- a/arch/powerpc/perf/mpc7450-pmu.c
+++ b/arch/powerpc/perf/mpc7450-pmu.c
@@ -148,7 +148,7 @@ static u32 classbits[N_CLASSES - 1][2] = {
};
static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, class;
u32 mask, value;
@@ -257,8 +257,9 @@ static const u32 pmcsel_mask[N_COUNTER] = {
* Compute MMCR0/1/2 values for a set of events.
*/
static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
- unsigned long mmcr[],
- struct perf_event *pevents[])
+ struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
u8 event_index[N_CLASSES][N_COUNTER];
int n_classevent[N_CLASSES];
@@ -321,9 +322,16 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
mmcr0 |= MMCR0_PMCnCE;
/* Return MMCRx values */
- mmcr[0] = mmcr0;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcr2;
+ mmcr->mmcr0 = mmcr0;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcr2 = mmcr2;
+ /*
+	 * 32-bit doesn't have an MMCRA and defines SPRN_MMCRA as
+	 * SPRN_MMCR2. So assign the `mmcr2` value to the mmcra field
+	 * of cpu_hw_events, to ensure that any write through
+	 * SPRN_MMCRA uses the mmcr2 value.
+ */
+ mmcr->mmcra = mmcr2;
return 0;
}
@@ -331,12 +339,12 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
* Disable counting by a PMC.
* Note that the pmc argument is 0-based here, not 1-based.
*/
-static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void mpc7450_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
if (pmc <= 1)
- mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+ mmcr->mmcr0 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
else
- mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+ mmcr->mmcr1 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
}
static int mpc7450_generic_events[] = {
@@ -354,7 +362,7 @@ static int mpc7450_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x225 },
[C(OP_WRITE)] = { 0, 0x227 },
@@ -409,8 +417,9 @@ struct power_pmu mpc7450_pmu = {
static int __init init_mpc7450_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
+ if (!pvr_version_is(PVR_VER_7450) && !pvr_version_is(PVR_VER_7455) &&
+ !pvr_version_is(PVR_VER_7447) && !pvr_version_is(PVR_VER_7447A) &&
+ !pvr_version_is(PVR_VER_7448))
return -ENODEV;
return register_power_pmu(&mpc7450_pmu);
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index a213a0aa5d25..350dccb0143c 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -13,9 +13,11 @@
#include <asm/ptrace.h>
#include <asm/perf_regs.h>
+u64 PERF_REG_EXTENDED_MASK;
+
#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
-#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK))
static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]),
@@ -69,11 +71,36 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
};
-u64 perf_reg_value(struct pt_regs *regs, int idx)
+/* Function to return the extended register values */
+static u64 get_ext_regs_value(int idx)
{
- if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
- return 0;
+ switch (idx) {
+ case PERF_REG_POWERPC_PMC1 ... PERF_REG_POWERPC_PMC6:
+ return get_pmcs_ext_regs(idx - PERF_REG_POWERPC_PMC1);
+ case PERF_REG_POWERPC_MMCR0:
+ return mfspr(SPRN_MMCR0);
+ case PERF_REG_POWERPC_MMCR1:
+ return mfspr(SPRN_MMCR1);
+ case PERF_REG_POWERPC_MMCR2:
+ return mfspr(SPRN_MMCR2);
+#ifdef CONFIG_PPC64
+ case PERF_REG_POWERPC_MMCR3:
+ return mfspr(SPRN_MMCR3);
+ case PERF_REG_POWERPC_SIER2:
+ return mfspr(SPRN_SIER2);
+ case PERF_REG_POWERPC_SIER3:
+ return mfspr(SPRN_SIER3);
+ case PERF_REG_POWERPC_SDAR:
+ return mfspr(SPRN_SDAR);
+#endif
+ case PERF_REG_POWERPC_SIAR:
+ return mfspr(SPRN_SIAR);
+ default: return 0;
+ }
+}
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
if (idx == PERF_REG_POWERPC_SIER &&
(IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
IS_ENABLED(CONFIG_PPC32) ||
@@ -85,6 +112,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
IS_ENABLED(CONFIG_PPC32)))
return 0;
+ if (idx >= PERF_REG_POWERPC_MAX && idx < PERF_REG_EXTENDED_MAX)
+ return get_ext_regs_value(idx);
+
+ /*
+	 * If idx refers to a value beyond the
+	 * supported registers, return 0 with a warning.
+ */
+ if (WARN_ON_ONCE(idx >= PERF_REG_EXTENDED_MAX))
+ return 0;
+
return regs_get_register(regs, pt_regs_offset[idx]);
}
@@ -97,17 +134,14 @@ int perf_reg_validate(u64 mask)
u64 perf_reg_abi(struct task_struct *task)
{
-#ifdef CONFIG_PPC64
- if (!test_tsk_thread_flag(task, TIF_32BIT))
- return PERF_SAMPLE_REGS_ABI_64;
+ if (is_tsk_32bit_task(task))
+ return PERF_SAMPLE_REGS_ABI_32;
else
-#endif
- return PERF_SAMPLE_REGS_ABI_32;
+ return PERF_SAMPLE_REGS_ABI_64;
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
regs_user->regs = task_pt_regs(current);
regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
new file mode 100644
index 000000000000..564f14097f07
--- /dev/null
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance counter support for POWER10 processors.
+ *
+ * Copyright 2020 Madhavan Srinivasan, IBM Corporation.
+ * Copyright 2020 Athira Rajeev, IBM Corporation.
+ */
+
+/*
+ * Power10 event codes.
+ */
+EVENT(PM_CYC, 0x600f4);
+EVENT(PM_DISP_STALL_CYC, 0x100f8);
+EVENT(PM_EXEC_STALL, 0x30008);
+EVENT(PM_INST_CMPL, 0x500fa);
+EVENT(PM_BR_CMPL, 0x4d05e);
+EVENT(PM_BR_MPRED_CMPL, 0x400f6);
+EVENT(PM_BR_FIN, 0x2f04a);
+EVENT(PM_MPRED_BR_FIN, 0x3e098);
+EVENT(PM_LD_DEMAND_MISS_L1_FIN, 0x400f0);
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1, 0x100fc);
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1, 0x3e054);
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1, 0x300f0);
+/* L1 cache data prefetches */
+EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c);
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS, 0x200fc);
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1, 0x04080);
+/* Instruction Demand sectors written into IL1 */
+EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040);
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_REQ, 0x040a0);
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3, 0x01340000001c040);
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS, 0x300fe);
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST, 0x010000046080);
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS, 0x26880);
+/* Total HW L3 prefetches (load + store) */
+EVENT(PM_L3_PF_MISS_L3, 0x100000016080);
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS, 0x300fc);
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS, 0x400fc);
+
+EVENT(PM_CYC_ALT, 0x0001e);
+EVENT(PM_INST_CMPL_ALT, 0x00002);
+
+/*
+ * Memory Access Events
+ *
+ * The primary PMU event used here is PM_MRK_INST_CMPL (0x401e0).
+ * To enable capturing of memory profiling, the following MMCRA
+ * bits need to be programmed in the corresponding raw event
+ * format encoding.
+ *
+ * MMCRA bits encoding needed are
+ * SM (Sampling Mode)
+ * EM (Eligibility for Random Sampling)
+ * TECE (Threshold Event Counter Event)
+ * TS (Threshold Start Event)
+ * TE (Threshold End Event)
+ *
+ * Corresponding Raw Encoding bits:
+ * sample [EM,SM]
+ * thresh_sel (TECE)
+ * thresh start (TS)
+ * thresh end (TE)
+ */
+
+EVENT(MEM_LOADS, 0x35340401e0);
+EVENT(MEM_STORES, 0x353c0401e0);
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
new file mode 100644
index 000000000000..62a68b6b2d4b
--- /dev/null
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER10 processors.
+ *
+ * Copyright 2020 Madhavan Srinivasan, IBM Corporation.
+ * Copyright 2020 Athira Rajeev, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "power10-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding for Power10:
+ *
+ * 60 56 52 48 44 40 36 32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * | | [ ] [ src_match ] [ src_mask ] | [ ] [ l2l3_sel ] [ thresh_ctl ]
+ * | | | | | |
+ * | | *- IFM (Linux) | | thresh start/stop -*
+ * | *- BHRB (Linux) | src_sel
+ * *- EBB (Linux) *invert_bit
+ *
+ * 28 24 20 16 12 8 4 0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] | m [ pmcxsel ]
+ * | | | | | | |
+ * | | | | | | *- mark
+ * | | | *- L1/L2/L3 cache_sel | |*-radix_scope_qual
+ * | | sdar_mode |
+ * | *- sampling mode for marked events *- combine
+ * |
+ * *- thresh_sel
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit (PMCxUNIT)
+ * MMCR1[24] = pmc1combine[0]
+ * MMCR1[25] = pmc1combine[1]
+ * MMCR1[26] = pmc2combine[0]
+ * MMCR1[27] = pmc2combine[1]
+ * MMCR1[28] = pmc3combine[0]
+ * MMCR1[29] = pmc3combine[1]
+ * MMCR1[30] = pmc4combine[0]
+ * MMCR1[31] = pmc4combine[1]
+ *
+ * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
+ * MMCR1[20:27] = thresh_ctl
+ * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
+ * MMCR1[20:27] = thresh_ctl
+ * else
+ * MMCRA[48:55] = thresh_ctl (THRESH START/END)
+ *
+ * if thresh_sel:
+ * MMCRA[45:47] = thresh_sel
+ *
+ * if l2l3_sel:
+ * MMCR2[56:60] = l2l3_sel[0:4]
+ *
+ * MMCR1[16] = cache_sel[0]
+ * MMCR1[17] = cache_sel[1]
+ * MMCR1[18] = radix_scope_qual
+ *
+ * if mark:
+ * MMCRA[63] = 1 (SAMPLE_ENABLE)
+ * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG)
+ * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE)
+ *
+ * if EBB and BHRB:
+ * MMCRA[32:33] = IFM
+ *
+ * MMCRA[SDAR_MODE] = sdar_mode[0:1]
+ */
+
+/*
+ * Some power10 event codes.
+ */
+#define EVENT(_name, _code) enum{_name = _code}
+
+#include "power10-events-list.h"
+
+#undef EVENT
+
+/* MMCRA IFM bits - POWER10 */
+#define POWER10_MMCRA_IFM1 0x0000000040000000UL
+#define POWER10_MMCRA_IFM2 0x0000000080000000UL
+#define POWER10_MMCRA_IFM3 0x00000000C0000000UL
+#define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL
+
+extern u64 PERF_REG_EXTENDED_MASK;
+
+/* Table of alternatives, sorted by column 0 */
+static const unsigned int power10_event_alternatives[][MAX_ALT] = {
+ { PM_INST_CMPL_ALT, PM_INST_CMPL },
+ { PM_CYC_ALT, PM_CYC },
+};
+
+static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int num_alt = 0;
+
+ num_alt = isa207_get_alternatives(event, alt,
+ ARRAY_SIZE(power10_event_alternatives), flags,
+ power10_event_alternatives);
+
+ return num_alt;
+}
+
+static int power10_check_attr_config(struct perf_event *ev)
+{
+ u64 val;
+ u64 event = ev->attr.config;
+
+ val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+ if (val == 0x10 || isa3XX_check_attr_config(ev))
+ return -EINVAL;
+
+ return 0;
+}
+
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL);
+GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
+GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS);
+GENERIC_EVENT_ATTR(mem-stores, MEM_STORES);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BR_FIN);
+GENERIC_EVENT_ATTR(branch-misses, PM_MPRED_BR_FIN);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_DEMAND_MISS_L1_FIN);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
+CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_LD_PREFETCH_CACHE_LINE_MISS);
+CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ);
+CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PF_MISS_L3);
+CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
+CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
+
+static struct attribute *power10_events_attr_dd1[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+ GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+ GENERIC_EVENT_PTR(MEM_LOADS),
+ GENERIC_EVENT_PTR(MEM_STORES),
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_REF_L1),
+ CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_INST_FROM_L1),
+ CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_BR_CMPL),
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
+
+static struct attribute *power10_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_FIN),
+ GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+ GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN),
+ GENERIC_EVENT_PTR(MEM_LOADS),
+ GENERIC_EVENT_PTR(MEM_STORES),
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_REF_L1),
+ CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_INST_FROM_L1),
+ CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+ CACHE_EVENT_PTR(PM_L3_PF_MISS_L3),
+ CACHE_EVENT_PTR(PM_L2_ST_MISS),
+ CACHE_EVENT_PTR(PM_L2_ST),
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_BR_CMPL),
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
+
+static const struct attribute_group power10_pmu_events_group_dd1 = {
+ .name = "events",
+ .attrs = power10_events_attr_dd1,
+};
+
+static const struct attribute_group power10_pmu_events_group = {
+ .name = "events",
+ .attrs = power10_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-59");
+PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
+PMU_FORMAT_ATTR(mark, "config:8");
+PMU_FORMAT_ATTR(combine, "config:10-11");
+PMU_FORMAT_ATTR(unit, "config:12-15");
+PMU_FORMAT_ATTR(pmc, "config:16-19");
+PMU_FORMAT_ATTR(cache_sel, "config:20-21");
+PMU_FORMAT_ATTR(sdar_mode, "config:22-23");
+PMU_FORMAT_ATTR(sample_mode, "config:24-28");
+PMU_FORMAT_ATTR(thresh_sel, "config:29-31");
+PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
+PMU_FORMAT_ATTR(thresh_start, "config:36-39");
+PMU_FORMAT_ATTR(l2l3_sel, "config:40-44");
+PMU_FORMAT_ATTR(src_sel, "config:45-46");
+PMU_FORMAT_ATTR(invert_bit, "config:47");
+PMU_FORMAT_ATTR(src_mask, "config:48-53");
+PMU_FORMAT_ATTR(src_match, "config:54-59");
+PMU_FORMAT_ATTR(radix_scope, "config:9");
+PMU_FORMAT_ATTR(thresh_cmp, "config1:0-17");
+
+static struct attribute *power10_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_pmcxsel.attr,
+ &format_attr_mark.attr,
+ &format_attr_combine.attr,
+ &format_attr_unit.attr,
+ &format_attr_pmc.attr,
+ &format_attr_cache_sel.attr,
+ &format_attr_sdar_mode.attr,
+ &format_attr_sample_mode.attr,
+ &format_attr_thresh_sel.attr,
+ &format_attr_thresh_stop.attr,
+ &format_attr_thresh_start.attr,
+ &format_attr_l2l3_sel.attr,
+ &format_attr_src_sel.attr,
+ &format_attr_invert_bit.attr,
+ &format_attr_src_mask.attr,
+ &format_attr_src_match.attr,
+ &format_attr_radix_scope.attr,
+ &format_attr_thresh_cmp.attr,
+ NULL,
+};
+
+static const struct attribute_group power10_pmu_format_group = {
+ .name = "format",
+ .attrs = power10_pmu_format_attr,
+};
+
+static struct attribute *power10_pmu_caps_attrs[] = {
+ NULL
+};
+
+static struct attribute_group power10_pmu_caps_group = {
+ .name = "caps",
+ .attrs = power10_pmu_caps_attrs,
+};
+
+static const struct attribute_group *power10_pmu_attr_groups_dd1[] = {
+ &power10_pmu_format_group,
+ &power10_pmu_events_group_dd1,
+ &power10_pmu_caps_group,
+ NULL,
+};
+
+static const struct attribute_group *power10_pmu_attr_groups[] = {
+ &power10_pmu_format_group,
+ &power10_pmu_events_group,
+ &power10_pmu_caps_group,
+ NULL,
+};
+
+static int power10_generic_events_dd1[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
+};
+
+static int power10_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_FIN,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_DEMAND_MISS_L1_FIN,
+};
+
+static u64 power10_bhrb_filter_map(u64 branch_sample_type)
+{
+ u64 pmu_bhrb_filter = 0;
+
+ /* BHRB and regular PMU events share the same privilege state
+ * filter configuration. BHRB is always recorded along with a
+ * regular PMU event. As the privilege state filter is handled
+ * in the basic PMC configuration of the accompanying regular
+ * PMU event, we ignore any separate BHRB specific request.
+ */
+
+ /* No branch filter requested */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+ return pmu_bhrb_filter;
+
+ /* Invalid branch filter options - HW does not support */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) {
+ pmu_bhrb_filter |= POWER10_MMCRA_IFM2;
+ return pmu_bhrb_filter;
+ }
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) {
+ pmu_bhrb_filter |= POWER10_MMCRA_IFM3;
+ return pmu_bhrb_filter;
+ }
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ pmu_bhrb_filter |= POWER10_MMCRA_IFM1;
+ return pmu_bhrb_filter;
+ }
+
+	/* Everything else is unsupported */
+ return -1;
+}
+
+static void power10_config_bhrb(u64 pmu_bhrb_filter)
+{
+ pmu_bhrb_filter &= POWER10_MMCRA_BHRB_MASK;
+
+ /* Enable BHRB filter in PMU */
+ mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
+}
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_LD_REF_L1,
+ [C(RESULT_MISS)] = PM_LD_MISS_L1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ST_MISS_L1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1,
+ [C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
+ [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_DTLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ITLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_BR_CMPL,
+ [C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+};
+
+static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_LD_REF_L1,
+ [C(RESULT_MISS)] = PM_LD_MISS_L1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ST_MISS_L1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1,
+ [C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
+ [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PM_L2_ST,
+ [C(RESULT_MISS)] = PM_L2_ST_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_DTLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ITLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_BR_CMPL,
+ [C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+};
+
+#undef C
+
+/*
+ * Set the MMCR0[CC56RUN] bit to enable counting for
+ * PMC5 and PMC6 regardless of the state of CTRL[RUN],
+ * so that we can use counters 5 and 6 as PM_INST_CMPL and
+ * PM_CYC.
+ */
+static int power10_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[], u32 flags)
+{
+ int ret;
+
+ ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+ if (!ret)
+ mmcr->mmcr0 |= MMCR0_C56RUN;
+ return ret;
+}
+
+static struct power_pmu power10_pmu = {
+ .name = "POWER10",
+ .n_counter = MAX_PMU_COUNTERS,
+ .add_fields = ISA207_ADD_FIELDS,
+ .test_adder = ISA207_TEST_ADDER,
+ .group_constraint_mask = CNST_CACHE_PMC4_MASK,
+ .group_constraint_val = CNST_CACHE_PMC4_VAL,
+ .compute_mmcr = power10_compute_mmcr,
+ .config_bhrb = power10_config_bhrb,
+ .bhrb_filter_map = power10_bhrb_filter_map,
+ .get_constraint = isa207_get_constraint,
+ .get_alternatives = power10_get_alternatives,
+ .get_mem_data_src = isa207_get_mem_data_src,
+ .get_mem_weight = isa207_get_mem_weight,
+ .disable_pmc = isa207_disable_pmc,
+ .flags = PPMU_HAS_SIER | PPMU_ARCH_207S |
+ PPMU_ARCH_31 | PPMU_HAS_ATTR_CONFIG1,
+ .n_generic = ARRAY_SIZE(power10_generic_events),
+ .generic_events = power10_generic_events,
+ .cache_events = &power10_cache_events,
+ .attr_groups = power10_pmu_attr_groups,
+ .bhrb_nr = 32,
+ .capabilities = PERF_PMU_CAP_EXTENDED_REGS,
+ .check_attr_config = power10_check_attr_config,
+};
+
+int __init init_power10_pmu(void)
+{
+ unsigned int pvr;
+ int rc;
+
+ pvr = mfspr(SPRN_PVR);
+ if (PVR_VER(pvr) != PVR_POWER10)
+ return -ENODEV;
+
+	/* Add the ppmu flag and DD1 event tables for power10 DD1 */
+	if (PVR_CFG(pvr) == 1) {
+		power10_pmu.flags |= PPMU_P10_DD1;
+		power10_pmu.generic_events = power10_generic_events_dd1;
+		power10_pmu.attr_groups = power10_pmu_attr_groups_dd1;
+		power10_pmu.cache_events = &power10_cache_events_dd1;
+	}
+
+	/* Set the PERF_REG_EXTENDED_MASK here */
+	PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
+
+ rc = register_power_pmu(&power10_pmu);
+ if (rc)
+ return rc;
+
+ /* Tell userspace that EBB is supported */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+ return 0;
+}
+
+static struct power_pmu power11_pmu;
+
+int __init init_power11_pmu(void)
+{
+ unsigned int pvr;
+ int rc;
+
+ pvr = mfspr(SPRN_PVR);
+ if (PVR_VER(pvr) != PVR_POWER11)
+ return -ENODEV;
+
+ /* Set the PERF_REG_EXTENDED_MASK here */
+ PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
+
+ power11_pmu = power10_pmu;
+ power11_pmu.name = "Power11";
+
+ rc = register_power_pmu(&power11_pmu);
+ if (rc)
+ return rc;
+
+ /* Tell userspace that EBB is supported */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+ return 0;
+}
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index f8574547fc6b..b4708ab73145 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -10,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
*/
@@ -130,7 +132,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
};
static int power5p_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh;
int bit, fmask;
@@ -448,7 +450,9 @@ static int power5p_marked_instr_event(u64 event)
}
static int power5p_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = 0;
@@ -586,20 +590,20 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
}
/* Return MMCRx values */
- mmcr[0] = 0;
+ mmcr->mmcr0 = 0;
if (pmc_inuse & 1)
- mmcr[0] = MMCR0_PMC1CE;
+ mmcr->mmcr0 = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
- mmcr[0] |= MMCR0_PMCjCE;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
-static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void power5p_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
if (pmc <= 3)
- mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+ mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
static int power5p_generic_events[] = {
@@ -618,7 +622,7 @@ static int power5p_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
[C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
@@ -673,11 +677,11 @@ static struct power_pmu power5p_pmu = {
.cache_events = &power5p_cache_events,
};
-int init_power5p_pmu(void)
+int __init init_power5p_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
- && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++")))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_POWER5p)
return -ENODEV;
return register_power_pmu(&power5p_pmu);
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index da52ecab7c9a..c6aefd0a1cc8 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -10,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER5 (not POWER5++)
*/
@@ -134,7 +136,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
};
static int power5_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh;
int bit, fmask;
@@ -379,7 +381,9 @@ static int power5_marked_instr_event(u64 event)
}
static int power5_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -528,20 +532,20 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
}
/* Return MMCRx values */
- mmcr[0] = 0;
+ mmcr->mmcr0 = 0;
if (pmc_inuse & 1)
- mmcr[0] = MMCR0_PMC1CE;
+ mmcr->mmcr0 = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
- mmcr[0] |= MMCR0_PMCjCE;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
-static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
if (pmc <= 3)
- mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+ mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
static int power5_generic_events[] = {
@@ -560,7 +564,7 @@ static int power5_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x4c1090, 0x3c1088 },
[C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
@@ -614,10 +618,11 @@ static struct power_pmu power5_pmu = {
.flags = PPMU_HAS_SSLOT,
};
-int init_power5_pmu(void)
+int __init init_power5_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_POWER5)
return -ENODEV;
return register_power_pmu(&power5_pmu);
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 3929cacf72ed..9f720b522e17 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -10,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER6
*/
@@ -171,7 +173,8 @@ static int power6_marked_instr_event(u64 event)
* Assign PMC numbers and compute MMCR1 value for a set of events
*/
static int p6_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -243,13 +246,13 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
if (pmc < 4)
mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
}
- mmcr[0] = 0;
+ mmcr->mmcr0 = 0;
if (pmc_inuse & 1)
- mmcr[0] = MMCR0_PMC1CE;
+ mmcr->mmcr0 = MMCR0_PMC1CE;
if (pmc_inuse & 0xe)
- mmcr[0] |= MMCR0_PMCjCE;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
@@ -264,7 +267,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
* 32-34 select field: nest (subunit) event selector
*/
static int p6_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, sh, subunit;
unsigned long mask = 0, value = 0;
@@ -332,26 +335,38 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
};
-/*
- * This could be made more efficient with a binary search on
- * a presorted list, if necessary
- */
static int find_alternatives_list(u64 event)
{
- int i, j;
- unsigned int alt;
-
- for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
- if (event < event_alternatives[i][0])
- return -1;
- for (j = 0; j < MAX_ALT; ++j) {
- alt = event_alternatives[i][j];
- if (!alt || event < alt)
- break;
- if (event == alt)
- return i;
- }
+ const unsigned int presorted_event_table[] = {
+ 0x0130e8, 0x080080, 0x080088, 0x10000a, 0x10000b, 0x10000d, 0x10000e,
+ 0x100010, 0x10001a, 0x100026, 0x100054, 0x100056, 0x1000f0, 0x1000f8,
+ 0x1000fc, 0x200008, 0x20000e, 0x200010, 0x200012, 0x200054, 0x2000f0,
+ 0x2000f2, 0x2000f4, 0x2000f5, 0x2000f6, 0x2000f8, 0x2000fc, 0x2000fe,
+ 0x2d0030, 0x30000a, 0x30000c, 0x300010, 0x300012, 0x30001a, 0x300056,
+ 0x3000f0, 0x3000f2, 0x3000f6, 0x3000f8, 0x3000fc, 0x3000fe, 0x400006,
+ 0x400007, 0x40000a, 0x40000e, 0x400010, 0x400018, 0x400056, 0x4000f0,
+ 0x4000f8, 0x600005
+ };
+ const unsigned int event_index_table[] = {
+ 0, 1, 2, 3, 4, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 12, 14,
+ 7, 15, 2, 9, 16, 3, 4, 0, 17, 10, 18, 19, 20, 1, 17, 15, 19,
+ 18, 2, 16, 21, 8, 0, 22, 13, 14, 11, 21, 5, 20, 22, 1, 6, 3
+ };
+ int hi = ARRAY_SIZE(presorted_event_table) - 1;
+ int lo = 0;
+
+ while (lo <= hi) {
+ int mid = lo + (hi - lo) / 2;
+ unsigned int alt = presorted_event_table[mid];
+
+ if (alt < event)
+ lo = mid + 1;
+ else if (alt > event)
+ hi = mid - 1;
+ else
+ return event_index_table[mid];
}
+
return -1;
}
@@ -457,11 +472,11 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
return nalt;
}
-static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void p6_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
/* Set PMCxSEL to 0 to disable PMCx */
if (pmc <= 3)
- mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+ mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
}
static int power6_generic_events[] = {
@@ -481,7 +496,7 @@ static int power6_generic_events[] = {
* are event codes.
* The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
*/
-static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x280030, 0x80080 },
[C(OP_WRITE)] = { 0x180032, 0x80088 },
@@ -536,10 +551,11 @@ static struct power_pmu power6_pmu = {
.cache_events = &power6_cache_events,
};
-int init_power6_pmu(void)
+int __init init_power6_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_POWER6)
return -ENODEV;
return register_power_pmu(&power6_pmu);
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index a137813a3076..c95ccf2e28da 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -10,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER7
*/
@@ -79,7 +81,7 @@ enum {
*/
static int power7_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, sh, unit;
unsigned long mask = 0, value = 0;
@@ -242,7 +244,9 @@ static int power7_marked_instr_event(u64 event)
}
static int power7_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -298,20 +302,20 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
}
/* Return MMCRx values */
- mmcr[0] = 0;
+ mmcr->mmcr0 = 0;
if (pmc_inuse & 1)
- mmcr[0] = MMCR0_PMC1CE;
+ mmcr->mmcr0 = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
- mmcr[0] |= MMCR0_PMCjCE;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
-static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void power7_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
if (pmc <= 3)
- mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+ mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
}
static int power7_generic_events[] = {
@@ -332,7 +336,7 @@ static int power7_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0xc880, 0x400f0 },
[C(OP_WRITE)] = { 0, 0x300f0 },
@@ -401,7 +405,7 @@ static struct attribute *power7_events_attr[] = {
NULL
};
-static struct attribute_group power7_pmu_events_group = {
+static const struct attribute_group power7_pmu_events_group = {
.name = "events",
.attrs = power7_events_attr,
};
@@ -413,7 +417,7 @@ static struct attribute *power7_pmu_format_attr[] = {
NULL,
};
-static struct attribute_group power7_pmu_format_group = {
+static const struct attribute_group power7_pmu_format_group = {
.name = "format",
.attrs = power7_pmu_format_attr,
};
@@ -441,13 +445,14 @@ static struct power_pmu power7_pmu = {
.cache_events = &power7_cache_events,
};
-int init_power7_pmu(void)
+int __init init_power7_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_POWER7 && PVR_VER(pvr) != PVR_POWER7p)
return -ENODEV;
- if (pvr_version_is(PVR_POWER7p))
+ if (PVR_VER(pvr) == PVR_POWER7p)
power7_pmu.flags |= PPMU_SIAR_VALID;
return register_power_pmu(&power7_pmu);
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 3a5fcc20ff31..ef9685065aaf 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -92,7 +92,7 @@ enum {
*/
/* PowerISA v2.07 format attribute structure*/
-extern struct attribute_group isa207_pmu_format_group;
+extern const struct attribute_group isa207_pmu_format_group;
/* Table of alternatives, sorted by column 0 */
static const unsigned int event_alternatives[][MAX_ALT] = {
@@ -182,14 +182,24 @@ static struct attribute *power8_events_attr[] = {
NULL
};
-static struct attribute_group power8_pmu_events_group = {
+static const struct attribute_group power8_pmu_events_group = {
.name = "events",
.attrs = power8_events_attr,
};
+static struct attribute *power8_pmu_caps_attrs[] = {
+ NULL
+};
+
+static struct attribute_group power8_pmu_caps_group = {
+ .name = "caps",
+ .attrs = power8_pmu_caps_attrs,
+};
+
static const struct attribute_group *power8_pmu_attr_groups[] = {
&isa207_pmu_format_group,
&power8_pmu_events_group,
+ &power8_pmu_caps_group,
NULL,
};
@@ -253,7 +263,7 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter)
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
@@ -378,12 +388,13 @@ static struct power_pmu power8_pmu = {
.bhrb_nr = 32,
};
-int init_power8_pmu(void)
+int __init init_power8_pmu(void)
{
int rc;
+ unsigned int pvr = mfspr(SPRN_PVR);
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
+ if (PVR_VER(pvr) != PVR_POWER8E && PVR_VER(pvr) != PVR_POWER8NVL &&
+ PVR_VER(pvr) != PVR_POWER8)
return -ENODEV;
rc = register_power_pmu(&power8_pmu);
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 08c3ef796198..cb6a7dc02dd7 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -90,13 +90,15 @@ enum {
#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
#define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL
+extern u64 PERF_REG_EXTENDED_MASK;
+
/* Nasty Power9 specific hack */
#define PVR_POWER9_CUMULUS 0x00002000
/* PowerISA v2.07 format attribute structure*/
-extern struct attribute_group isa207_pmu_format_group;
+extern const struct attribute_group isa207_pmu_format_group;
-int p9_dd21_bl_ev[] = {
+static int p9_dd21_bl_ev[] = {
PM_MRK_ST_DONE_L2,
PM_RADIX_PWC_L1_HIT,
PM_FLOP_CMPL,
@@ -110,7 +112,7 @@ int p9_dd21_bl_ev[] = {
PM_DISP_HELD_SYNC_HOLD,
};
-int p9_dd22_bl_ev[] = {
+static int p9_dd22_bl_ev[] = {
PM_DTLB_MISS_16G,
PM_DERAT_MISS_2M,
PM_DTLB_MISS_2M,
@@ -131,11 +133,11 @@ int p9_dd22_bl_ev[] = {
/* Table of alternatives, sorted by column 0 */
static const unsigned int power9_event_alternatives[][MAX_ALT] = {
- { PM_INST_DISP, PM_INST_DISP_ALT },
- { PM_RUN_CYC_ALT, PM_RUN_CYC },
- { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
- { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
{ PM_BR_2PATH, PM_BR_2PATH_ALT },
+ { PM_INST_DISP, PM_INST_DISP_ALT },
+ { PM_RUN_CYC_ALT, PM_RUN_CYC },
+ { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
+ { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
};
static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
@@ -149,6 +151,18 @@ static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
return num_alt;
}
+static int power9_check_attr_config(struct perf_event *ev)
+{
+ u64 val;
+ u64 event = ev->attr.config;
+
+ val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+ if (val == 0xC || isa3XX_check_attr_config(ev))
+ return -EINVAL;
+
+ return 0;
+}
+
GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC);
GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL);
@@ -203,7 +217,7 @@ static struct attribute *power9_events_attr[] = {
NULL
};
-static struct attribute_group power9_pmu_events_group = {
+static const struct attribute_group power9_pmu_events_group = {
.name = "events",
.attrs = power9_events_attr,
};
@@ -239,14 +253,24 @@ static struct attribute *power9_pmu_format_attr[] = {
NULL,
};
-static struct attribute_group power9_pmu_format_group = {
+static const struct attribute_group power9_pmu_format_group = {
.name = "format",
.attrs = power9_pmu_format_attr,
};
+static struct attribute *power9_pmu_caps_attrs[] = {
+ NULL
+};
+
+static struct attribute_group power9_pmu_caps_group = {
+ .name = "caps",
+ .attrs = power9_pmu_caps_attrs,
+};
+
static const struct attribute_group *power9_pmu_attr_groups[] = {
&power9_pmu_format_group,
&power9_pmu_events_group,
+ &power9_pmu_caps_group,
NULL,
};
@@ -310,7 +334,7 @@ static void power9_config_bhrb(u64 pmu_bhrb_filter)
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
@@ -434,16 +458,16 @@ static struct power_pmu power9_pmu = {
.cache_events = &power9_cache_events,
.attr_groups = power9_pmu_attr_groups,
.bhrb_nr = 32,
+ .capabilities = PERF_PMU_CAP_EXTENDED_REGS,
+ .check_attr_config = power9_check_attr_config,
};
-int init_power9_pmu(void)
+int __init init_power9_pmu(void)
{
int rc = 0;
unsigned int pvr = mfspr(SPRN_PVR);
- /* Comes from cpu_specs[] */
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9"))
+ if (PVR_VER(pvr) != PVR_POWER9)
return -ENODEV;
/* Blacklist events */
@@ -457,6 +481,9 @@ int init_power9_pmu(void)
}
}
+ /* Set the PERF_REG_EXTENDED_MASK here */
+ PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300;
+
rc = register_power_pmu(&power9_pmu);
if (rc)
return rc;
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 4035d93d87ab..762676fb839e 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -9,6 +9,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for PPC970
*/
@@ -188,7 +190,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
};
static int p970_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh, spcsel;
unsigned long mask = 0, value = 0;
@@ -253,7 +255,9 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
}
static int p970_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
unsigned int pmc, unit, byte, psel;
@@ -393,27 +397,26 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
/* Return MMCRx values */
- mmcr[0] = mmcr0;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 = mmcr0;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
-static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
- int shift, i;
+ int shift;
+ /*
+ * Setting the PMCxSEL field to 0x08 disables PMC x.
+ */
if (pmc <= 1) {
shift = MMCR0_PMC1SEL_SH - 7 * pmc;
- i = 0;
+ mmcr->mmcr0 = (mmcr->mmcr0 & ~(0x1fUL << shift)) | (0x08UL << shift);
} else {
shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
- i = 1;
+ mmcr->mmcr1 = (mmcr->mmcr1 & ~(0x1fUL << shift)) | (0x08UL << shift);
}
- /*
- * Setting the PMCxSEL field to 0x08 disables PMC x.
- */
- mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
}
static int ppc970_generic_events[] = {
@@ -432,7 +435,7 @@ static int ppc970_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x8810, 0x3810 },
[C(OP_WRITE)] = { 0x7810, 0x813 },
@@ -486,11 +489,12 @@ static struct power_pmu ppc970_pmu = {
.flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
};
-int init_ppc970_pmu(void)
+int __init init_ppc970_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
- && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP")))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_970 && PVR_VER(pvr) != PVR_970MP &&
+ PVR_VER(pvr) != PVR_970FX && PVR_VER(pvr) != PVR_970GX)
return -ENODEV;
return register_power_pmu(&ppc970_pmu);
diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h
index fa9bc804e67a..6b2a59fefffa 100644
--- a/arch/powerpc/perf/req-gen/perf.h
+++ b/arch/powerpc/perf/req-gen/perf.h
@@ -139,6 +139,26 @@ PMU_EVENT_ATTR_STRING( \
#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \
r_fields
+/* Generate event list for platforms with counter_info_version 0x6 or below */
+static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = {
+#include REQUEST_FILE
+ NULL
+};
+
+/*
+ * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci
+ * events were deprecated for platform firmware that supports
+ * counter_info_version 0x8 or above.
+ * Those deprecated events are still present in platform firmware that
+ * supports counter_info_version 0x6 and below. As per the getPerfCountInfo
+ * v1.018 documentation there is no counter_info_version 0x7.
+ * Undefine the macro ENABLE_EVENTS_COUNTERINFO_V6 to keep the deprecated
+ * events out of the "hv_gpci_event_attrs" attribute group on platforms
+ * that support counter_info_version 0x8 or above.
+ */
+#undef ENABLE_EVENTS_COUNTERINFO_V6
+
+/* Generate event list for platforms with counter_info_version 0x8 or above */
static __maybe_unused struct attribute *hv_gpci_event_attrs[] = {
#include REQUEST_FILE
NULL