Diffstat (limited to 'arch/powerpc/perf/core-book3s.c'):
 arch/powerpc/perf/core-book3s.c | 527
 1 file changed, 414 insertions(+), 113 deletions(-)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 48604625ab31..b7ff680cde96 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -17,6 +17,8 @@
#include <asm/firmware.h>
#include <asm/ptrace.h>
#include <asm/code-patching.h>
+#include <asm/hw_irq.h>
+#include <asm/interrupt.h>
#ifdef CONFIG_PPC64
#include "internal.h"
@@ -37,12 +39,7 @@ struct cpu_hw_events {
struct perf_event *event[MAX_HWEVENTS];
u64 events[MAX_HWEVENTS];
unsigned int flags[MAX_HWEVENTS];
- /*
- * The order of the MMCR array is:
- * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2
- * - 32-bit, MMCR0, MMCR1, MMCR2
- */
- unsigned long mmcr[4];
+ struct mmcr_regs mmcr;
struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
@@ -59,6 +56,9 @@ struct cpu_hw_events {
struct perf_branch_stack bhrb_stack;
struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
u64 ic_init;
+
+ /* Store the PMC values */
+ unsigned long pmcs[MAX_HWEVENTS];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -77,6 +77,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
/*
* 32-bit doesn't have MMCRA but does have an MMCR2,
* and a few other names are different.
+ * Also 32-bit doesn't have MMCR3, SIER2 and SIER3.
+ * Define them as zero, knowing that any code path accessing
+ * these registers (via mtspr/mfspr) is guarded by a ppmu flag
+ * check for PPMU_ARCH_31, so that code path is never entered
+ * on 32-bit.
*/
#ifdef CONFIG_PPC32
@@ -90,7 +95,12 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
#define MMCR0_PMCC_U6 0
#define SPRN_MMCRA SPRN_MMCR2
+#define SPRN_MMCR3 0
+#define SPRN_SIER2 0
+#define SPRN_SIER3 0
#define MMCRA_SAMPLE_ENABLE 0
+#define MMCRA_BHRB_DISABLE 0
+#define MMCR0_PMCCEXT 0
static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
@@ -105,10 +115,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
{
regs->result = 0;
}
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
- return 0;
-}
static inline int siar_valid(struct pt_regs *regs)
{
@@ -121,24 +127,38 @@ static void ebb_event_add(struct perf_event *event) { }
static void ebb_switch_out(unsigned long mmcr0) { }
static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
{
- return cpuhw->mmcr[0];
+ return cpuhw->mmcr.mmcr0;
}
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) {}
static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
bool is_sier_available(void)
{
+ if (!ppmu)
+ return false;
+
if (ppmu->flags & PPMU_HAS_SIER)
return true;
return false;
}
+/*
+ * Return PMC value corresponding to the
+ * index passed.
+ */
+unsigned long get_pmcs_ext_regs(int idx)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ return cpuhw->pmcs[idx];
+}
+
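(For context, not part of this hunk: caching the PMC values and exposing get_pmcs_ext_regs() is meant to back an extended-regs interface. A consumer would look roughly like the sketch below; the PERF_REG_POWERPC_PMC1..PMC6 index names and the exact mapping are assumptions for illustration, not taken from this diff.)

	/*
	 * Hedged sketch: map an extended-reg index onto the PMC value
	 * cached in cpuhw->pmcs (filled in from the PMI handler later
	 * in this diff).
	 */
	static u64 get_ext_regs_value(int idx)
	{
		switch (idx) {
		case PERF_REG_POWERPC_PMC1 ... PERF_REG_POWERPC_PMC6:
			return get_pmcs_ext_regs(idx - PERF_REG_POWERPC_PMC1);
		default:
			return 0;
		}
	}
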
static bool regs_use_siar(struct pt_regs *regs)
{
/*
@@ -150,7 +170,7 @@ static bool regs_use_siar(struct pt_regs *regs)
* they have not been setup using perf_read_regs() and so regs->result
* is something random.
*/
- return ((TRAP(regs) == 0xf00) && regs->result);
+ return ((TRAP(regs) == INTERRUPT_PERFMON) && regs->result);
}
/*
@@ -204,7 +224,7 @@ static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
- if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
+ if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel)
*addrp = 0;
}
@@ -246,11 +266,32 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs)
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
bool use_siar = regs_use_siar(regs);
+ unsigned long mmcra = regs->dsisr;
+ int marked = mmcra & MMCRA_SAMPLE_ENABLE;
if (!use_siar)
return perf_flags_from_msr(regs);
/*
+ * Check the address in SIAR to identify the
+ * privilege levels since the SIER[MSR_HV, MSR_PR]
+ * bits are not set for marked events in power10
+ * DD1.
+ */
+ if (marked && (ppmu->flags & PPMU_P10_DD1)) {
+ unsigned long siar = mfspr(SPRN_SIAR);
+ if (siar) {
+ if (is_kernel_addr(siar))
+ return PERF_RECORD_MISC_KERNEL;
+ return PERF_RECORD_MISC_USER;
+ } else {
+ if (is_kernel_addr(regs->nip))
+ return PERF_RECORD_MISC_KERNEL;
+ return PERF_RECORD_MISC_USER;
+ }
+ }
+
+ /*
* If we don't have flags in MMCRA, rather than using
* the MSR, we intuit the flags from the address in
* SIAR which should give slightly more reliable
@@ -300,6 +341,13 @@ static inline void perf_read_regs(struct pt_regs *regs)
* If the PMU doesn't update the SIAR for non marked events use
* pt_regs.
*
+ * If regs is a kernel interrupt, always use SIAR. Some PMUs have an
+ * issue with regs_sipr not being in synch with SIAR in interrupt entry
+ * and return sequences, which can result in regs_sipr being true for
+ * kernel interrupts and SIAR, which has the effect of causing samples
+ * to pile up at mtmsrd MSR[EE] 0->1 or pending irq replay around
+ * interrupt entry/exit.
+ *
* If the PMU has HV/PR flags then check to see if they
* place the exception in userspace. If so, use pt_regs. In
* continuous sampling mode the SIAR and the PMU exception are
@@ -308,7 +356,7 @@ static inline void perf_read_regs(struct pt_regs *regs)
* hypervisor samples as well as samples in the kernel with
* interrupts off hence the userspace check.
*/
- if (TRAP(regs) != 0xf00)
+ if (TRAP(regs) != INTERRUPT_PERFMON)
use_siar = 0;
else if ((ppmu->flags & PPMU_NO_SIAR))
use_siar = 0;
@@ -316,6 +364,8 @@ static inline void perf_read_regs(struct pt_regs *regs)
use_siar = 1;
else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
use_siar = 0;
+ else if (!user_mode(regs))
+ use_siar = 1;
else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs))
use_siar = 0;
else
@@ -325,15 +375,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
}
/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
- return (regs->softe & IRQS_DISABLED);
-}
-
-/*
* On processors like P7+ that have the SIAR-Valid bit, marked instructions
* must be sampled only if the SIAR-valid bit is set.
*
@@ -346,7 +387,14 @@ static inline int siar_valid(struct pt_regs *regs)
int marked = mmcra & MMCRA_SAMPLE_ENABLE;
if (marked) {
- if (ppmu->flags & PPMU_HAS_SIER)
+ /*
+ * SIER[SIAR_VALID] is not set for some
+ * marked events on power10 DD1, so drop
+ * the check for SIER[SIAR_VALID] and return true.
+ */
+ if (ppmu->flags & PPMU_P10_DD1)
+ return 0x1;
+ else if (ppmu->flags & PPMU_HAS_SIER)
return regs->dar & SIER_SIAR_VALID;
if (ppmu->flags & PPMU_SIAR_VALID)
@@ -376,7 +424,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
cpuhw->bhrb_context = event->ctx;
}
cpuhw->bhrb_users++;
- perf_sched_cb_inc(event->ctx->pmu);
+ perf_sched_cb_inc(event->pmu);
}
static void power_pmu_bhrb_disable(struct perf_event *event)
@@ -388,7 +436,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
WARN_ON_ONCE(!cpuhw->bhrb_users);
cpuhw->bhrb_users--;
- perf_sched_cb_dec(event->ctx->pmu);
+ perf_sched_cb_dec(event->pmu);
if (!cpuhw->disabled && !cpuhw->bhrb_users) {
/* BHRB cannot be turned off when other
@@ -403,7 +451,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
/* Called from ctxsw to prevent one process's branch entries to
* mingle with the other process's entries during context switch.
*/
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
if (!ppmu->bhrb_nr)
return;
@@ -415,24 +463,20 @@ static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
static __u64 power_pmu_bhrb_to(u64 addr)
{
unsigned int instr;
- int ret;
__u64 target;
if (is_kernel_addr(addr)) {
- if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+ if (copy_from_kernel_nofault(&instr, (void *)addr,
+ sizeof(instr)))
return 0;
return branch_target(&instr);
}
/* Userspace: need copy instruction here then translate it */
- pagefault_disable();
- ret = __get_user_inatomic(instr, (unsigned int __user *)addr);
- if (ret) {
- pagefault_enable();
+ if (copy_from_user_nofault(&instr, (unsigned int __user *)addr,
+ sizeof(instr)))
return 0;
- }
- pagefault_enable();
target = branch_target(&instr);
if ((!target) || (instr & BRANCH_ABSOLUTE))
@@ -470,8 +514,11 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *
* addresses at this point. Check the privileges before
* exporting it to userspace (avoid exposure of regions
* where we could have speculative execution)
+ * In case of ISA v3.1, BHRB captures only user-space
+ * addresses, hence add this check before the filtering code.
*/
- if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
+ if (!(ppmu->flags & PPMU_ARCH_31) &&
+ is_kernel_addr(addr) && event->attr.exclude_kernel)
continue;
/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -524,6 +571,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *
}
}
cpuhw->bhrb_stack.nr = u_index;
+ cpuhw->bhrb_stack.hw_idx = -1ULL;
return;
}
@@ -589,11 +637,16 @@ static void ebb_switch_out(unsigned long mmcr0)
current->thread.sdar = mfspr(SPRN_SDAR);
current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK;
current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
+ if (ppmu->flags & PPMU_ARCH_31) {
+ current->thread.mmcr3 = mfspr(SPRN_MMCR3);
+ current->thread.sier2 = mfspr(SPRN_SIER2);
+ current->thread.sier3 = mfspr(SPRN_SIER3);
+ }
}
static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
{
- unsigned long mmcr0 = cpuhw->mmcr[0];
+ unsigned long mmcr0 = cpuhw->mmcr.mmcr0;
if (!ebb)
goto out;
@@ -627,7 +680,13 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
* unfreeze counters, it should not set exclude_xxx in its events and
* instead manage the MMCR2 entirely by itself.
*/
- mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2);
+ mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2);
+
+ if (ppmu->flags & PPMU_ARCH_31) {
+ mtspr(SPRN_MMCR3, current->thread.mmcr3);
+ mtspr(SPRN_SIER2, current->thread.sier2);
+ mtspr(SPRN_SIER3, current->thread.sier3);
+ }
out:
return mmcr0;
}
@@ -717,6 +776,34 @@ static void pmao_restore_workaround(bool ebb)
mtspr(SPRN_PMC6, pmcs[5]);
}
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers) that allows PMI interrupts
+ * through to improve accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+ struct cpu_hw_events *cpuhw;
+
+ /*
+ * This could simply test local_paca->pmcregs_in_use if that were not
+ * under ifdef KVM.
+ */
+ if (!ppmu)
+ return false;
+
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+ return cpuhw->n_events;
+}
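(A minimal sketch of the intended consumer of power_pmu_wants_prompt_pmi(), assuming the soft-mask code uses a helper along these lines; the name should_hard_irq_enable() and its surrounding policy are illustrative, not part of this diff.)

	/*
	 * Hedged sketch: the IRQ masking code can choose to keep MSR[EE]
	 * enabled inside interrupt handlers only when prompt PMIs are wanted.
	 */
	static inline bool should_hard_irq_enable(void)
	{
		if (!IS_ENABLED(CONFIG_PERF_EVENTS))
			return false;
		return power_pmu_wants_prompt_pmi();
	}
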
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
@@ -799,6 +886,19 @@ static void write_pmc(int idx, unsigned long val)
}
}
+static int any_pmc_overflown(struct cpu_hw_events *cpuhw)
+{
+ int i, idx;
+
+ for (i = 0; i < cpuhw->n_events; i++) {
+ idx = cpuhw->event[i]->hw.idx;
+ if ((idx) && ((int)read_pmc(idx) < 0))
+ return idx;
+ }
+
+ return 0;
+}
+
/* Called from sysrq_handle_showregs() */
void perf_event_print_debug(void)
{
@@ -848,6 +948,11 @@ void perf_event_print_debug(void)
pr_info("EBBRR: %016lx BESCR: %016lx\n",
mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
}
+
+ if (ppmu->flags & PPMU_ARCH_31) {
+ pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n",
+ mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3));
+ }
#endif
pr_info("SIAR: %016lx SDAR: %016lx SIER: %016lx\n",
mfspr(SPRN_SIAR), sdar, sier);
@@ -863,7 +968,7 @@ void perf_event_print_debug(void)
*/
static int power_check_constraints(struct cpu_hw_events *cpuhw,
u64 event_id[], unsigned int cflags[],
- int n_ev)
+ int n_ev, struct perf_event **event)
{
unsigned long mask, value, nv;
unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
@@ -886,7 +991,7 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
event_id[i] = cpuhw->alternatives[i][0];
}
if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
- &cpuhw->avalues[i][0]))
+ &cpuhw->avalues[i][0], event[i]->attr.config1))
return -1;
}
value = mask = 0;
@@ -921,7 +1026,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
for (j = 1; j < n_alt[i]; ++j)
ppmu->get_constraint(cpuhw->alternatives[i][j],
&cpuhw->amasks[i][j],
- &cpuhw->avalues[i][j]);
+ &cpuhw->avalues[i][j],
+ event[i]->attr.config1);
}
/* enumerate all possibilities and see if any will work */
@@ -1036,7 +1142,7 @@ static u64 check_and_compute_delta(u64 prev, u64 val)
/*
* POWER7 can roll back counter values, if the new value is smaller
* than the previous value it will cause the delta and the counter to
- * have bogus values unless we rolled a counter over. If a coutner is
+ * have bogus values unless we rolled a counter over. If a counter is
* rolled back, it will be smaller, but within 256, which is the maximum
* number of events to rollback at once. If we detect a rollback
* return 0. This can lead to a small lack of precision in the
@@ -1199,7 +1305,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
static void power_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
- unsigned long flags, mmcr0, val;
+ unsigned long flags, mmcr0, val, mmcra;
if (!ppmu)
return;
@@ -1217,11 +1323,16 @@ static void power_pmu_disable(struct pmu *pmu)
/*
* Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56
+ * Also clear PMXE to disable PMI's getting triggered in some
+ * corner cases during PMU disable.
*/
val = mmcr0 = mfspr(SPRN_MMCR0);
val |= MMCR0_FC;
val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
- MMCR0_FC56);
+ MMCR0_PMXE | MMCR0_FC56);
+ /* Set mmcr0 PMCCEXT for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+ val |= MMCR0_PMCCEXT;
/*
* The barrier is to make sure the mtspr has been
@@ -1233,11 +1344,45 @@ static void power_pmu_disable(struct pmu *pmu)
isync();
/*
+ * Some corner cases could clear the PMU counter overflow
+ * while a masked PMI is pending. One such case is when
+ * a PMI happens during interrupt replay and perf counter
+ * values are cleared by PMU callbacks before replay.
+ *
+ * Disable the interrupt by clearing the paca bit for PMI
+ * since we are disabling the PMU now. Otherwise provide a
+ * warning if there is PMI pending, but no counter is found
+ * overflown.
+ *
+ * Since power_pmu_disable runs under local_irq_save, it
+ * could happen that code hits a PMC overflow without PMI
+ * pending in paca. Hence only clear PMI pending if it was
+ * set.
+ *
+ * If a PMI is pending, then MSR[EE] must already be disabled
+ * (because the masked PMI handler disables EE), so it is safe
+ * to call clear_pmi_irq_pending().
+ */
+ if (pmi_irq_pending())
+ clear_pmi_irq_pending();
+
+ val = mmcra = cpuhw->mmcr.mmcra;
+
+ /*
* Disable instruction sampling if it was enabled
*/
- if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
- mtspr(SPRN_MMCRA,
- cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+ val &= ~MMCRA_SAMPLE_ENABLE;
+
+ /* Disable BHRB via mmcra (BHRBRD) for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+ val |= MMCRA_BHRB_DISABLE;
+
+ /*
+ * Write SPRN_MMCRA if mmcra has either disabled
+ * instruction sampling or BHRB.
+ */
+ if (val != mmcra) {
+ mtspr(SPRN_MMCRA, val);
mb();
isync();
}
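(For reference, pmi_irq_pending() and clear_pmi_irq_pending() used in the hunk above are expected to be thin wrappers over the PACA soft-mask state, roughly as sketched below; the field and flag names are assumed from asm/hw_irq.h and are not shown in this diff.)

	/* Hedged sketch: a masked-but-pending PMI is just a bit in the paca. */
	static inline bool pmi_irq_pending(void)
	{
		return local_paca->irq_happened & PACA_IRQ_PMI;
	}

	static inline void clear_pmi_irq_pending(void)
	{
		/* Safe because a masked PMI implies MSR[EE] is already off. */
		local_paca->irq_happened &= ~PACA_IRQ_PMI;
	}
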
@@ -1311,18 +1456,29 @@ static void power_pmu_enable(struct pmu *pmu)
* (possibly updated for removal of events).
*/
if (!cpuhw->n_added) {
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
- mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+ /*
+ * If there is any active event with an overflown PMC
+ * value, set back PACA_IRQ_PMI which would have been
+ * cleared in power_pmu_disable().
+ */
+ hard_irq_disable();
+ if (any_pmc_overflown(cpuhw))
+ set_pmi_irq_pending();
+
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+ if (ppmu->flags & PPMU_ARCH_31)
+ mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
goto out_enable;
}
/*
* Clear all MMCR settings and recompute them for the new set of events.
*/
- memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
+ memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
- cpuhw->mmcr, cpuhw->event)) {
+ &cpuhw->mmcr, cpuhw->event, ppmu->flags)) {
/* shouldn't ever get here */
printk(KERN_ERR "oops compute_mmcr failed\n");
goto out;
@@ -1336,11 +1492,11 @@ static void power_pmu_enable(struct pmu *pmu)
*/
event = cpuhw->event[0];
if (event->attr.exclude_user)
- cpuhw->mmcr[0] |= MMCR0_FCP;
+ cpuhw->mmcr.mmcr0 |= MMCR0_FCP;
if (event->attr.exclude_kernel)
- cpuhw->mmcr[0] |= freeze_events_kernel;
+ cpuhw->mmcr.mmcr0 |= freeze_events_kernel;
if (event->attr.exclude_hv)
- cpuhw->mmcr[0] |= MMCR0_FCHV;
+ cpuhw->mmcr.mmcr0 |= MMCR0_FCHV;
}
/*
@@ -1349,12 +1505,15 @@ static void power_pmu_enable(struct pmu *pmu)
* Then unfreeze the events.
*/
ppc_set_pmu_inuse(1);
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
- mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
- mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+ mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
| MMCR0_FC);
if (ppmu->flags & PPMU_ARCH_207S)
- mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
+ mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2);
+
+ if (ppmu->flags & PPMU_ARCH_31)
+ mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
/*
* Read off any pre-existing events that need to move
@@ -1405,7 +1564,7 @@ static void power_pmu_enable(struct pmu *pmu)
perf_event_update_userpage(event);
}
cpuhw->n_limited = n_lim;
- cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+ cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE;
out_enable:
pmao_restore_workaround(ebb);
@@ -1421,9 +1580,9 @@ static void power_pmu_enable(struct pmu *pmu)
/*
* Enable instruction sampling if necessary
*/
- if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+ if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) {
mb();
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra);
}
out:
@@ -1507,7 +1666,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
goto out;
- if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
+ if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event))
goto out;
event->hw.config = cpuhw->events[n0];
@@ -1520,9 +1679,16 @@ nocheck:
ret = 0;
out:
if (has_branch_stack(event)) {
- power_pmu_bhrb_enable(event);
- cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
- event->attr.branch_sample_type);
+ u64 bhrb_filter = -1;
+
+ if (ppmu->bhrb_filter_map)
+ bhrb_filter = ppmu->bhrb_filter_map(
+ event->attr.branch_sample_type);
+
+ if (bhrb_filter != -1) {
+ cpuhw->bhrb_filter = bhrb_filter;
+ power_pmu_bhrb_enable(event);
+ }
}
perf_pmu_enable(event->pmu);
@@ -1553,7 +1719,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
cpuhw->flags[i-1] = cpuhw->flags[i];
}
--cpuhw->n_events;
- ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
+ ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr);
if (event->hw.idx) {
write_pmc(event->hw.idx, 0);
event->hw.idx = 0;
@@ -1574,7 +1740,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
}
if (cpuhw->n_events == 0) {
/* disable exceptions if no events are running */
- cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+ cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE);
}
if (has_branch_stack(event))
@@ -1710,7 +1876,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
n = cpuhw->n_events;
if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
return -EAGAIN;
- i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
+ i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event);
if (i < 0)
return -EAGAIN;
@@ -1798,7 +1964,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
static int hw_perf_cache_event(u64 config, u64 *eventp)
{
unsigned long type, op, result;
- int ev;
+ u64 ev;
if (!ppmu->cache_events)
return -EINVAL;
@@ -1837,14 +2003,13 @@ static bool is_event_blacklisted(u64 ev)
static int power_pmu_event_init(struct perf_event *event)
{
u64 ev;
- unsigned long flags;
+ unsigned long flags, irq_flags;
struct perf_event *ctrs[MAX_HWEVENTS];
u64 events[MAX_HWEVENTS];
unsigned int cflags[MAX_HWEVENTS];
int n;
int err;
struct cpu_hw_events *cpuhw;
- u64 bhrb_filter;
if (!ppmu)
return -ENOENT;
@@ -1883,6 +2048,17 @@ static int power_pmu_event_init(struct perf_event *event)
return -ENOENT;
}
+ /*
+ * Some PMU config register fields are reserved, and some
+ * specific values for bit fields are reserved.
+ * For example, MMCRA[61:62] is Random Sampling Mode (SM)
+ * and the value 0b11 for this field is reserved.
+ * Check for invalid values in attr.config.
+ */
+ if (ppmu->check_attr_config &&
+ ppmu->check_attr_config(event))
+ return -EINVAL;
+
event->hw.config_base = ev;
event->hw.idx = 0;
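(To make the new hook concrete: a check_attr_config() implementation for the MMCRA[61:62] case mentioned above could look like the sketch below. The EVENT_SAMPLE_SHIFT/EVENT_SAMPLE_MASK names and the field position are hypothetical assumptions about the raw event encoding, not values taken from this diff.)

	/*
	 * Hedged sketch of a per-PMU check_attr_config() hook: reject the
	 * reserved 0b11 value in a sampling-mode bit field of attr.config.
	 * A non-zero return makes power_pmu_event_init() fail with -EINVAL.
	 */
	static int example_check_attr_config(struct perf_event *ev)
	{
		u64 sm = (ev->attr.config >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;

		if ((sm & 0x3) == 0x3)	/* reserved Random Sampling Mode value */
			return -EINVAL;

		return 0;
	}
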
@@ -1946,21 +2122,43 @@ static int power_pmu_event_init(struct perf_event *event)
if (check_excludes(ctrs, cflags, n, 1))
return -EINVAL;
- cpuhw = &get_cpu_var(cpu_hw_events);
- err = power_check_constraints(cpuhw, events, cflags, n + 1);
+ local_irq_save(irq_flags);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs);
if (has_branch_stack(event)) {
- bhrb_filter = ppmu->bhrb_filter_map(
+ u64 bhrb_filter = -1;
+
+ /*
+ * Currently no PMU supports having multiple branch filters
+ * at the same time. Branch filters are set via MMCRA IFM[32:33]
+ * bits for Power8 and above. Return EOPNOTSUPP when multiple
+ * branch filters are requested in the event attr.
+ *
+ * When opening an event via perf_event_open(), branch_sample_type
+ * gets adjusted in perf_copy_attr(): the kernel automatically
+ * adds PERF_SAMPLE_BRANCH_PLM_ALL bits based on the event
+ * modifier settings, hence those bits are excluded from the
+ * check here.
+ */
+ if (hweight64(event->attr.branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) > 1) {
+ local_irq_restore(irq_flags);
+ return -EOPNOTSUPP;
+ }
+
+ if (ppmu->bhrb_filter_map)
+ bhrb_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
if (bhrb_filter == -1) {
- put_cpu_var(cpu_hw_events);
+ local_irq_restore(irq_flags);
return -EOPNOTSUPP;
}
cpuhw->bhrb_filter = bhrb_filter;
}
- put_cpu_var(cpu_hw_events);
+ local_irq_restore(irq_flags);
if (err)
return -EINVAL;
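(A concrete example of what the new hweight64() check rejects, with illustrative attribute values not taken from this diff: requesting two distinct branch filters in one event fails, while the privilege-level bits alone do not count against the limit.)

	struct perf_event_attr attr = {
		.type			= PERF_TYPE_RAW,
		/* .config would hold a raw PMU event code */
		.sample_type		= PERF_SAMPLE_BRANCH_STACK,
		.branch_sample_type	= PERF_SAMPLE_BRANCH_ANY_CALL |
					  PERF_SAMPLE_BRANCH_ANY_RETURN,
	};
	/*
	 * hweight64(attr.branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) == 2,
	 * so power_pmu_event_init() returns -EOPNOTSUPP for this attr.
	 */
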
@@ -2028,6 +2226,9 @@ static struct pmu power_pmu = {
.sched_task = power_pmu_sched_task,
};
+#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
+ PERF_SAMPLE_PHYS_ADDR | \
+ PERF_SAMPLE_DATA_PAGE_SIZE)
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
@@ -2063,7 +2264,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
left += period;
if (left <= 0)
left = period;
- record = siar_valid(regs);
+
+ /*
+ * If the instruction address is not requested in the sample
+ * (via PERF_SAMPLE_IP), record the sample irrespective of
+ * the SIAR-valid check.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_IP)
+ record = siar_valid(regs);
+ else
+ record = 1;
+
event->hw.last_period = event->hw.sample_period;
}
if (left < 0x80000000LL)
@@ -2076,6 +2287,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
perf_event_update_userpage(event);
/*
+ * Due to a hardware limitation, SIAR can sometimes sample a kernel
+ * address even when freeze on supervisor state (kernel) is set in
+ * MMCR2. Check attr.exclude_kernel and the address to drop the
+ * sample in these cases.
+ */
+ if (event->attr.exclude_kernel &&
+ (event->attr.sample_type & PERF_SAMPLE_IP) &&
+ is_kernel_addr(mfspr(SPRN_SIAR)))
+ record = 0;
+
+ /*
* Finally record data if requested.
*/
if (record) {
@@ -2083,27 +2305,33 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
- if (event->attr.sample_type &
- (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
+ if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE)
perf_get_data_addr(event, regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(event, cpuhw);
- data.br_stack = &cpuhw->bhrb_stack;
+ perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
}
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
- ppmu->get_mem_data_src)
+ ppmu->get_mem_data_src) {
ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
+ data.sample_flags |= PERF_SAMPLE_DATA_SRC;
+ }
- if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
- ppmu->get_mem_weight)
- ppmu->get_mem_weight(&data.weight);
-
+ if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
+ ppmu->get_mem_weight) {
+ ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
+ data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+ }
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
+ } else if (period) {
+ /* Account for interrupt in case of invalid SIAR */
+ if (perf_event_account_interrupt(event))
+ power_pmu_stop(event, 0);
}
}
@@ -2127,12 +2355,10 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
*/
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
- bool use_siar = regs_use_siar(regs);
+ unsigned long siar = mfspr(SPRN_SIAR);
- if (use_siar && siar_valid(regs))
- return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
- else if (use_siar)
- return 0; // no valid instruction pointer
+ if (regs_use_siar(regs) && siar_valid(regs) && siar)
+ return siar + perf_ip_adjust(regs);
else
return regs->nip;
}
@@ -2172,9 +2398,7 @@ static void __perf_event_interrupt(struct pt_regs *regs)
int i, j;
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
- unsigned long val[8];
int found, active;
- int nmi;
if (cpuhw->n_limited)
freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
@@ -2182,20 +2406,14 @@ static void __perf_event_interrupt(struct pt_regs *regs)
perf_read_regs(regs);
- nmi = perf_intr_is_nmi(regs);
- if (nmi)
- nmi_enter();
- else
- irq_enter();
-
/* Read all the PMCs since we'll need them a bunch of times */
for (i = 0; i < ppmu->n_counter; ++i)
- val[i] = read_pmc(i + 1);
+ cpuhw->pmcs[i] = read_pmc(i + 1);
/* Try to find what caused the IRQ */
found = 0;
for (i = 0; i < ppmu->n_counter; ++i) {
- if (!pmc_overflow(val[i]))
+ if (!pmc_overflow(cpuhw->pmcs[i]))
continue;
if (is_limited_pmc(i + 1))
continue; /* these won't generate IRQs */
@@ -2210,10 +2428,18 @@ static void __perf_event_interrupt(struct pt_regs *regs)
event = cpuhw->event[j];
if (event->hw.idx == (i + 1)) {
active = 1;
- record_and_restart(event, val[i], regs);
+ record_and_restart(event, cpuhw->pmcs[i], regs);
break;
}
}
+
+ /*
+ * Clear PACA_IRQ_PMI in case it was set by
+ * set_pmi_irq_pending() when PMU was enabled
+ * after accounting for interrupts.
+ */
+ clear_pmi_irq_pending();
+
if (!active)
/* reset non active counters that have overflowed */
write_pmc(i + 1, 0);
@@ -2224,17 +2450,24 @@ static void __perf_event_interrupt(struct pt_regs *regs)
event = cpuhw->event[i];
if (!event->hw.idx || is_limited_pmc(event->hw.idx))
continue;
- if (pmc_overflow_power7(val[event->hw.idx - 1])) {
+ if (pmc_overflow_power7(cpuhw->pmcs[event->hw.idx - 1])) {
/* event has overflowed in a buggy way*/
found = 1;
record_and_restart(event,
- val[event->hw.idx - 1],
+ cpuhw->pmcs[event->hw.idx - 1],
regs);
}
}
}
- if (!found && !nmi && printk_ratelimit())
- printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
+
+ /*
+ * During system-wide profiling, or while a specific CPU is monitored for an
+ * event, some corner cases could cause a PMC to overflow in the idle path.
+ * This will trigger a PMI after waking up from idle. Since counter values are
+ * _not_ saved/restored in the idle path, this can lead to the "Can't find PMC"
+ * message below.
+ */
+ if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+ printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
/*
* Reset MMCR0 to its normal value. This will set PMXE and
@@ -2243,12 +2476,11 @@ static void __perf_event_interrupt(struct pt_regs *regs)
* XXX might want to use MSR.PM to keep the events frozen until
* we get back out of this interrupt.
*/
- write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+ write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
+
+ /* Clear the cpuhw->pmcs */
+ memset(&cpuhw->pmcs, 0, sizeof(cpuhw->pmcs));
- if (nmi)
- nmi_exit();
- else
- irq_exit();
}
static void perf_event_interrupt(struct pt_regs *regs)
@@ -2265,12 +2497,39 @@ static int power_pmu_prepare_cpu(unsigned int cpu)
if (ppmu) {
memset(cpuhw, 0, sizeof(*cpuhw));
- cpuhw->mmcr[0] = MMCR0_FC;
+ cpuhw->mmcr.mmcr0 = MMCR0_FC;
}
return 0;
}
-int register_power_pmu(struct power_pmu *pmu)
+static ssize_t pmu_name_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (ppmu)
+ return sysfs_emit(buf, "%s", ppmu->name);
+
+ return 0;
+}
+
+static DEVICE_ATTR_RO(pmu_name);
+
+static struct attribute *pmu_caps_attrs[] = {
+ &dev_attr_pmu_name.attr,
+ NULL
+};
+
+static const struct attribute_group pmu_caps_group = {
+ .name = "caps",
+ .attrs = pmu_caps_attrs,
+};
+
+static const struct attribute_group *pmu_caps_groups[] = {
+ &pmu_caps_group,
+ NULL,
+};
+
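(Usage note, assuming the standard sysfs layout for perf event sources: once the caps group is attached via attr_update in register_power_pmu() below for PPMU_ARCH_207S PMUs, the PMU name should become readable from somewhere like /sys/bus/event_source/devices/cpu/caps/pmu_name.)
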
+int __init register_power_pmu(struct power_pmu *pmu)
{
if (ppmu)
return -EBUSY; /* something's already registered */
@@ -2281,6 +2540,11 @@ int register_power_pmu(struct power_pmu *pmu)
power_pmu.attr_groups = ppmu->attr_groups;
+ if (ppmu->flags & PPMU_ARCH_207S)
+ power_pmu.attr_update = pmu_caps_groups;
+
+ power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS);
+
#ifdef MSR_HV
/*
* Use FCHV to ignore kernel events if MSR.HV is set.
@@ -2296,8 +2560,24 @@ int register_power_pmu(struct power_pmu *pmu)
}
#ifdef CONFIG_PPC64
+static bool pmu_override = false;
+static unsigned long pmu_override_val;
+static void do_pmu_override(void *data)
+{
+ ppc_set_pmu_inuse(1);
+ if (pmu_override_val)
+ mtspr(SPRN_MMCR1, pmu_override_val);
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+}
+
static int __init init_ppc64_pmu(void)
{
+ if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
+ pr_warn("disabling perf due to pmu_override= command line option.\n");
+ on_each_cpu(do_pmu_override, NULL, 1);
+ return 0;
+ }
+
/* run through all the pmu drivers one at a time */
if (!init_power5_pmu())
return 0;
@@ -2311,10 +2591,31 @@ static int __init init_ppc64_pmu(void)
return 0;
else if (!init_power9_pmu())
return 0;
+ else if (!init_power10_pmu())
+ return 0;
else if (!init_ppc970_pmu())
return 0;
else
return init_generic_compat_pmu();
}
early_initcall(init_ppc64_pmu);
+
+static int __init pmu_setup(char *str)
+{
+ unsigned long val;
+
+ if (!early_cpu_has_feature(CPU_FTR_HVMODE))
+ return 0;
+
+ pmu_override = true;
+
+ if (kstrtoul(str, 0, &val))
+ val = 0;
+
+ pmu_override_val = val;
+
+ return 1;
+}
+__setup("pmu_override=", pmu_setup);
+
#endif
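
(Usage note on the new option: pmu_override= only takes effect when booted in HV mode, and it disables perf entirely while handing the PMCs over to a manually programmed event selection. A command line like the one below, with an illustrative, hypothetical value, marks the PMU in use, writes the value into MMCR1 and unfreezes the counters on every CPU; a value that does not parse still disables perf but leaves MMCR1 untouched.)

	pmu_override=0x1e568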