summaryrefslogtreecommitdiff
path: root/arch/powerpc/perf/core-book3s.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-07 10:33:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-07 10:33:50 -0700
commit25d8d4eecace9de5a6a2193e4df1917afbdd3052 (patch)
tree1f1bbde6423745251c41fb4d1842e70b9f7bca07 /arch/powerpc/perf/core-book3s.c
parent60e76bb8a4e4c5398ea9053535e1fd0c9d6bb06e (diff)
parenta7aaa2f26bfd932a654706b19859e7adf802bee2 (diff)
Merge tag 'powerpc-5.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: - Add support for (optionally) using queued spinlocks & rwlocks. - Support for a new faster system call ABI using the scv instruction on Power9 or later. - Drop support for the PROT_SAO mmap/mprotect flag as it will be unsupported on Power10 and future processors, leaving us with no way to implement the functionality it requests. This risks breaking userspace, though we believe it is unused in practice. - A bug fix for, and then the removal of, our custom stack expansion checking. We now allow stack expansion up to the rlimit, like other architectures. - Remove the remnants of our (previously disabled) topology update code, which tried to react to NUMA layout changes on virtualised systems, but was prone to crashes and other problems. - Add PMU support for Power10 CPUs. - A change to our signal trampoline so that we don't unbalance the link stack (branch return predictor) in the signal delivery path. - Lots of other cleanups, refactorings, smaller features and so on as usual. Thanks to: Abhishek Goel, Alastair D'Silva, Alexander A. Klimov, Alexey Kardashevskiy, Alistair Popple, Andrew Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Anton Blanchard, Arnd Bergmann, Athira Rajeev, Balamuruhan S, Bharata B Rao, Bill Wendling, Bin Meng, Cédric Le Goater, Chris Packham, Christophe Leroy, Christoph Hellwig, Daniel Axtens, Dan Williams, David Lamparter, Desnes A. Nunes do Rosario, Erhard F., Finn Thain, Frederic Barrat, Ganesh Goudar, Gautham R. Shenoy, Geoff Levand, Greg Kurz, Gustavo A. R. Silva, Hari Bathini, Harish, Imre Kaloz, Joel Stanley, Joe Perches, John Crispin, Jordan Niethe, Kajol Jain, Kamalesh Babulal, Kees Cook, Laurent Dufour, Leonardo Bras, Li RongQing, Madhavan Srinivasan, Mahesh Salgaonkar, Mark Cave-Ayland, Michal Suchanek, Milton Miller, Mimi Zohar, Murilo Opsfelder Araujo, Nathan Chancellor, Nathan Lynch, Naveen N. Rao, Nayna Jain, Nicholas Piggin, Oliver O'Halloran, Palmer Dabbelt, Pedro Miraglia Franco de Carvalho, Philippe Bergheaud, Pingfan Liu, Pratik Rajesh Sampat, Qian Cai, Qinglang Miao, Randy Dunlap, Ravi Bangoria, Sachin Sant, Sam Bobroff, Sandipan Das, Santosh Sivaraj, Satheesh Rajendran, Shirisha Ganta, Sourabh Jain, Srikar Dronamraju, Stan Johnson, Stephen Rothwell, Thadeu Lima de Souza Cascardo, Thiago Jung Bauermann, Tom Lane, Vaibhav Jain, Vladis Dronov, Wei Yongjun, Wen Xiong, YueHaibing. * tag 'powerpc-5.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (337 commits) selftests/powerpc: Fix pkey syscall redefinitions powerpc: Fix circular dependency between percpu.h and mmu.h powerpc/powernv/sriov: Fix use of uninitialised variable selftests/powerpc: Skip vmx/vsx/tar/etc tests on older CPUs powerpc/40x: Fix assembler warning about r0 powerpc/papr_scm: Add support for fetching nvdimm 'fuel-gauge' metric powerpc/papr_scm: Fetch nvdimm performance stats from PHYP cpuidle: pseries: Fixup exit latency for CEDE(0) cpuidle: pseries: Add function to parse extended CEDE records cpuidle: pseries: Set the latency-hint before entering CEDE selftests/powerpc: Fix online CPU selection powerpc/perf: Consolidate perf_callchain_user_[64|32]() powerpc/pseries/hotplug-cpu: Remove double free in error path powerpc/pseries/mobility: Add pr_debug() for device tree changes powerpc/pseries/mobility: Set pr_fmt() powerpc/cacheinfo: Warn if cache object chain becomes unordered powerpc/cacheinfo: Improve diagnostics about malformed cache lists powerpc/cacheinfo: Use name@unit instead of full DT path in debug messages powerpc/cacheinfo: Set pr_fmt() powerpc: fix function annotations to avoid section mismatch warnings with gcc-10 ...
Diffstat (limited to 'arch/powerpc/perf/core-book3s.c')
-rw-r--r--arch/powerpc/perf/core-book3s.c108
1 files changed, 75 insertions, 33 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 01d70280d287..78fe34986594 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -37,12 +37,7 @@ struct cpu_hw_events {
struct perf_event *event[MAX_HWEVENTS];
u64 events[MAX_HWEVENTS];
unsigned int flags[MAX_HWEVENTS];
- /*
- * The order of the MMCR array is:
- * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2
- * - 32-bit, MMCR0, MMCR1, MMCR2
- */
- unsigned long mmcr[4];
+ struct mmcr_regs mmcr;
struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
@@ -77,6 +72,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
/*
* 32-bit doesn't have MMCRA but does have an MMCR2,
* and a few other names are different.
+ * Also 32-bit doesn't have MMCR3, SIER2 and SIER3.
+ * Define them as zero knowing that any code path accessing
+ * these registers (via mtspr/mfspr) are done under ppmu flag
+ * check for PPMU_ARCH_31 and we will not enter that code path
+ * for 32-bit.
*/
#ifdef CONFIG_PPC32
@@ -90,7 +90,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
#define MMCR0_PMCC_U6 0
#define SPRN_MMCRA SPRN_MMCR2
+#define SPRN_MMCR3 0
+#define SPRN_SIER2 0
+#define SPRN_SIER3 0
#define MMCRA_SAMPLE_ENABLE 0
+#define MMCRA_BHRB_DISABLE 0
static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
@@ -121,7 +125,7 @@ static void ebb_event_add(struct perf_event *event) { }
static void ebb_switch_out(unsigned long mmcr0) { }
static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
{
- return cpuhw->mmcr[0];
+ return cpuhw->mmcr.mmcr0;
}
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
@@ -466,8 +470,11 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *
* addresses at this point. Check the privileges before
* exporting it to userspace (avoid exposure of regions
* where we could have speculative execution)
+ * Incase of ISA v3.1, BHRB will capture only user-space
+ * addresses, hence include a check before filtering code
*/
- if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
+ if (!(ppmu->flags & PPMU_ARCH_31) &&
+ is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
continue;
/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -586,11 +593,16 @@ static void ebb_switch_out(unsigned long mmcr0)
current->thread.sdar = mfspr(SPRN_SDAR);
current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK;
current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
+ if (ppmu->flags & PPMU_ARCH_31) {
+ current->thread.mmcr3 = mfspr(SPRN_MMCR3);
+ current->thread.sier2 = mfspr(SPRN_SIER2);
+ current->thread.sier3 = mfspr(SPRN_SIER3);
+ }
}
static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
{
- unsigned long mmcr0 = cpuhw->mmcr[0];
+ unsigned long mmcr0 = cpuhw->mmcr.mmcr0;
if (!ebb)
goto out;
@@ -624,7 +636,13 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
* unfreeze counters, it should not set exclude_xxx in its events and
* instead manage the MMCR2 entirely by itself.
*/
- mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2);
+ mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2);
+
+ if (ppmu->flags & PPMU_ARCH_31) {
+ mtspr(SPRN_MMCR3, current->thread.mmcr3);
+ mtspr(SPRN_SIER2, current->thread.sier2);
+ mtspr(SPRN_SIER3, current->thread.sier3);
+ }
out:
return mmcr0;
}
@@ -845,6 +863,11 @@ void perf_event_print_debug(void)
pr_info("EBBRR: %016lx BESCR: %016lx\n",
mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
}
+
+ if (ppmu->flags & PPMU_ARCH_31) {
+ pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n",
+ mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3));
+ }
#endif
pr_info("SIAR: %016lx SDAR: %016lx SIER: %016lx\n",
mfspr(SPRN_SIAR), sdar, sier);
@@ -1196,7 +1219,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
static void power_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
- unsigned long flags, mmcr0, val;
+ unsigned long flags, mmcr0, val, mmcra;
if (!ppmu)
return;
@@ -1229,12 +1252,24 @@ static void power_pmu_disable(struct pmu *pmu)
mb();
isync();
+ val = mmcra = cpuhw->mmcr.mmcra;
+
/*
* Disable instruction sampling if it was enabled
*/
- if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
- mtspr(SPRN_MMCRA,
- cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+ if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE)
+ val &= ~MMCRA_SAMPLE_ENABLE;
+
+ /* Disable BHRB via mmcra (BHRBRD) for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+ val |= MMCRA_BHRB_DISABLE;
+
+ /*
+ * Write SPRN_MMCRA if mmcra has either disabled
+ * instruction sampling or BHRB.
+ */
+ if (val != mmcra) {
+ mtspr(SPRN_MMCRA, mmcra);
mb();
isync();
}
@@ -1308,18 +1343,20 @@ static void power_pmu_enable(struct pmu *pmu)
* (possibly updated for removal of events).
*/
if (!cpuhw->n_added) {
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
- mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+ if (ppmu->flags & PPMU_ARCH_31)
+ mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
goto out_enable;
}
/*
* Clear all MMCR settings and recompute them for the new set of events.
*/
- memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
+ memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
- cpuhw->mmcr, cpuhw->event)) {
+ &cpuhw->mmcr, cpuhw->event)) {
/* shouldn't ever get here */
printk(KERN_ERR "oops compute_mmcr failed\n");
goto out;
@@ -1333,11 +1370,11 @@ static void power_pmu_enable(struct pmu *pmu)
*/
event = cpuhw->event[0];
if (event->attr.exclude_user)
- cpuhw->mmcr[0] |= MMCR0_FCP;
+ cpuhw->mmcr.mmcr0 |= MMCR0_FCP;
if (event->attr.exclude_kernel)
- cpuhw->mmcr[0] |= freeze_events_kernel;
+ cpuhw->mmcr.mmcr0 |= freeze_events_kernel;
if (event->attr.exclude_hv)
- cpuhw->mmcr[0] |= MMCR0_FCHV;
+ cpuhw->mmcr.mmcr0 |= MMCR0_FCHV;
}
/*
@@ -1346,12 +1383,15 @@ static void power_pmu_enable(struct pmu *pmu)
* Then unfreeze the events.
*/
ppc_set_pmu_inuse(1);
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
- mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
- mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+ mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
| MMCR0_FC);
if (ppmu->flags & PPMU_ARCH_207S)
- mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
+ mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2);
+
+ if (ppmu->flags & PPMU_ARCH_31)
+ mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
/*
* Read off any pre-existing events that need to move
@@ -1402,7 +1442,7 @@ static void power_pmu_enable(struct pmu *pmu)
perf_event_update_userpage(event);
}
cpuhw->n_limited = n_lim;
- cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+ cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE;
out_enable:
pmao_restore_workaround(ebb);
@@ -1418,9 +1458,9 @@ static void power_pmu_enable(struct pmu *pmu)
/*
* Enable instruction sampling if necessary
*/
- if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+ if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) {
mb();
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra);
}
out:
@@ -1550,7 +1590,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
cpuhw->flags[i-1] = cpuhw->flags[i];
}
--cpuhw->n_events;
- ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
+ ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr);
if (event->hw.idx) {
write_pmc(event->hw.idx, 0);
event->hw.idx = 0;
@@ -1571,7 +1611,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
}
if (cpuhw->n_events == 0) {
/* disable exceptions if no events are running */
- cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+ cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE);
}
if (has_branch_stack(event))
@@ -1795,7 +1835,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
static int hw_perf_cache_event(u64 config, u64 *eventp)
{
unsigned long type, op, result;
- int ev;
+ u64 ev;
if (!ppmu->cache_events)
return -EINVAL;
@@ -2246,7 +2286,7 @@ static void __perf_event_interrupt(struct pt_regs *regs)
* XXX might want to use MSR.PM to keep the events frozen until
* we get back out of this interrupt.
*/
- write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+ write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
if (nmi)
nmi_exit();
@@ -2268,7 +2308,7 @@ static int power_pmu_prepare_cpu(unsigned int cpu)
if (ppmu) {
memset(cpuhw, 0, sizeof(*cpuhw));
- cpuhw->mmcr[0] = MMCR0_FC;
+ cpuhw->mmcr.mmcr0 = MMCR0_FC;
}
return 0;
}
@@ -2314,6 +2354,8 @@ static int __init init_ppc64_pmu(void)
return 0;
else if (!init_power9_pmu())
return 0;
+ else if (!init_power10_pmu())
+ return 0;
else if (!init_ppc970_pmu())
return 0;
else