Diffstat (limited to 'arch/sparc/kernel/perf_event.c')
-rw-r--r--  arch/sparc/kernel/perf_event.c   243
1 file changed, 159 insertions, 84 deletions
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index b5c38faa4ead..cae4d33002a5 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Performance event support for sparc64.
*
* Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net>
@@ -9,7 +10,7 @@
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
*/
#include <linux/perf_event.h>
@@ -21,8 +22,9 @@
#include <asm/stacktrace.h>
#include <asm/cpudata.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include <linux/sched/clock.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
#include <asm/cacheflush.h>
@@ -108,9 +110,9 @@ struct cpu_hw_events {
/* Enabled/disable state. */
int enabled;
- unsigned int group_flag;
+ unsigned int txn_flags;
};
-DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
/* An event map describes the characteristics of a performance
* counter event. In particular it gives the encoding as well as
@@ -737,25 +739,9 @@ static void sparc_vt_write_pmc(int idx, u64 val)
{
u64 pcr;
- /* There seems to be an internal latch on the overflow event
- * on SPARC-T4 that prevents it from triggering unless you
- * update the PIC exactly as we do here. The requirement
- * seems to be that you have to turn off event counting in the
- * PCR around the PIC update.
- *
- * For example, after the following sequence:
- *
- * 1) set PIC to -1
- * 2) enable event counting and overflow reporting in PCR
- * 3) overflow triggers, softint 15 handler invoked
- * 4) clear OV bit in PCR
- * 5) write PIC to -1
- *
- * a subsequent overflow event will not trigger. This
- * sequence works on SPARC-T3 and previous chips.
- */
pcr = pcr_ops->read_pcr(idx);
- pcr_ops->write_pcr(idx, PCR_N4_PICNPT);
+ /* ensure ov and ntc are reset */
+ pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
pcr_ops->write_pic(idx, val & 0xffffffff);
@@ -792,6 +778,29 @@ static const struct sparc_pmu niagara4_pmu = {
.num_pic_regs = 4,
};
+static const struct sparc_pmu sparc_m7_pmu = {
+ .event_map = niagara4_event_map,
+ .cache_map = &niagara4_cache_map,
+ .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
+ .read_pmc = sparc_vt_read_pmc,
+ .write_pmc = sparc_vt_write_pmc,
+ .upper_shift = 5,
+ .lower_shift = 5,
+ .event_mask = 0x7ff,
+ .user_bit = PCR_N4_UTRACE,
+ .priv_bit = PCR_N4_STRACE,
+
+ /* We explicitly don't support hypervisor tracing. */
+ .hv_bit = 0,
+
+ .irq_bit = PCR_N4_TOE,
+ .upper_nop = 0,
+ .lower_nop = 0,
+ .flags = 0,
+ .max_hw_events = 4,
+ .num_pcrs = 4,
+ .num_pic_regs = 4,
+};
static const struct sparc_pmu *sparc_pmu __read_mostly;
static u64 event_encoding(u64 event_id, int idx)
@@ -882,6 +891,10 @@ static int sparc_perf_event_set_period(struct perf_event *event,
s64 period = hwc->sample_period;
int ret = 0;
+ /* The period may have been changed by PERF_EVENT_IOC_PERIOD */
+ if (unlikely(period != hwc->last_period))
+ left = period - (hwc->last_period - left);
+
if (unlikely(left <= -period)) {
left = period;
local64_set(&hwc->period_left, left);
@@ -919,6 +932,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc)
sparc_perf_event_update(cp, &cp->hw,
cpuc->current_idx[i]);
cpuc->current_idx[i] = PIC_NO_INDEX;
+ if (cp->hw.state & PERF_HES_STOPPED)
+ cp->hw.state |= PERF_HES_ARCH;
}
}
}
@@ -951,16 +966,20 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc)
enc = perf_event_get_enc(cpuc->events[i]);
cpuc->pcr[0] &= ~mask_for_index(idx);
- if (hwc->state & PERF_HES_STOPPED)
+ if (hwc->state & PERF_HES_ARCH) {
cpuc->pcr[0] |= nop_for_index(idx);
- else
+ } else {
cpuc->pcr[0] |= event_encoding(enc, idx);
+ hwc->state = 0;
+ }
}
out:
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
}
-/* On this PMU each PIC has it's own PCR control register. */
+static void sparc_pmu_start(struct perf_event *event, int flags);
+
+/* On this PMU each PIC has its own PCR control register. */
static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
{
int i;
@@ -972,20 +991,16 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
struct perf_event *cp = cpuc->event[i];
struct hw_perf_event *hwc = &cp->hw;
int idx = hwc->idx;
- u64 enc;
if (cpuc->current_idx[i] != PIC_NO_INDEX)
continue;
- sparc_perf_event_set_period(cp, hwc, idx);
cpuc->current_idx[i] = idx;
- enc = perf_event_get_enc(cpuc->events[i]);
- cpuc->pcr[idx] &= ~mask_for_index(idx);
- if (hwc->state & PERF_HES_STOPPED)
- cpuc->pcr[idx] |= nop_for_index(idx);
- else
- cpuc->pcr[idx] |= event_encoding(enc, idx);
+ if (cp->hw.state & PERF_HES_ARCH)
+ continue;
+
+ sparc_pmu_start(cp, PERF_EF_RELOAD);
}
out:
for (i = 0; i < cpuc->n_events; i++) {
@@ -1013,7 +1028,7 @@ static void update_pcrs_for_enable(struct cpu_hw_events *cpuc)
static void sparc_pmu_enable(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int i;
if (cpuc->enabled)
@@ -1031,7 +1046,7 @@ static void sparc_pmu_enable(struct pmu *pmu)
static void sparc_pmu_disable(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int i;
if (!cpuc->enabled)
@@ -1065,7 +1080,7 @@ static int active_event_index(struct cpu_hw_events *cpuc,
static void sparc_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx = active_event_index(cpuc, event);
if (flags & PERF_EF_RELOAD) {
@@ -1076,11 +1091,13 @@ static void sparc_pmu_start(struct perf_event *event, int flags)
event->hw.state = 0;
sparc_pmu_enable_event(cpuc, &event->hw, idx);
+
+ perf_event_update_userpage(event);
}
static void sparc_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx = active_event_index(cpuc, event);
if (!(event->hw.state & PERF_HES_STOPPED)) {
@@ -1096,12 +1113,11 @@ static void sparc_pmu_stop(struct perf_event *event, int flags)
static void sparc_pmu_del(struct perf_event *event, int _flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
unsigned long flags;
int i;
local_irq_save(flags);
- perf_pmu_disable(event->pmu);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event[i]) {
@@ -1127,13 +1143,12 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
}
}
- perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
static void sparc_pmu_read(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx = active_event_index(cpuc, event);
struct hw_perf_event *hwc = &event->hw;
@@ -1145,7 +1160,7 @@ static DEFINE_MUTEX(pmc_grab_mutex);
static void perf_stop_nmi_watchdog(void *unused)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int i;
stop_nmi_watchdog(NULL);
@@ -1153,7 +1168,7 @@ static void perf_stop_nmi_watchdog(void *unused)
cpuc->pcr[i] = pcr_ops->read_pcr(i);
}
-void perf_event_grab_pmc(void)
+static void perf_event_grab_pmc(void)
{
if (atomic_inc_not_zero(&active_events))
return;
@@ -1169,7 +1184,7 @@ void perf_event_grab_pmc(void)
mutex_unlock(&pmc_grab_mutex);
}
-void perf_event_release_pmc(void)
+static void perf_event_release_pmc(void)
{
if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
if (atomic_read(&nmi_active) == 0)
@@ -1341,7 +1356,7 @@ static int collect_events(struct perf_event *group, int max_count,
events[n] = group->hw.event_base;
current_idx[n++] = PIC_NO_INDEX;
}
- list_for_each_entry(event, &group->sibling_list, group_entry) {
+ for_each_sibling_event(event, group) {
if (!is_software_event(event) &&
event->state != PERF_EVENT_STATE_OFF) {
if (n >= max_count)
@@ -1356,12 +1371,11 @@ static int collect_events(struct perf_event *group, int max_count,
static int sparc_pmu_add(struct perf_event *event, int ef_flags)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int n0, ret = -EAGAIN;
unsigned long flags;
local_irq_save(flags);
- perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
if (n0 >= sparc_pmu->max_hw_events)
@@ -1371,16 +1385,16 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
cpuc->events[n0] = event->hw.event_base;
cpuc->current_idx[n0] = PIC_NO_INDEX;
- event->hw.state = PERF_HES_UPTODATE;
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
if (!(ef_flags & PERF_EF_START))
- event->hw.state |= PERF_HES_STOPPED;
+ event->hw.state |= PERF_HES_ARCH;
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be performed
* at commit time(->commit_txn) as a whole
*/
- if (cpuc->group_flag & PERF_EVENT_TXN)
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto nocheck;
if (check_excludes(cpuc->event, n0, 1))
@@ -1394,7 +1408,6 @@ nocheck:
ret = 0;
out:
- perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
}
@@ -1496,12 +1509,17 @@ static int sparc_pmu_event_init(struct perf_event *event)
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
*/
-static void sparc_pmu_start_txn(struct pmu *pmu)
+static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */
+
+ cpuhw->txn_flags = txn_flags;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
perf_pmu_disable(pmu);
- cpuhw->group_flag |= PERF_EVENT_TXN;
}
/*
@@ -1511,9 +1529,16 @@ static void sparc_pmu_start_txn(struct pmu *pmu)
*/
static void sparc_pmu_cancel_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ unsigned int txn_flags;
+
+ WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */
+
+ txn_flags = cpuhw->txn_flags;
+ cpuhw->txn_flags = 0;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
- cpuhw->group_flag &= ~PERF_EVENT_TXN;
perf_pmu_enable(pmu);
}
@@ -1524,20 +1549,26 @@ static void sparc_pmu_cancel_txn(struct pmu *pmu)
*/
static int sparc_pmu_commit_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int n;
if (!sparc_pmu)
return -EINVAL;
- cpuc = &__get_cpu_var(cpu_hw_events);
+ WARN_ON_ONCE(!cpuc->txn_flags); /* no txn in flight */
+
+ if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+ cpuc->txn_flags = 0;
+ return 0;
+ }
+
n = cpuc->n_events;
if (check_excludes(cpuc->event, 0, n))
return -EINVAL;
if (sparc_check_constraints(cpuc->event, cpuc->events, n))
return -EAGAIN;
- cpuc->group_flag &= ~PERF_EVENT_TXN;
+ cpuc->txn_flags = 0;
perf_pmu_enable(pmu);
return 0;
}
@@ -1586,6 +1617,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
+ u64 finish_clock;
+ u64 start_clock;
int i;
if (!atomic_read(&active_events))
@@ -1599,9 +1632,11 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
return NOTIFY_DONE;
}
+ start_clock = sched_clock();
+
regs = args->regs;
- cpuc = &__get_cpu_var(cpu_hw_events);
+ cpuc = this_cpu_ptr(&cpu_hw_events);
/* If the PMU has the TOE IRQ enable bits, we need to do a
* dummy write to the %pcr to clear the overflow bits and thus
@@ -1633,10 +1668,13 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
if (!sparc_perf_event_set_period(event, hwc, idx))
continue;
- if (perf_event_overflow(event, &data, regs))
- sparc_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
+ finish_clock = sched_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+
return NOTIFY_STOP;
}
@@ -1662,18 +1700,26 @@ static bool __init supported_pmu(void)
sparc_pmu = &niagara2_pmu;
return true;
}
- if (!strcmp(sparc_pmu_type, "niagara4")) {
+ if (!strcmp(sparc_pmu_type, "niagara4") ||
+ !strcmp(sparc_pmu_type, "niagara5")) {
sparc_pmu = &niagara4_pmu;
return true;
}
+ if (!strcmp(sparc_pmu_type, "sparc-m7")) {
+ sparc_pmu = &sparc_m7_pmu;
+ return true;
+ }
return false;
}
-int __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
{
+ int err;
+
pr_info("Performance events: ");
- if (!supported_pmu()) {
+ err = pcr_arch_init();
+ if (err || !supported_pmu()) {
pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
return 0;
}
@@ -1685,9 +1731,9 @@ int __init init_hw_perf_events(void)
return 0;
}
-early_initcall(init_hw_perf_events);
+pure_initcall(init_hw_perf_events);
-void perf_callchain_kernel(struct perf_callchain_entry *entry,
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
unsigned long ksp, fp;
@@ -1724,78 +1770,107 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
perf_callchain_store(entry, pc);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if ((pc + 8UL) == (unsigned long) &return_to_handler) {
- int index = current->curr_ret_stack;
- if (current->ret_stack && index >= graph) {
- pc = current->ret_stack[index - graph].ret;
+ struct ftrace_ret_stack *ret_stack;
+ ret_stack = ftrace_graph_get_ret_stack(current,
+ graph);
+ if (ret_stack) {
+ pc = ret_stack->ret;
perf_callchain_store(entry, pc);
graph++;
}
}
#endif
- } while (entry->nr < PERF_MAX_STACK_DEPTH);
+ } while (entry->nr < entry->max_stack);
+}
+
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+ /* addresses should be at least 4-byte aligned */
+ if (((unsigned long) fp) & 3)
+ return 0;
+
+ return (__range_not_ok(fp, size, TASK_SIZE) == 0);
}
-static void perf_callchain_user_64(struct perf_callchain_entry *entry,
+static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
unsigned long ufp;
- ufp = regs->u_regs[UREG_I6] + STACK_BIAS;
+ ufp = regs->u_regs[UREG_FP] + STACK_BIAS;
do {
- struct sparc_stackf *usf, sf;
+ struct sparc_stackf __user *usf;
+ struct sparc_stackf sf;
unsigned long pc;
- usf = (struct sparc_stackf *) ufp;
+ usf = (struct sparc_stackf __user *)ufp;
+ if (!valid_user_frame(usf, sizeof(sf)))
+ break;
+
if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
break;
pc = sf.callers_pc;
ufp = (unsigned long)sf.fp + STACK_BIAS;
perf_callchain_store(entry, pc);
- } while (entry->nr < PERF_MAX_STACK_DEPTH);
+ } while (entry->nr < entry->max_stack);
}
-static void perf_callchain_user_32(struct perf_callchain_entry *entry,
+static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
unsigned long ufp;
- ufp = regs->u_regs[UREG_I6] & 0xffffffffUL;
+ ufp = regs->u_regs[UREG_FP] & 0xffffffffUL;
do {
unsigned long pc;
if (thread32_stack_is_64bit(ufp)) {
- struct sparc_stackf *usf, sf;
+ struct sparc_stackf __user *usf;
+ struct sparc_stackf sf;
ufp += STACK_BIAS;
- usf = (struct sparc_stackf *) ufp;
+ usf = (struct sparc_stackf __user *)ufp;
if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
break;
pc = sf.callers_pc & 0xffffffff;
ufp = ((unsigned long) sf.fp) & 0xffffffff;
} else {
- struct sparc_stackf32 *usf, sf;
- usf = (struct sparc_stackf32 *) ufp;
+ struct sparc_stackf32 __user *usf;
+ struct sparc_stackf32 sf;
+ usf = (struct sparc_stackf32 __user *)ufp;
if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
break;
pc = sf.callers_pc;
ufp = (unsigned long)sf.fp;
}
perf_callchain_store(entry, pc);
- } while (entry->nr < PERF_MAX_STACK_DEPTH);
+ } while (entry->nr < entry->max_stack);
}
void
-perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
+ u64 saved_fault_address = current_thread_info()->fault_address;
+ u8 saved_fault_code = get_thread_fault_code();
+
perf_callchain_store(entry, regs->tpc);
if (!current->mm)
return;
flushw_user();
+
+ pagefault_disable();
+
if (test_thread_flag(TIF_32BIT))
perf_callchain_user_32(entry, regs);
else
perf_callchain_user_64(entry, regs);
+
+ pagefault_enable();
+
+ set_thread_fault_code(saved_fault_code);
+ current_thread_info()->fault_address = saved_fault_address;
}