Diffstat (limited to 'arch/x86/events/amd/core.c')
-rw-r--r--  arch/x86/events/amd/core.c  57
1 file changed, 32 insertions(+), 25 deletions(-)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 1fc4ce44e743..b20661b8621d 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -9,6 +9,7 @@
#include <linux/jiffies.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
+#include <asm/msr.h>
#include <asm/nmi.h>
#include "../perf_event.h"
@@ -432,8 +433,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
* be removed on one CPU at a time AND PMU is disabled
* when we come here
*/
- for (i = 0; i < x86_pmu.num_counters; i++) {
- if (cmpxchg(nb->owners + i, event, NULL) == event)
+ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
+ struct perf_event *tmp = event;
+
+ if (try_cmpxchg(nb->owners + i, &tmp, NULL))
break;
}
}
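
Note on the hunk above: try_cmpxchg() reports success as a boolean and takes the expected value by reference (updating it on failure), instead of requiring the caller to compare the returned old value against event. A minimal userspace sketch of the same pattern, using the GCC/Clang __atomic builtin as a stand-in for the kernel primitive:

#include <stdbool.h>
#include <stdio.h>

/*
 * Userspace stand-in for try_cmpxchg(): if *ptr equals *expected, store
 * desired and return true; otherwise update *expected with the value
 * actually found.  Sketch only - the kernel helper is arch-specific.
 */
static bool try_cmpxchg_ptr(void **ptr, void **expected, void *desired)
{
	return __atomic_compare_exchange_n(ptr, expected, desired, false,
					   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

int main(void)
{
	int ev;			/* stands in for a struct perf_event */
	void *owner = &ev;	/* nb->owners[i] holding the event */
	void *tmp = &ev;	/* expected value, may be clobbered on failure */

	/* Succeeds because owner == tmp; the slot is cleared to NULL. */
	if (try_cmpxchg_ptr(&owner, &tmp, NULL))
		printf("slot released, owner is now %p\n", owner);

	return 0;
}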
@@ -499,7 +502,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+ for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
if (new == -1 || hwc->idx == idx)
/* assign free slot, prefer hwc->idx */
old = cmpxchg(nb->owners + idx, NULL, event);
@@ -542,7 +545,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
/*
* initialize all possible NB constraints
*/
- for (i = 0; i < x86_pmu.num_counters; i++) {
+ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
__set_bit(i, nb->event_constraints[i].idxmsk);
nb->event_constraints[i].weight = 1;
}
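
The loops converted above no longer assume counters are numbered 0..num_counters-1: for_each_set_bit() over cntr_mask skips any holes in a sparse counter mask. A rough userspace equivalent of that iteration, assuming a plain 64-bit mask rather than the kernel's bitmap API:

#include <stdint.h>
#include <stdio.h>

#define X86_PMC_IDX_MAX 64

int main(void)
{
	/* Example mask with a hole: counters 0-2 and 4-5 exist, 3 does not. */
	uint64_t cntr_mask = 0x37;

	/* Sketch of for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX). */
	for (int i = 0; i < X86_PMC_IDX_MAX; i++) {
		if (!(cntr_mask & (1ULL << i)))
			continue;	/* counter i is not implemented */
		printf("initialise constraint for counter %d\n", i);
	}
	return 0;
}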
@@ -561,13 +564,13 @@ static void amd_pmu_cpu_reset(int cpu)
return;
/* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
+ wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);
/*
* Clear freeze and overflow bits i.e. PerfCntrGLobalStatus.LbrFreeze
* and PerfCntrGLobalStatus.PerfCntrOvfl
*/
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+ wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask);
}
@@ -649,7 +652,7 @@ static void amd_pmu_cpu_dead(int cpu)
static __always_inline void amd_pmu_set_global_ctl(u64 ctl)
{
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
+ wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
}
static inline u64 amd_pmu_get_global_status(void)
@@ -657,7 +660,7 @@ static inline u64 amd_pmu_get_global_status(void)
u64 status;
/* PerfCntrGlobalStatus is read-only */
- rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
+ rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
return status;
}
@@ -670,14 +673,14 @@ static inline void amd_pmu_ack_global_status(u64 status)
* clears the same bit in PerfCntrGlobalStatus
*/
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
+ wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
}
static bool amd_pmu_test_overflow_topbit(int idx)
{
u64 counter;
- rdmsrl(x86_pmu_event_addr(idx), counter);
+ rdmsrq(x86_pmu_event_addr(idx), counter);
return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
}
@@ -735,7 +738,7 @@ static void amd_pmu_check_overflow(void)
* counters are always enabled when this function is called and
* ARCH_PERFMON_EVENTSEL_INT is always set.
*/
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -755,7 +758,7 @@ static void amd_pmu_enable_all(int added)
amd_brs_enable_all();
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
/* only activate events which are marked as active */
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -941,11 +944,12 @@ static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, u
static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ static atomic64_t status_warned = ATOMIC64_INIT(0);
+ u64 reserved, status, mask, new_bits, prev_bits;
struct perf_sample_data data;
struct hw_perf_event *hwc;
struct perf_event *event;
int handled = 0, idx;
- u64 reserved, status, mask;
bool pmu_enabled;
/*
@@ -978,7 +982,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
/* Clear any reserved bits set by buggy microcode */
status &= amd_pmu_global_cntr_mask;
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
if (!test_bit(idx, cpuc->active_mask))
continue;
@@ -998,11 +1002,9 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!x86_perf_event_set_period(event))
continue;
- if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
/*
@@ -1010,7 +1012,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
* the corresponding PMCs are expected to be inactive according to the
* active_mask
*/
- WARN_ON(status > 0);
+ if (status > 0) {
+ prev_bits = atomic64_fetch_or(status, &status_warned);
+ // A new bit was set for the very first time.
+ new_bits = status & ~prev_bits;
+ WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits);
+ }
/* Clear overflow and freeze bits */
amd_pmu_ack_global_status(~status);
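
The hunk above replaces the unconditional WARN_ON() with a warn-once-per-bit scheme: bits that have already been reported are accumulated in a static atomic, and only newly seen bits trigger a warning. The same idea in a standalone sketch, using C11 atomics in place of atomic64_fetch_or():

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Bits that have already been reported at least once. */
static _Atomic uint64_t status_warned;

static void check_status(uint64_t status)
{
	/* fetch_or returns the previous value, like atomic64_fetch_or(). */
	uint64_t prev_bits = atomic_fetch_or(&status_warned, status);
	uint64_t new_bits = status & ~prev_bits;

	if (new_bits)
		printf("New overflows for inactive PMCs: %llx\n",
		       (unsigned long long)new_bits);
}

int main(void)
{
	check_status(0x4);	/* warns: bit 2 is new */
	check_status(0x4);	/* silent: bit 2 already reported */
	check_status(0x6);	/* warns only about bit 1 */
	return 0;
}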
@@ -1313,7 +1320,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.addr_offset = amd_pmu_addr_offset,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = AMD64_NUM_COUNTERS,
+ .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
.add = amd_pmu_add_event,
.del = amd_pmu_del_event,
.cntval_bits = 48,
@@ -1412,7 +1419,7 @@ static int __init amd_core_pmu_init(void)
*/
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
- x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
/* Check for Performance Monitoring v2 support */
if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1422,9 +1429,9 @@ static int __init amd_core_pmu_init(void)
x86_pmu.version = 2;
/* Find the number of available Core PMCs */
- x86_pmu.num_counters = ebx.split.num_core_pmc;
+ x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
- amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+ amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
/* Update PMC handling functions */
x86_pmu.enable_all = amd_pmu_v2_enable_all;
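
Both initialisation paths now describe the available counters as a contiguous bitmask built with GENMASK_ULL(), and amd_pmu_global_cntr_mask reuses cntr_mask64 directly instead of recomputing (1ULL << n) - 1. A quick sketch of that equivalence, with GENMASK_ULL redefined for userspace and the counter count taken from the same field the diff reads (ebx.split.num_core_pmc):

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's GENMASK_ULL(high, low). */
#define GENMASK_ULL(h, l) \
	(((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

int main(void)
{
	int num_core_pmc = 6;	/* as reported via ebx.split.num_core_pmc */

	uint64_t cntr_mask64 = GENMASK_ULL(num_core_pmc - 1, 0);
	uint64_t legacy      = (1ULL << num_core_pmc) - 1;

	/* Both forms describe counters 0..5, i.e. 0x3f. */
	printf("cntr_mask64 = %#llx, legacy = %#llx\n",
	       (unsigned long long)cntr_mask64, (unsigned long long)legacy);
	return 0;
}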
@@ -1452,12 +1459,12 @@ static int __init amd_core_pmu_init(void)
* even numbered counter that has a consecutive adjacent odd
* numbered counter following it.
*/
- for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+ for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
even_ctr_mask |= BIT_ULL(i);
pair_constraint = (struct event_constraint)
__EVENT_CONSTRAINT(0, even_ctr_mask, 0,
- x86_pmu.num_counters / 2, 0,
+ x86_pmu_max_num_counters(NULL) / 2, 0,
PERF_X86_EVENT_PAIR);
x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
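
The pairing constraint above is built from every even-numbered counter that has an adjacent odd-numbered partner, with the counter count now coming from x86_pmu_max_num_counters() (the mask-based helper this diff relies on) rather than num_counters. A sketch of how the even-counter mask and constraint weight come out for six counters:

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n)	(1ULL << (n))

int main(void)
{
	int num_counters = 6;	/* six core PMCs -> pairs (0,1), (2,3), (4,5) */
	uint64_t even_ctr_mask = 0;

	/* Same loop as the diff: every even counter with an odd neighbour. */
	for (int i = 0; i < num_counters - 1; i += 2)
		even_ctr_mask |= BIT_ULL(i);

	/* 0x15 = counters 0, 2 and 4; the constraint weight is 6 / 2 = 3. */
	printf("even_ctr_mask = %#llx, weight = %d\n",
	       (unsigned long long)even_ctr_mask, num_counters / 2);
	return 0;
}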