summaryrefslogtreecommitdiff
path: root/arch/x86/events/intel/ds.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events/intel/ds.c')
-rw-r--r--arch/x86/events/intel/ds.c1035
1 files changed, 828 insertions, 207 deletions
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8afc4ad3cd16..feb1c3cf63e4 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -10,6 +10,7 @@
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include <asm/io.h>
+#include <asm/msr.h>
#include <asm/timer.h>
#include "../perf_event.h"
@@ -316,7 +317,8 @@ static u64 __grt_latency_data(struct perf_event *event, u64 status,
{
u64 val;
- WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
+ WARN_ON_ONCE(is_hybrid() &&
+ hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
val = hybrid_var(event->pmu, pebs_data_source)[dse];
@@ -624,13 +626,18 @@ static int alloc_pebs_buffer(int cpu)
int max, node = cpu_to_node(cpu);
void *buffer, *insn_buff, *cea;
- if (!x86_pmu.pebs)
+ if (!intel_pmu_has_pebs())
return 0;
buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
if (unlikely(!buffer))
return -ENOMEM;
+ if (x86_pmu.arch_pebs) {
+ hwev->pebs_vaddr = buffer;
+ return 0;
+ }
+
/*
* HSW+ already provides us the eventing ip; no need to allocate this
* buffer then.
@@ -643,7 +650,7 @@ static int alloc_pebs_buffer(int cpu)
}
per_cpu(insn_buffer, cpu) = insn_buff;
}
- hwev->ds_pebs_vaddr = buffer;
+ hwev->pebs_vaddr = buffer;
/* Update the cpu entry area mapping */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
ds->pebs_buffer_base = (unsigned long) cea;
@@ -659,17 +666,20 @@ static void release_pebs_buffer(int cpu)
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
void *cea;
- if (!x86_pmu.pebs)
+ if (!intel_pmu_has_pebs())
return;
- kfree(per_cpu(insn_buffer, cpu));
- per_cpu(insn_buffer, cpu) = NULL;
+ if (x86_pmu.ds_pebs) {
+ kfree(per_cpu(insn_buffer, cpu));
+ per_cpu(insn_buffer, cpu) = NULL;
- /* Clear the fixmap */
- cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
- ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
- dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
- hwev->ds_pebs_vaddr = NULL;
+ /* Clear the fixmap */
+ cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+ ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
+ }
+
+ dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size);
+ hwev->pebs_vaddr = NULL;
}
static int alloc_bts_buffer(int cpu)
@@ -734,7 +744,7 @@ void release_ds_buffers(void)
{
int cpu;
- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
return;
for_each_possible_cpu(cpu)
@@ -750,7 +760,8 @@ void release_ds_buffers(void)
}
for_each_possible_cpu(cpu) {
- release_pebs_buffer(cpu);
+ if (x86_pmu.ds_pebs)
+ release_pebs_buffer(cpu);
release_bts_buffer(cpu);
}
}
@@ -761,15 +772,17 @@ void reserve_ds_buffers(void)
int cpu;
x86_pmu.bts_active = 0;
- x86_pmu.pebs_active = 0;
- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (x86_pmu.ds_pebs)
+ x86_pmu.pebs_active = 0;
+
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
return;
if (!x86_pmu.bts)
bts_err = 1;
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
pebs_err = 1;
for_each_possible_cpu(cpu) {
@@ -781,7 +794,8 @@ void reserve_ds_buffers(void)
if (!bts_err && alloc_bts_buffer(cpu))
bts_err = 1;
- if (!pebs_err && alloc_pebs_buffer(cpu))
+ if (x86_pmu.ds_pebs && !pebs_err &&
+ alloc_pebs_buffer(cpu))
pebs_err = 1;
if (bts_err && pebs_err)
@@ -793,7 +807,7 @@ void reserve_ds_buffers(void)
release_bts_buffer(cpu);
}
- if (pebs_err) {
+ if (x86_pmu.ds_pebs && pebs_err) {
for_each_possible_cpu(cpu)
release_pebs_buffer(cpu);
}
@@ -805,7 +819,7 @@ void reserve_ds_buffers(void)
if (x86_pmu.bts && !bts_err)
x86_pmu.bts_active = 1;
- if (x86_pmu.pebs && !pebs_err)
+ if (x86_pmu.ds_pebs && !pebs_err)
x86_pmu.pebs_active = 1;
for_each_possible_cpu(cpu) {
@@ -818,6 +832,56 @@ void reserve_ds_buffers(void)
}
}
+inline int alloc_arch_pebs_buf_on_cpu(int cpu)
+{
+ if (!x86_pmu.arch_pebs)
+ return 0;
+
+ return alloc_pebs_buffer(cpu);
+}
+
+inline void release_arch_pebs_buf_on_cpu(int cpu)
+{
+ if (!x86_pmu.arch_pebs)
+ return;
+
+ release_pebs_buffer(cpu);
+}
+
+void init_arch_pebs_on_cpu(int cpu)
+{
+ struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+ u64 arch_pebs_base;
+
+ if (!x86_pmu.arch_pebs)
+ return;
+
+ if (!cpuc->pebs_vaddr) {
+ WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu);
+ x86_pmu.pebs_active = 0;
+ return;
+ }
+
+ /*
+ * 4KB-aligned pointer of the output buffer
+ * (__alloc_pages_node() return page aligned address)
+ * Buffer Size = 4KB * 2^SIZE
+ * contiguous physical buffer (__alloc_pages_node() with order)
+ */
+ arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT;
+ wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base,
+ (u32)(arch_pebs_base >> 32));
+ x86_pmu.pebs_active = 1;
+}
+
+inline void fini_arch_pebs_on_cpu(int cpu)
+{
+ if (!x86_pmu.arch_pebs)
+ return;
+
+ wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0);
+}
+
/*
* BTS
*/
@@ -953,11 +1017,11 @@ unlock:
return 1;
}
-static inline void intel_pmu_drain_pebs_buffer(void)
+void intel_pmu_drain_pebs_buffer(void)
{
struct perf_sample_data data;
- x86_pmu.drain_pebs(NULL, &data);
+ static_call(x86_pmu_drain_pebs)(NULL, &data);
}
/*
@@ -1199,7 +1263,7 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
- INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff),
+ INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc),
INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
@@ -1294,6 +1358,19 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
ds->pebs_interrupt_threshold = threshold;
}
+#define PEBS_DATACFG_CNTRS(x) \
+ ((x >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK)
+
+#define PEBS_DATACFG_CNTR_BIT(x) \
+ (((1ULL << x) & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT)
+
+#define PEBS_DATACFG_FIX(x) \
+ ((x >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK)
+
+#define PEBS_DATACFG_FIX_BIT(x) \
+ (((1ULL << (x)) & PEBS_DATACFG_FIX_MASK) \
+ << PEBS_DATACFG_FIX_SHIFT)
+
static void adaptive_pebs_record_size_update(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1308,10 +1385,57 @@ static void adaptive_pebs_record_size_update(void)
sz += sizeof(struct pebs_xmm);
if (pebs_data_cfg & PEBS_DATACFG_LBRS)
sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
+ if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) {
+ sz += sizeof(struct pebs_cntr_header);
+
+ /* Metrics base and Metrics Data */
+ if (pebs_data_cfg & PEBS_DATACFG_METRICS)
+ sz += 2 * sizeof(u64);
+
+ if (pebs_data_cfg & PEBS_DATACFG_CNTR) {
+ sz += (hweight64(PEBS_DATACFG_CNTRS(pebs_data_cfg)) +
+ hweight64(PEBS_DATACFG_FIX(pebs_data_cfg))) *
+ sizeof(u64);
+ }
+ }
cpuc->pebs_record_size = sz;
}
+static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
+ int idx, u64 *pebs_data_cfg)
+{
+ if (is_metric_event(event)) {
+ *pebs_data_cfg |= PEBS_DATACFG_METRICS;
+ return;
+ }
+
+ *pebs_data_cfg |= PEBS_DATACFG_CNTR;
+
+ if (idx >= INTEL_PMC_IDX_FIXED)
+ *pebs_data_cfg |= PEBS_DATACFG_FIX_BIT(idx - INTEL_PMC_IDX_FIXED);
+ else
+ *pebs_data_cfg |= PEBS_DATACFG_CNTR_BIT(idx);
+}
+
+
+void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc)
+{
+ struct perf_event *event;
+ u64 pebs_data_cfg = 0;
+ int i;
+
+ for (i = 0; i < cpuc->n_events; i++) {
+ event = cpuc->event_list[i];
+ if (!is_pebs_counter_event_group(event))
+ continue;
+ __intel_pmu_pebs_update_cfg(event, cpuc->assign[i], &pebs_data_cfg);
+ }
+
+ if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
+ cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
+}
+
#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_WEIGHT_TYPE | \
@@ -1338,8 +1462,10 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
* + precise_ip < 2 for the non event IP
* + For RTM TSX weight we need GPRs for the abort code.
*/
- gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
- (attr->sample_regs_intr & PEBS_GP_REGS);
+ gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) &&
+ (attr->sample_regs_intr & PEBS_GP_REGS)) ||
+ ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (attr->sample_regs_user & PEBS_GP_REGS));
tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
((attr->config & INTEL_ARCH_EVENT_MASK) ==
@@ -1403,6 +1529,25 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
}
}
+u64 intel_get_arch_pebs_data_config(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 pebs_data_cfg = 0;
+ u64 cntr_mask;
+
+ if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
+ return 0;
+
+ pebs_data_cfg |= pebs_update_adaptive_cfg(event);
+
+ cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
+ (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
+ PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
+ pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
+
+ return pebs_data_cfg;
+}
+
void intel_pmu_pebs_add(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1454,7 +1599,7 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
else
value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
}
- wrmsrl(base + idx, value);
+ wrmsrq(base + idx, value);
}
static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
@@ -1464,6 +1609,15 @@ static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
intel_pmu_drain_pebs_buffer();
}
+static void __intel_pmu_pebs_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+ cpuc->pebs_enabled |= 1ULL << hwc->idx;
+}
+
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1472,9 +1626,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
struct debug_store *ds = cpuc->ds;
unsigned int idx = hwc->idx;
- hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
-
- cpuc->pebs_enabled |= 1ULL << hwc->idx;
+ __intel_pmu_pebs_enable(event);
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
@@ -1489,9 +1641,9 @@ void intel_pmu_pebs_enable(struct perf_event *event)
* hence we need to drain when changing said
* size.
*/
- intel_pmu_drain_large_pebs(cpuc);
+ intel_pmu_drain_pebs_buffer();
adaptive_pebs_record_size_update();
- wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
+ wrmsrq(MSR_PEBS_DATA_CFG, pebs_data_cfg);
cpuc->active_pebs_data_cfg = pebs_data_cfg;
}
}
@@ -1536,14 +1688,22 @@ void intel_pmu_pebs_del(struct perf_event *event)
pebs_update_state(needed_cb, cpuc, event, false);
}
-void intel_pmu_pebs_disable(struct perf_event *event)
+static void __intel_pmu_pebs_disable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
intel_pmu_drain_large_pebs(cpuc);
-
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+ hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+}
+
+void intel_pmu_pebs_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ __intel_pmu_pebs_disable(event);
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
(x86_pmu.version < 5))
@@ -1554,9 +1714,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
intel_pmu_pebs_via_pt_disable(event);
if (cpuc->enabled)
- wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
-
- hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+ wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}
void intel_pmu_pebs_enable_all(void)
@@ -1564,7 +1722,7 @@ void intel_pmu_pebs_enable_all(void)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (cpuc->pebs_enabled)
- wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+ wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}
void intel_pmu_pebs_disable_all(void)
@@ -1765,8 +1923,6 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
perf_sample_data_init(data, 0, event->hw.last_period);
- data->period = event->hw.last_period;
-
/*
* Use latency for weight (only avail with PEBS-LL)
*/
@@ -1789,8 +1945,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
- if (sample_type & PERF_SAMPLE_CALLCHAIN)
- perf_sample_save_callchain(data, event, iregs);
+ perf_sample_save_callchain(data, event, iregs);
/*
* We use the interrupt regs as a base because the PEBS record does not
@@ -1889,8 +2044,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
if (x86_pmu.intel_cap.pebs_format >= 3)
setup_pebs_time(event, data, pebs->tsc);
- if (has_branch_stack(event))
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1916,27 +2070,187 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
#endif
}
+static void intel_perf_event_update_pmc(struct perf_event *event, u64 pmc)
+{
+ int shift = 64 - x86_pmu.cntval_bits;
+ struct hw_perf_event *hwc;
+ u64 delta, prev_pmc;
+
+ /*
+ * A recorded counter may not have an assigned event in the
+ * following cases. The value should be dropped.
+ * - An event is deleted. There is still an active PEBS event.
+ * The PEBS record doesn't shrink on pmu::del().
+ * If the counter of the deleted event once occurred in a PEBS
+ * record, PEBS still records the counter until the counter is
+ * reassigned.
+ * - An event is stopped for some reason, e.g., throttled.
+ * During this period, another event is added and takes the
+ * counter of the stopped event. The stopped event is assigned
+ * to another new and uninitialized counter, since the
+ * x86_pmu_start(RELOAD) is not invoked for a stopped event.
+ * The PEBS__DATA_CFG is updated regardless of the event state.
+ * The uninitialized counter can be recorded in a PEBS record.
+ * But the cpuc->events[uninitialized_counter] is always NULL,
+ * because the event is stopped. The uninitialized value is
+ * safely dropped.
+ */
+ if (!event)
+ return;
+
+ hwc = &event->hw;
+ prev_pmc = local64_read(&hwc->prev_count);
+
+ /* Only update the count when the PMU is disabled */
+ WARN_ON(this_cpu_read(cpu_hw_events.enabled));
+ local64_set(&hwc->prev_count, pmc);
+
+ delta = (pmc << shift) - (prev_pmc << shift);
+ delta >>= shift;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+}
+
+static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
+ struct perf_event *event,
+ struct pebs_cntr_header *cntr,
+ void *next_record)
+{
+ int bit;
+
+ for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) {
+ intel_perf_event_update_pmc(cpuc->events[bit], *(u64 *)next_record);
+ next_record += sizeof(u64);
+ }
+
+ for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) {
+ /* The slots event will be handled with perf_metric later */
+ if ((cntr->metrics == INTEL_CNTR_METRICS) &&
+ (bit + INTEL_PMC_IDX_FIXED == INTEL_PMC_IDX_FIXED_SLOTS)) {
+ next_record += sizeof(u64);
+ continue;
+ }
+ intel_perf_event_update_pmc(cpuc->events[bit + INTEL_PMC_IDX_FIXED],
+ *(u64 *)next_record);
+ next_record += sizeof(u64);
+ }
+
+ /* HW will reload the value right after the overflow. */
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period);
+
+ if (cntr->metrics == INTEL_CNTR_METRICS) {
+ static_call(intel_pmu_update_topdown_event)
+ (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS],
+ (u64 *)next_record);
+ next_record += 2 * sizeof(u64);
+ }
+}
+
#define PEBS_LATENCY_MASK 0xffff
-#define PEBS_CACHE_LATENCY_OFFSET 32
-#define PEBS_RETIRE_LATENCY_OFFSET 32
+
+static inline void __setup_perf_sample_data(struct perf_event *event,
+ struct pt_regs *iregs,
+ struct perf_sample_data *data)
+{
+ perf_sample_data_init(data, 0, event->hw.last_period);
+
+ /*
+ * We must however always use iregs for the unwinder to stay sane; the
+ * record BP,SP,IP can point into thin air when the record is from a
+ * previous PMI context or an (I)RET happened between the record and
+ * PMI.
+ */
+ perf_sample_save_callchain(data, event, iregs);
+}
+
+static inline void __setup_pebs_basic_group(struct perf_event *event,
+ struct pt_regs *regs,
+ struct perf_sample_data *data,
+ u64 sample_type, u64 ip,
+ u64 tsc, u16 retire)
+{
+ /* The ip in basic is EventingIP */
+ set_linear_ip(regs, ip);
+ regs->flags = PERF_EFLAGS_EXACT;
+ setup_pebs_time(event, data, tsc);
+
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+ data->weight.var3_w = retire;
+}
+
+static inline void __setup_pebs_gpr_group(struct perf_event *event,
+ struct pt_regs *regs,
+ struct pebs_gprs *gprs,
+ u64 sample_type)
+{
+ if (event->attr.precise_ip < 2) {
+ set_linear_ip(regs, gprs->ip);
+ regs->flags &= ~PERF_EFLAGS_EXACT;
+ }
+
+ if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
+ adaptive_pebs_save_regs(regs, gprs);
+}
+
+static inline void __setup_pebs_meminfo_group(struct perf_event *event,
+ struct perf_sample_data *data,
+ u64 sample_type, u64 latency,
+ u16 instr_latency, u64 address,
+ u64 aux, u64 tsx_tuning, u64 ax)
+{
+ if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+ u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);
+
+ data->weight.var2_w = instr_latency;
+
+ /*
+ * Although meminfo::latency is defined as a u64,
+ * only the lower 32 bits include the valid data
+ * in practice on Ice Lake and earlier platforms.
+ */
+ if (sample_type & PERF_SAMPLE_WEIGHT)
+ data->weight.full = latency ?: tsx_latency;
+ else
+ data->weight.var1_dw = (u32)latency ?: tsx_latency;
+
+ data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+ }
+
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
+ data->data_src.val = get_data_src(event, aux);
+ data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+ }
+
+ if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
+ data->addr = address;
+ data->sample_flags |= PERF_SAMPLE_ADDR;
+ }
+
+ if (sample_type & PERF_SAMPLE_TRANSACTION) {
+ data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
+ data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+ }
+}
/*
* With adaptive PEBS the layout depends on what fields are configured.
*/
-
static void setup_pebs_adaptive_sample_data(struct perf_event *event,
struct pt_regs *iregs, void *__pebs,
struct perf_sample_data *data,
struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 sample_type = event->attr.sample_type;
struct pebs_basic *basic = __pebs;
void *next_record = basic + 1;
- u64 sample_type;
- u64 format_size;
struct pebs_meminfo *meminfo = NULL;
struct pebs_gprs *gprs = NULL;
struct x86_perf_regs *perf_regs;
+ u64 format_group;
+ u16 retire;
if (basic == NULL)
return;
@@ -1944,110 +2258,193 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
perf_regs = container_of(regs, struct x86_perf_regs, regs);
perf_regs->xmm_regs = NULL;
- sample_type = event->attr.sample_type;
- format_size = basic->format_size;
- perf_sample_data_init(data, 0, event->hw.last_period);
- data->period = event->hw.last_period;
-
- setup_pebs_time(event, data, basic->tsc);
+ format_group = basic->format_group;
- /*
- * We must however always use iregs for the unwinder to stay sane; the
- * record BP,SP,IP can point into thin air when the record is from a
- * previous PMI context or an (I)RET happened between the record and
- * PMI.
- */
- if (sample_type & PERF_SAMPLE_CALLCHAIN)
- perf_sample_save_callchain(data, event, iregs);
+ __setup_perf_sample_data(event, iregs, data);
*regs = *iregs;
- /* The ip in basic is EventingIP */
- set_linear_ip(regs, basic->ip);
- regs->flags = PERF_EFLAGS_EXACT;
- if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
- if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
- data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
- else
- data->weight.var3_w = 0;
- }
+ /* basic group */
+ retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
+ basic->retire_latency : 0;
+ __setup_pebs_basic_group(event, regs, data, sample_type,
+ basic->ip, basic->tsc, retire);
/*
* The record for MEMINFO is in front of GP
* But PERF_SAMPLE_TRANSACTION needs gprs->ax.
* Save the pointer here but process later.
*/
- if (format_size & PEBS_DATACFG_MEMINFO) {
+ if (format_group & PEBS_DATACFG_MEMINFO) {
meminfo = next_record;
next_record = meminfo + 1;
}
- if (format_size & PEBS_DATACFG_GP) {
+ if (format_group & PEBS_DATACFG_GP) {
gprs = next_record;
next_record = gprs + 1;
- if (event->attr.precise_ip < 2) {
- set_linear_ip(regs, gprs->ip);
- regs->flags &= ~PERF_EFLAGS_EXACT;
- }
+ __setup_pebs_gpr_group(event, regs, gprs, sample_type);
+ }
- if (sample_type & PERF_SAMPLE_REGS_INTR)
- adaptive_pebs_save_regs(regs, gprs);
+ if (format_group & PEBS_DATACFG_MEMINFO) {
+ u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+ meminfo->cache_latency : meminfo->mem_latency;
+ u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+ meminfo->instr_latency : 0;
+ u64 ax = gprs ? gprs->ax : 0;
+
+ __setup_pebs_meminfo_group(event, data, sample_type, latency,
+ instr_latency, meminfo->address,
+ meminfo->aux, meminfo->tsx_tuning,
+ ax);
}
- if (format_size & PEBS_DATACFG_MEMINFO) {
- if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
- u64 weight = meminfo->latency;
+ if (format_group & PEBS_DATACFG_XMMS) {
+ struct pebs_xmm *xmm = next_record;
- if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
- data->weight.var2_w = weight & PEBS_LATENCY_MASK;
- weight >>= PEBS_CACHE_LATENCY_OFFSET;
- }
+ next_record = xmm + 1;
+ perf_regs->xmm_regs = xmm->xmm;
+ }
- /*
- * Although meminfo::latency is defined as a u64,
- * only the lower 32 bits include the valid data
- * in practice on Ice Lake and earlier platforms.
- */
- if (sample_type & PERF_SAMPLE_WEIGHT) {
- data->weight.full = weight ?:
- intel_get_tsx_weight(meminfo->tsx_tuning);
- } else {
- data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
- intel_get_tsx_weight(meminfo->tsx_tuning);
- }
- data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
- }
+ if (format_group & PEBS_DATACFG_LBRS) {
+ struct lbr_entry *lbr = next_record;
+ int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT)
+ & 0xff) + 1;
+ next_record = next_record + num_lbr * sizeof(struct lbr_entry);
- if (sample_type & PERF_SAMPLE_DATA_SRC) {
- data->data_src.val = get_data_src(event, meminfo->aux);
- data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+ if (has_branch_stack(event)) {
+ intel_pmu_store_pebs_lbrs(lbr);
+ intel_pmu_lbr_save_brstack(data, cpuc, event);
}
+ }
- if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
- data->addr = meminfo->address;
- data->sample_flags |= PERF_SAMPLE_ADDR;
- }
+ if (format_group & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) {
+ struct pebs_cntr_header *cntr = next_record;
+ unsigned int nr;
- if (sample_type & PERF_SAMPLE_TRANSACTION) {
- data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
- gprs ? gprs->ax : 0);
- data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+ next_record += sizeof(struct pebs_cntr_header);
+ /*
+ * The PEBS_DATA_CFG is a global register, which is the
+ * superset configuration for all PEBS events.
+ * For the PEBS record of non-sample-read group, ignore
+ * the counter snapshot fields.
+ */
+ if (is_pebs_counter_event_group(event)) {
+ __setup_pebs_counter_group(cpuc, event, cntr, next_record);
+ data->sample_flags |= PERF_SAMPLE_READ;
}
+
+ nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
+ if (cntr->metrics == INTEL_CNTR_METRICS)
+ nr += 2;
+ next_record += nr * sizeof(u64);
}
- if (format_size & PEBS_DATACFG_XMMS) {
- struct pebs_xmm *xmm = next_record;
+ WARN_ONCE(next_record != __pebs + basic->format_size,
+ "PEBS record size %u, expected %llu, config %llx\n",
+ basic->format_size,
+ (u64)(next_record - __pebs),
+ format_group);
+}
- next_record = xmm + 1;
+static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
+{
+ /* Continue bit or null PEBS record indicates fragment follows. */
+ return header->cont || !(header->format & GENMASK_ULL(63, 16));
+}
+
+static void setup_arch_pebs_sample_data(struct perf_event *event,
+ struct pt_regs *iregs,
+ void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 sample_type = event->attr.sample_type;
+ struct arch_pebs_header *header = NULL;
+ struct arch_pebs_aux *meminfo = NULL;
+ struct arch_pebs_gprs *gprs = NULL;
+ struct x86_perf_regs *perf_regs;
+ void *next_record;
+ void *at = __pebs;
+
+ if (at == NULL)
+ return;
+
+ perf_regs = container_of(regs, struct x86_perf_regs, regs);
+ perf_regs->xmm_regs = NULL;
+
+ __setup_perf_sample_data(event, iregs, data);
+
+ *regs = *iregs;
+
+again:
+ header = at;
+ next_record = at + sizeof(struct arch_pebs_header);
+ if (header->basic) {
+ struct arch_pebs_basic *basic = next_record;
+ u16 retire = 0;
+
+ next_record = basic + 1;
+
+ if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+ retire = basic->valid ? basic->retire : 0;
+ __setup_pebs_basic_group(event, regs, data, sample_type,
+ basic->ip, basic->tsc, retire);
+ }
+
+ /*
+ * The record for MEMINFO is in front of GP
+ * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
+ * Save the pointer here but process later.
+ */
+ if (header->aux) {
+ meminfo = next_record;
+ next_record = meminfo + 1;
+ }
+
+ if (header->gpr) {
+ gprs = next_record;
+ next_record = gprs + 1;
+
+ __setup_pebs_gpr_group(event, regs,
+ (struct pebs_gprs *)gprs,
+ sample_type);
+ }
+
+ if (header->aux) {
+ u64 ax = gprs ? gprs->ax : 0;
+
+ __setup_pebs_meminfo_group(event, data, sample_type,
+ meminfo->cache_latency,
+ meminfo->instr_latency,
+ meminfo->address, meminfo->aux,
+ meminfo->tsx_tuning, ax);
+ }
+
+ if (header->xmm) {
+ struct pebs_xmm *xmm;
+
+ next_record += sizeof(struct arch_pebs_xer_header);
+
+ xmm = next_record;
perf_regs->xmm_regs = xmm->xmm;
+ next_record = xmm + 1;
}
- if (format_size & PEBS_DATACFG_LBRS) {
- struct lbr_entry *lbr = next_record;
- int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
- & 0xff) + 1;
- next_record = next_record + num_lbr * sizeof(struct lbr_entry);
+ if (header->lbr) {
+ struct arch_pebs_lbr_header *lbr_header = next_record;
+ struct lbr_entry *lbr;
+ int num_lbr;
+
+ next_record = lbr_header + 1;
+ lbr = next_record;
+
+ num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
+ lbr_header->depth :
+ header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
+ next_record += num_lbr * sizeof(struct lbr_entry);
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
@@ -2055,11 +2452,29 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
}
}
- WARN_ONCE(next_record != __pebs + (format_size >> 48),
- "PEBS record size %llu, expected %llu, config %llx\n",
- format_size >> 48,
- (u64)(next_record - __pebs),
- basic->format_size);
+ if (header->cntr) {
+ struct arch_pebs_cntr_header *cntr = next_record;
+ unsigned int nr;
+
+ next_record += sizeof(struct arch_pebs_cntr_header);
+
+ if (is_pebs_counter_event_group(event)) {
+ __setup_pebs_counter_group(cpuc, event,
+ (struct pebs_cntr_header *)cntr, next_record);
+ data->sample_flags |= PERF_SAMPLE_READ;
+ }
+
+ nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
+ if (cntr->metrics == INTEL_CNTR_METRICS)
+ nr += 2;
+ next_record += nr * sizeof(u64);
+ }
+
+ /* Parse followed fragments if there are. */
+ if (arch_pebs_record_continued(header)) {
+ at = at + header->size;
+ goto again;
+ }
}
static inline void *
@@ -2100,15 +2515,6 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
return NULL;
}
-void intel_pmu_auto_reload_read(struct perf_event *event)
-{
- WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
-
- perf_pmu_disable(event->pmu);
- intel_pmu_drain_pebs_buffer();
- perf_pmu_enable(event->pmu);
-}
-
/*
* Special variant of intel_pmu_save_and_restart() for auto-reload.
*/
@@ -2129,7 +2535,7 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
WARN_ON(this_cpu_read(cpu_hw_events.enabled));
prev_raw_count = local64_read(&hwc->prev_count);
- rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+ new_raw_count = rdpmc(hwc->event_base_rdpmc);
local64_set(&hwc->prev_count, new_raw_count);
/*
@@ -2170,46 +2576,33 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
return 0;
}
+typedef void (*setup_fn)(struct perf_event *, struct pt_regs *, void *,
+ struct perf_sample_data *, struct pt_regs *);
+
+static struct pt_regs dummy_iregs;
+
static __always_inline void
__intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs,
+ struct pt_regs *regs,
struct perf_sample_data *data,
- void *base, void *top,
- int bit, int count,
- void (*setup_sample)(struct perf_event *,
- struct pt_regs *,
- void *,
- struct perf_sample_data *,
- struct pt_regs *))
+ void *at,
+ setup_fn setup_sample)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- struct x86_perf_regs perf_regs;
- struct pt_regs *regs = &perf_regs.regs;
- void *at = get_next_pebs_record_by_bit(base, top, bit);
- static struct pt_regs dummy_iregs;
-
- if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
- /*
- * Now, auto-reload is only enabled in fixed period mode.
- * The reload value is always hwc->sample_period.
- * May need to change it, if auto-reload is enabled in
- * freq mode later.
- */
- intel_pmu_save_and_restart_reload(event, count);
- } else if (!intel_pmu_save_and_restart(event))
- return;
-
- if (!iregs)
- iregs = &dummy_iregs;
+ setup_sample(event, iregs, at, data, regs);
+ perf_event_output(event, data, regs);
+}
- while (count > 1) {
- setup_sample(event, iregs, at, data, regs);
- perf_event_output(event, data, regs);
- at += cpuc->pebs_record_size;
- at = get_next_pebs_record_by_bit(at, top, bit);
- count--;
- }
+static __always_inline void
+__intel_pmu_pebs_last_event(struct perf_event *event,
+ struct pt_regs *iregs,
+ struct pt_regs *regs,
+ struct perf_sample_data *data,
+ void *at,
+ int count,
+ setup_fn setup_sample)
+{
+ struct hw_perf_event *hwc = &event->hw;
setup_sample(event, iregs, at, data, regs);
if (iregs == &dummy_iregs) {
@@ -2225,9 +2618,71 @@ __intel_pmu_pebs_event(struct perf_event *event,
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
- if (perf_event_overflow(event, data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, data, regs);
+ }
+
+ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+ if ((is_pebs_counter_event_group(event))) {
+ /*
+ * The value of each sample has been updated when setup
+ * the corresponding sample data.
+ */
+ perf_event_update_userpage(event);
+ } else {
+ /*
+ * Now, auto-reload is only enabled in fixed period mode.
+ * The reload value is always hwc->sample_period.
+ * May need to change it, if auto-reload is enabled in
+ * freq mode later.
+ */
+ intel_pmu_save_and_restart_reload(event, count);
+ }
+ } else {
+ /*
+ * For a non-precise event, it's possible the
+ * counters-snapshotting records a positive value for the
+ * overflowed event. Then the HW auto-reload mechanism
+ * reset the counter to 0 immediately, because the
+ * pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD
+ * is not set. The counter backwards may be observed in a
+ * PMI handler.
+ *
+ * Since the event value has been updated when processing the
+ * counters-snapshotting record, only needs to set the new
+ * period for the counter.
+ */
+ if (is_pebs_counter_event_group(event))
+ static_call(x86_pmu_set_period)(event);
+ else
+ intel_pmu_save_and_restart(event);
+ }
+}
+
+static __always_inline void
+__intel_pmu_pebs_events(struct perf_event *event,
+ struct pt_regs *iregs,
+ struct perf_sample_data *data,
+ void *base, void *top,
+ int bit, int count,
+ setup_fn setup_sample)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
+ void *at = get_next_pebs_record_by_bit(base, top, bit);
+ int cnt = count;
+
+ if (!iregs)
+ iregs = &dummy_iregs;
+
+ while (cnt > 1) {
+ __intel_pmu_pebs_event(event, iregs, regs, data, at, setup_sample);
+ at += cpuc->pebs_record_size;
+ at = get_next_pebs_record_by_bit(at, top, bit);
+ cnt--;
}
+
+ __intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample);
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
@@ -2264,12 +2719,13 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_
return;
}
- __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
- setup_pebs_fixed_sample_data);
+ __intel_pmu_pebs_events(event, iregs, data, at, top, 0, n,
+ setup_pebs_fixed_sample_data);
}
-static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
+static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask)
{
+ u64 pebs_enabled = cpuc->pebs_enabled & mask;
struct perf_event *event;
int bit;
@@ -2280,7 +2736,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int
* It needs to call intel_pmu_save_and_restart_reload() to
* update the event->count for this case.
*/
- for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
+ for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) {
event = cpuc->events[bit];
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
intel_pmu_save_and_restart_reload(event, 0);
@@ -2315,7 +2771,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
}
if (unlikely(base >= top)) {
- intel_pmu_pebs_event_update_no_drain(cpuc, size);
+ intel_pmu_pebs_event_update_no_drain(cpuc, mask);
return;
}
@@ -2391,26 +2847,79 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
if (error[bit]) {
perf_log_lost_samples(event, error[bit]);
- if (iregs && perf_event_account_interrupt(event))
- x86_pmu_stop(event, 0);
+ if (iregs)
+ perf_event_account_interrupt(event);
}
if (counts[bit]) {
- __intel_pmu_pebs_event(event, iregs, data, base,
- top, bit, counts[bit],
- setup_pebs_fixed_sample_data);
+ __intel_pmu_pebs_events(event, iregs, data, base,
+ top, bit, counts[bit],
+ setup_pebs_fixed_sample_data);
+ }
+ }
+}
+
+static __always_inline void
+__intel_pmu_handle_pebs_record(struct pt_regs *iregs,
+ struct pt_regs *regs,
+ struct perf_sample_data *data,
+ void *at, u64 pebs_status,
+ short *counts, void **last,
+ setup_fn setup_sample)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *event;
+ int bit;
+
+ for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
+ event = cpuc->events[bit];
+
+ if (WARN_ON_ONCE(!event) ||
+ WARN_ON_ONCE(!event->attr.precise_ip))
+ continue;
+
+ if (counts[bit]++) {
+ __intel_pmu_pebs_event(event, iregs, regs, data,
+ last[bit], setup_sample);
}
+
+ last[bit] = at;
+ }
+}
+
+static __always_inline void
+__intel_pmu_handle_last_pebs_record(struct pt_regs *iregs,
+ struct pt_regs *regs,
+ struct perf_sample_data *data,
+ u64 mask, short *counts, void **last,
+ setup_fn setup_sample)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *event;
+ int bit;
+
+ for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
+ if (!counts[bit])
+ continue;
+
+ event = cpuc->events[bit];
+
+ __intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
+ counts[bit], setup_sample);
}
+
}
static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
{
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
- struct perf_event *event;
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
+ struct pebs_basic *basic;
void *base, *at, *top;
- int bit;
u64 mask;
if (!x86_pmu.pebs_active)
@@ -2423,44 +2932,132 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
mask = hybrid(cpuc->pmu, pebs_events_mask) |
(hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
+ mask &= cpuc->pebs_enabled;
if (unlikely(base >= top)) {
- intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
+ intel_pmu_pebs_event_update_no_drain(cpuc, mask);
return;
}
- for (at = base; at < top; at += cpuc->pebs_record_size) {
+ if (!iregs)
+ iregs = &dummy_iregs;
+
+ /* Process all but the last event for each counter. */
+ for (at = base; at < top; at += basic->format_size) {
u64 pebs_status;
- pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
- pebs_status &= mask;
+ basic = at;
+ if (basic->format_size != cpuc->pebs_record_size)
+ continue;
- for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX)
- counts[bit]++;
+ pebs_status = mask & basic->applicable_counters;
+ __intel_pmu_handle_pebs_record(iregs, regs, data, at,
+ pebs_status, counts, last,
+ setup_pebs_adaptive_sample_data);
}
- for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
- if (counts[bit] == 0)
- continue;
+ __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, counts, last,
+ setup_pebs_adaptive_sample_data);
+}
- event = cpuc->events[bit];
- if (WARN_ON_ONCE(!event))
- continue;
+static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
+ struct perf_sample_data *data)
+{
+ short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ union arch_pebs_index index;
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
+ void *base, *at, *top;
+ u64 mask;
- if (WARN_ON_ONCE(!event->attr.precise_ip))
+ rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);
+
+ if (unlikely(!index.wr)) {
+ intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
+ return;
+ }
+
+ base = cpuc->pebs_vaddr;
+ top = cpuc->pebs_vaddr + (index.wr << ARCH_PEBS_INDEX_WR_SHIFT);
+
+ index.wr = 0;
+ index.full = 0;
+ index.en = 1;
+ if (cpuc->n_pebs == cpuc->n_large_pebs)
+ index.thresh = ARCH_PEBS_THRESH_MULTI;
+ else
+ index.thresh = ARCH_PEBS_THRESH_SINGLE;
+ wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
+
+ mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
+
+ if (!iregs)
+ iregs = &dummy_iregs;
+
+ /* Process all but the last event for each counter. */
+ for (at = base; at < top;) {
+ struct arch_pebs_header *header;
+ struct arch_pebs_basic *basic;
+ u64 pebs_status;
+
+ header = at;
+
+ if (WARN_ON_ONCE(!header->size))
+ break;
+
+ /* 1st fragment or single record must have basic group */
+ if (!header->basic) {
+ at += header->size;
continue;
+ }
- __intel_pmu_pebs_event(event, iregs, data, base,
- top, bit, counts[bit],
- setup_pebs_adaptive_sample_data);
+ basic = at + sizeof(struct arch_pebs_header);
+ pebs_status = mask & basic->applicable_counters;
+ __intel_pmu_handle_pebs_record(iregs, regs, data, at,
+ pebs_status, counts, last,
+ setup_arch_pebs_sample_data);
+
+ /* Skip non-last fragments */
+ while (arch_pebs_record_continued(header)) {
+ if (!header->size)
+ break;
+ at += header->size;
+ header = at;
+ }
+
+ /* Skip last fragment or the single record */
+ at += header->size;
}
+
+ __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
+ counts, last,
+ setup_arch_pebs_sample_data);
+}
+
+static void __init intel_arch_pebs_init(void)
+{
+ /*
+ * Current hybrid platforms always both support arch-PEBS or not
+ * on all kinds of cores. So directly set x86_pmu.arch_pebs flag
+ * if boot cpu supports arch-PEBS.
+ */
+ x86_pmu.arch_pebs = 1;
+ x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+ x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
+ x86_pmu.pebs_capable = ~0ULL;
+ x86_pmu.flags |= PMU_FL_PEBS_ALL;
+
+ x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
+ x86_pmu.pebs_disable = __intel_pmu_pebs_disable;
}
/*
- * BTS, PEBS probe and setup
+ * PEBS probe and setup
*/
-void __init intel_ds_init(void)
+static void __init intel_ds_pebs_init(void)
{
/*
* No support for 32bit formats
@@ -2468,13 +3065,12 @@ void __init intel_ds_init(void)
if (!boot_cpu_has(X86_FEATURE_DTES64))
return;
- x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
- x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+ x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
if (x86_pmu.version <= 4)
x86_pmu.pebs_no_isolation = 1;
- if (x86_pmu.pebs) {
+ if (x86_pmu.ds_pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
char *pebs_qual = "";
int format = x86_pmu.intel_cap.pebs_format;
@@ -2482,6 +3078,11 @@ void __init intel_ds_init(void)
if (format < 4)
x86_pmu.intel_cap.pebs_baseline = 0;
+ x86_pmu.pebs_enable = intel_pmu_pebs_enable;
+ x86_pmu.pebs_disable = intel_pmu_pebs_disable;
+ x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all;
+ x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all;
+
switch (format) {
case 0:
pr_cont("PEBS fmt0%c, ", pebs_type);
@@ -2517,6 +3118,10 @@ void __init intel_ds_init(void)
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
break;
+ case 6:
+ if (x86_pmu.intel_cap.pebs_baseline)
+ x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
+ fallthrough;
case 5:
x86_pmu.pebs_ept = 1;
fallthrough;
@@ -2541,9 +3146,17 @@ void __init intel_ds_init(void)
PERF_SAMPLE_REGS_USER |
PERF_SAMPLE_REGS_INTR);
}
- pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
+ pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual);
- if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
+ /*
+ * The PEBS-via-PT is not supported on hybrid platforms,
+ * because not all CPUs of a hybrid machine support it.
+ * The global x86_pmu.intel_cap, which only contains the
+ * common capabilities, is used to check the availability
+ * of the feature. The per-PMU pebs_output_pt_available
+ * in a hybrid machine should be ignored.
+ */
+ if (x86_pmu.intel_cap.pebs_output_pt_available) {
pr_cont("PEBS-via-PT, ");
x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
}
@@ -2552,17 +3165,25 @@ void __init intel_ds_init(void)
default:
pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
- x86_pmu.pebs = 0;
+ x86_pmu.ds_pebs = 0;
}
}
}
+void __init intel_pebs_init(void)
+{
+ if (x86_pmu.intel_cap.pebs_format == 0xf)
+ intel_arch_pebs_init();
+ else
+ intel_ds_pebs_init();
+}
+
void perf_restore_debug_store(void)
{
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
return;
- wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
+ wrmsrq(MSR_IA32_DS_AREA, (unsigned long)ds);
}