summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-05-14 07:56:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-05-14 07:56:51 -0700
commitef21831c2e4e66cb948d5107f47c1aa0a5711a56 (patch)
treec349aef6589d20d9023eb3fc031165c051da9bb7
parentf3b9e8e4c84845d09dbba49f664fc8407f18a3dc (diff)
parentb752ea0c28e3f7f0aaaad6abf84f735eebc37a60 (diff)
Merge tag 'perf_urgent_for_v6.4_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Borislav Petkov: - Make sure the PEBS buffer is flushed before reprogramming the hardware so that the correct record sizes are used - Update the sample size for AMD BRS events - Fix a confusion with using the same on-stack struct with different events in the event processing path * tag 'perf_urgent_for_v6.4_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: perf/x86/intel/ds: Flush PEBS DS when changing PEBS_DATA_CFG perf/x86: Fix missing sample size update on AMD BRS perf/core: Fix perf_sample_data not properly initialized for different swevents in perf_tp_event()
-rw-r--r--arch/x86/events/core.c6
-rw-r--r--arch/x86/events/intel/ds.c56
-rw-r--r--arch/x86/include/asm/perf_event.h3
-rw-r--r--kernel/events/core.c14
4 files changed, 50 insertions, 29 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d096b04bf80e..9d248703cbdd 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1703,10 +1703,8 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
perf_sample_data_init(&data, 0, event->hw.last_period);
- if (has_branch_stack(event)) {
- data.br_stack = &cpuc->lbr_stack;
- data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
- }
+ if (has_branch_stack(event))
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index a2e566e53076..df88576d6b2a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1229,12 +1229,14 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct perf_event *event, bool add)
{
struct pmu *pmu = event->pmu;
+
/*
* Make sure we get updated with the first PEBS
* event. It will trigger also during removal, but
* that does not hurt:
*/
- bool update = cpuc->n_pebs == 1;
+ if (cpuc->n_pebs == 1)
+ cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
if (!needed_cb)
@@ -1242,7 +1244,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
else
perf_sched_cb_dec(pmu);
- update = true;
+ cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
}
/*
@@ -1252,24 +1254,13 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
if (x86_pmu.intel_cap.pebs_baseline && add) {
u64 pebs_data_cfg;
- /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
- if (cpuc->n_pebs == 1) {
- cpuc->pebs_data_cfg = 0;
- cpuc->pebs_record_size = sizeof(struct pebs_basic);
- }
-
pebs_data_cfg = pebs_update_adaptive_cfg(event);
-
- /* Update pebs_record_size if new event requires more data. */
- if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
- cpuc->pebs_data_cfg |= pebs_data_cfg;
- adaptive_pebs_record_size_update();
- update = true;
- }
+ /*
+ * Be sure to update the thresholds when we change the record.
+ */
+ if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
+ cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
}
-
- if (update)
- pebs_update_threshold(cpuc);
}
void intel_pmu_pebs_add(struct perf_event *event)
@@ -1326,9 +1317,17 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
wrmsrl(base + idx, value);
}
+static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
+{
+ if (cpuc->n_pebs == cpuc->n_large_pebs &&
+ cpuc->n_pebs != cpuc->n_pebs_via_pt)
+ intel_pmu_drain_pebs_buffer();
+}
+
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
struct hw_perf_event *hwc = &event->hw;
struct debug_store *ds = cpuc->ds;
unsigned int idx = hwc->idx;
@@ -1344,11 +1343,22 @@ void intel_pmu_pebs_enable(struct perf_event *event)
if (x86_pmu.intel_cap.pebs_baseline) {
hwc->config |= ICL_EVENTSEL_ADAPTIVE;
- if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
- wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
- cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
+ if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
+ /*
+ * drain_pebs() assumes uniform record size;
+ * hence we need to drain when changing said
+ * size.
+ */
+ intel_pmu_drain_large_pebs(cpuc);
+ adaptive_pebs_record_size_update();
+ wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
+ cpuc->active_pebs_data_cfg = pebs_data_cfg;
}
}
+ if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
+ cpuc->pebs_data_cfg = pebs_data_cfg;
+ pebs_update_threshold(cpuc);
+ }
if (idx >= INTEL_PMC_IDX_FIXED) {
if (x86_pmu.intel_cap.pebs_format < 5)
@@ -1391,9 +1401,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- if (cpuc->n_pebs == cpuc->n_large_pebs &&
- cpuc->n_pebs != cpuc->n_pebs_via_pt)
- intel_pmu_drain_pebs_buffer();
+ intel_pmu_drain_large_pebs(cpuc);
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8fc15ed5e60b..abf09882f58b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -121,6 +121,9 @@
#define PEBS_DATACFG_LBRS BIT_ULL(3)
#define PEBS_DATACFG_LBR_SHIFT 24
+/* Steal the highest bit of pebs_data_cfg for SW usage */
+#define PEBS_UPDATE_DS_SW BIT_ULL(63)
+
/*
* Intel "Architectural Performance Monitoring" CPUID
* detection/enumeration details:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 68baa8194d9f..db016e418931 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10150,8 +10150,20 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
perf_trace_buf_update(record, event_type);
hlist_for_each_entry_rcu(event, head, hlist_entry) {
- if (perf_tp_event_match(event, &data, regs))
+ if (perf_tp_event_match(event, &data, regs)) {
perf_swevent_event(event, count, &data, regs);
+
+ /*
+ * Here use the same on-stack perf_sample_data,
+ * some members in data are event-specific and
+ * need to be re-computed for different sweveents.
+ * Re-initialize data->sample_flags safely to avoid
+ * the problem that next event skips preparing data
+ * because data->sample_flags is set.
+ */
+ perf_sample_data_init(&data, 0, 0);
+ perf_sample_save_raw_data(&data, &raw);
+ }
}
/*