summaryrefslogtreecommitdiff
path: root/arch/x86/events/perf_event.h
diff options
context:
space:
mode:
authorStephane Eranian <eranian@google.com>2022-03-22 15:15:07 -0700
committerPeter Zijlstra <peterz@infradead.org>2022-04-05 10:24:37 +0200
commitada543459cab7f653dcacdaba4011a8bb19c627c (patch)
tree0881e333504fb5883bcfdc57b7c733880d1faba3 /arch/x86/events/perf_event.h
parenta77d41ac3a0f41c80120ec5b8b08ab284fec950a (diff)
perf/x86/amd: Add AMD Fam19h Branch Sampling support
Add support for the AMD Fam19h 16-deep branch sampling feature as described in the AMD PPR Fam19h Model 01h Revision B1. This is a model specific extension. It is not an architected AMD feature. The Branch Sampling (BRS) operates with a 16-deep saturating buffer in MSR registers. There is no branch type filtering. All control flow changes are captured. BRS relies on specific programming of the core PMU of Fam19h. In particular, the following requirements must be met: - the sampling period be greater than 16 (BRS depth) - the sampling period must use a fixed and not frequency mode BRS interacts with the NMI interrupt as well. Because enabling BRS is expensive, it is only activated after P event occurrences, where P is the desired sampling period. At P occurrences of the event, the counter overflows, the CPU catches the interrupt, activates BRS for 16 branches until it saturates, and then delivers the NMI to the kernel. Between the overflow and the time BRS activates more branches may be executed skewing the period. All along, the sampling event keeps counting. The skid may be attenuated by reducing the sampling period by 16 (subsequent patch). BRS is integrated into perf_events seamlessly via the same PERF_RECORD_BRANCH_STACK sample format. BRS generates perf_branch_entry records in the sampling buffer. No prediction information is supported. The branches are stored in reverse order of execution. The most recent branch is the first entry in each record. No modification to the perf tool is necessary. BRS can be used with any sampling event. However, it is recommended to use the RETIRED_BRANCH_INSTRUCTIONS event because it matches what the BRS captures. $ perf record -b -c 1000037 -e cpu/event=0xc2,name=ret_br_instructions/ test $ perf report -D 56531696056126 0x193c000 [0x1a8]: PERF_RECORD_SAMPLE(IP, 0x2): 18122/18230: 0x401d24 period: 1000037 addr: 0 ... branch stack: nr:16 ..... 0: 0000000000401d24 -> 0000000000401d5a 0 cycles 0 ..... 1: 0000000000401d5c -> 0000000000401d24 0 cycles 0 ..... 2: 0000000000401d22 -> 0000000000401d5c 0 cycles 0 ..... 3: 0000000000401d5e -> 0000000000401d22 0 cycles 0 ..... 4: 0000000000401d20 -> 0000000000401d5e 0 cycles 0 ..... 5: 0000000000401d3e -> 0000000000401d20 0 cycles 0 ..... 6: 0000000000401d42 -> 0000000000401d3e 0 cycles 0 ..... 7: 0000000000401d3c -> 0000000000401d42 0 cycles 0 ..... 8: 0000000000401d44 -> 0000000000401d3c 0 cycles 0 ..... 9: 0000000000401d3a -> 0000000000401d44 0 cycles 0 ..... 10: 0000000000401d46 -> 0000000000401d3a 0 cycles 0 ..... 11: 0000000000401d38 -> 0000000000401d46 0 cycles 0 ..... 12: 0000000000401d48 -> 0000000000401d38 0 cycles 0 ..... 13: 0000000000401d36 -> 0000000000401d48 0 cycles 0 ..... 14: 0000000000401d4a -> 0000000000401d36 0 cycles 0 ..... 15: 0000000000401d34 -> 0000000000401d4a 0 cycles 0 ... thread: test:18230 ...... dso: test Signed-off-by: Stephane Eranian <eranian@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20220322221517.2510440-4-eranian@google.com
Diffstat (limited to 'arch/x86/events/perf_event.h')
-rw-r--r--arch/x86/events/perf_event.h101
1 files changed, 85 insertions, 16 deletions
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 150261d929b9..6f1265163c9f 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -67,22 +67,23 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
/*
* struct hw_perf_event.flags flags
*/
-#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */
-
-#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
-#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
-#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
-#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
-#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
-#define PERF_X86_EVENT_PEBS_STLAT 0x8000 /* st+stlat data address sampling */
+#define PERF_X86_EVENT_PEBS_LDLAT 0x00001 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST 0x00002 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x00004 /* haswell style datala, store */
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x00008 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x00010 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL 0x00020 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC 0x00040 /* dynamic alloc'd constraint */
+
+#define PERF_X86_EVENT_EXCL_ACCT 0x00100 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD 0x00200 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_LARGE_PEBS 0x00400 /* use large PEBS */
+#define PERF_X86_EVENT_PEBS_VIA_PT 0x00800 /* use PT buffer for PEBS */
+#define PERF_X86_EVENT_PAIR 0x01000 /* Large Increment per Cycle */
+#define PERF_X86_EVENT_LBR_SELECT 0x02000 /* Save/Restore MSR_LBR_SELECT */
+#define PERF_X86_EVENT_TOPDOWN 0x04000 /* Count Topdown slots/metrics events */
+#define PERF_X86_EVENT_PEBS_STLAT 0x08000 /* st+stlat data address sampling */
+#define PERF_X86_EVENT_AMD_BRS 0x10000 /* AMD Branch Sampling */
static inline bool is_topdown_count(struct perf_event *event)
{
@@ -325,6 +326,8 @@ struct cpu_hw_events {
* AMD specific bits
*/
struct amd_nb *amd_nb;
+ int brs_active; /* BRS is enabled */
+
/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
u64 perf_ctr_virt_mask;
int n_pair; /* Large increment events */
@@ -1105,6 +1108,11 @@ int x86_pmu_hw_config(struct perf_event *event);
void x86_pmu_disable_all(void);
+static inline bool has_amd_brs(struct hw_perf_event *hwc)
+{
+ return hwc->flags & PERF_X86_EVENT_AMD_BRS;
+}
+
static inline bool is_counter_pair(struct hw_perf_event *hwc)
{
return hwc->flags & PERF_X86_EVENT_PAIR;
@@ -1210,6 +1218,50 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
+int amd_brs_init(void);
+void amd_brs_disable(void);
+void amd_brs_enable(void);
+void amd_brs_enable_all(void);
+void amd_brs_disable_all(void);
+void amd_brs_drain(void);
+void amd_brs_disable_all(void);
+int amd_brs_setup_filter(struct perf_event *event);
+void amd_brs_reset(void);
+
+static inline void amd_pmu_brs_add(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ perf_sched_cb_inc(event->ctx->pmu);
+ cpuc->lbr_users++;
+ /*
+ * No need to reset BRS because it is reset
+ * on brs_enable() and it is saturating
+ */
+}
+
+static inline void amd_pmu_brs_del(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ cpuc->lbr_users--;
+ WARN_ON_ONCE(cpuc->lbr_users < 0);
+
+ perf_sched_cb_dec(event->ctx->pmu);
+}
+
+void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in);
+
+/*
+ * check if BRS is activated on the CPU
+ * active defined as it has non-zero users and DBG_EXT_CFG.BRSEN=1
+ */
+static inline bool amd_brs_active(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ return cpuc->brs_active;
+}
#else /* CONFIG_CPU_SUP_AMD */
@@ -1218,6 +1270,23 @@ static inline int amd_pmu_init(void)
return 0;
}
+static inline int amd_brs_init(void)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void amd_brs_drain(void)
+{
+}
+
+static inline void amd_brs_enable_all(void)
+{
+}
+
+static inline void amd_brs_disable_all(void)
+{
+}
+
#endif /* CONFIG_CPU_SUP_AMD */
static inline int is_pebs_pt(struct perf_event *event)