summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/events/intel/core.c2
-rw-r--r--arch/x86/events/intel/ds.c51
-rw-r--r--arch/x86/events/perf_event.h2
-rw-r--r--include/uapi/linux/perf_event.h30
4 files changed, 64 insertions, 21 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index c3439a36dcf9..6f342001ec6a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4208,6 +4208,8 @@ __init int intel_pmu_init(void)
skl_format_attr);
WARN_ON(!x86_pmu.format_attrs);
x86_pmu.cpu_events = hsw_events_attrs;
+ intel_pmu_pebs_data_source_skl(
+ boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
pr_cont("Skylake events, ");
break;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3ccdf8cb4495..98e36e0c791c 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -49,34 +49,47 @@ union intel_x86_pebs_dse {
*/
#define P(a, b) PERF_MEM_S(a, b)
#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
/* Version for Sandy Bridge and later */
static u64 pebs_data_source[] = {
- P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
- OP_LH | P(LVL, L1) | P(SNOOP, NONE), /* 0x01: L1 local */
- OP_LH | P(LVL, LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
- OP_LH | P(LVL, L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
- OP_LH | P(LVL, L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
- OP_LH | P(LVL, L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
- OP_LH | P(LVL, L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
- OP_LH | P(LVL, L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
- OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
- OP_LH | P(LVL, REM_CCE1) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
- OP_LH | P(LVL, LOC_RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
- OP_LH | P(LVL, REM_RAM1) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
- OP_LH | P(LVL, LOC_RAM) | SNOOP_NONE_MISS,/* 0x0c: L3 miss, excl */
- OP_LH | P(LVL, REM_RAM1) | SNOOP_NONE_MISS,/* 0x0d: L3 miss, excl */
- OP_LH | P(LVL, IO) | P(SNOOP, NONE), /* 0x0e: I/O */
- OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x04: L3 hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* 0x05: L3 hit, snoop miss */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* 0x06: L3 hit, snoop hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x07: L3 hit, snoop hitm */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x08: L3 miss snoop hit */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* 0x09: L3 miss snoop hitm*/
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* 0x0a: L3 miss, shared */
+ OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* 0x0b: L3 miss, shared */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* 0x0c: L3 miss, excl */
+ OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* 0x0d: L3 miss, excl */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0e: I/O */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
};
/* Patch up minor differences in the bits */
void __init intel_pmu_pebs_data_source_nhm(void)
{
- pebs_data_source[0x05] = OP_LH | P(LVL, L3) | P(SNOOP, HIT);
- pebs_data_source[0x06] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
- pebs_data_source[0x07] = OP_LH | P(LVL, L3) | P(SNOOP, HITM);
+ pebs_data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+ pebs_data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+ pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+}
+
+void __init intel_pmu_pebs_data_source_skl(bool pmem)
+{
+ u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4);
+
+ pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
+ pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT);
+ pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
+ pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD);
+ pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM);
}
static u64 precise_store_data(u64 status)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 2e9636e4068f..0f7dad8bd358 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -948,6 +948,8 @@ void intel_pmu_lbr_init_knl(void);
void intel_pmu_pebs_data_source_nhm(void);
+void intel_pmu_pebs_data_source_skl(bool pmem);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 642db5fa3286..2a37ae925d85 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -954,14 +954,20 @@ union perf_mem_data_src {
mem_snoop:5, /* snoop mode */
mem_lock:2, /* lock instr */
mem_dtlb:7, /* tlb access */
- mem_rsvd:31;
+ mem_lvl_num:4, /* memory hierarchy level number */
+ mem_remote:1, /* remote */
+ mem_snoopx:2, /* snoop mode, ext */
+ mem_rsvd:24;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:31,
+ __u64 mem_rsvd:24,
+ mem_snoopx:2, /* snoop mode, ext */
+ mem_remote:1, /* remote */
+ mem_lvl_num:4, /* memory hierarchy level number */
mem_dtlb:7, /* tlb access */
mem_lock:2, /* lock instr */
mem_snoop:5, /* snoop mode */
@@ -998,6 +1004,22 @@ union perf_mem_data_src {
#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
#define PERF_MEM_LVL_SHIFT 5
+#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
+#define PERF_MEM_REMOTE_SHIFT 37
+
+#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
+#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
+#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
+#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
+/* 5-0xa available */
+#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
+#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
+#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
+#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
+#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
+
+#define PERF_MEM_LVLNUM_SHIFT 33
+
/* snoop mode */
#define PERF_MEM_SNOOP_NA 0x01 /* not available */
#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
@@ -1006,6 +1028,10 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
#define PERF_MEM_SNOOP_SHIFT 19
+#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
+/* 1 free */
+#define PERF_MEM_SNOOPX_SHIFT 37
+
/* locked instruction */
#define PERF_MEM_LOCK_NA 0x01 /* not available */
#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */