diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 5 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_ds.c | 49 |
3 files changed, 54 insertions, 2 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index f3a9a94e4d22..ba9aadfa683b 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -66,6 +66,7 @@ struct event_constraint { * struct event_constraint flags */ #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ +#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -242,6 +243,10 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) +#define INTEL_PST_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ae6096b175b9..e84c4ba44b59 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -163,6 +163,7 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = { EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), @@ -171,6 +172,7 @@ struct attribute *nhm_events_attrs[] = { struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), + EVENT_PTR(mem_st_snb), NULL, }; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index a6400bd0463c..36dc13d1ad02 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -69,6 +69,44 @@ static const u64 pebs_data_source[] = { OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */ }; +static u64 precise_store_data(u64 status) +{ + union intel_x86_pebs_dse dse; + u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2); + + dse.val = status; + + /* + * bit 4: TLB access + * 1 = stored missed 2nd level TLB + * + * so it either hit the walker or the OS + * otherwise hit 2nd level TLB + */ + if (dse.st_stlb_miss) + val |= P(TLB, MISS); + else + val |= P(TLB, HIT); + + /* + * bit 0: hit L1 data cache + * if not set, then all we know is that + * it missed L1D + */ + if (dse.st_l1d_hit) + val |= P(LVL, HIT); + else + val |= P(LVL, MISS); + + /* + * bit 5: Locked prefix + */ + if (dse.st_locked) + val |= P(LOCK, LOCKED); + + return val; +} + static u64 load_latency_data(u64 status) { union intel_x86_pebs_dse dse; @@ -486,6 +524,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ + INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -500,6 +539,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ + INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */ INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ @@ -537,6 +577,8 @@ void intel_pmu_pebs_enable(struct perf_event *event) if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32); + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST) + cpuc->pebs_enabled |= 1ULL << 63; } void intel_pmu_pebs_disable(struct perf_event *event) @@ -657,12 +699,13 @@ static void __intel_pmu_pebs_event(struct perf_event *event, struct perf_sample_data data; struct pt_regs regs; u64 sample_type; - int fll; + int fll, fst; if (!intel_pmu_save_and_restart(event)) return; fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; + fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST; perf_sample_data_init(&data, 0, event->hw.last_period); @@ -672,7 +715,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, /* * if PEBS-LL or PreciseStore */ - if (fll) { + if (fll || fst) { if (sample_type & PERF_SAMPLE_ADDR) data.addr = pebs->dla; @@ -688,6 +731,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, if (sample_type & PERF_SAMPLE_DATA_SRC) { if (fll) data.data_src.val = load_latency_data(pebs->dse); + else + data.data_src.val = precise_store_data(pebs->dse); } } |