diff options
Diffstat (limited to 'tools/perf/util/intel-pt.c')
-rw-r--r-- | tools/perf/util/intel-pt.c | 399 |
1 files changed, 315 insertions, 84 deletions
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index f38893e0b036..9b1011fe4826 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -127,6 +127,7 @@ struct intel_pt { bool single_pebs; bool sample_pebs; + int pebs_data_src_fmt; struct evsel *pebs_evsel; u64 evt_sample_type; @@ -175,6 +176,7 @@ enum switch_state { struct intel_pt_pebs_event { struct evsel *evsel; u64 id; + int data_src_fmt; }; struct intel_pt_queue { @@ -249,7 +251,7 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -598,15 +600,15 @@ static struct auxtrace_cache *intel_pt_cache(struct dso *dso, struct auxtrace_cache *c; unsigned int bits; - if (dso->auxtrace_cache) - return dso->auxtrace_cache; + if (dso__auxtrace_cache(dso)) + return dso__auxtrace_cache(dso); bits = intel_pt_cache_size(dso, machine); /* Ignoring cache creation failure */ c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); - dso->auxtrace_cache = c; + dso__set_auxtrace_cache(dso, c); return c; } @@ -650,7 +652,7 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) if (!c) return NULL; - return auxtrace_cache__lookup(dso->auxtrace_cache, offset); + return auxtrace_cache__lookup(dso__auxtrace_cache(dso), offset); } static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, @@ -661,7 +663,7 @@ static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, if (!c) return; - auxtrace_cache__remove(dso->auxtrace_cache, offset); + auxtrace_cache__remove(dso__auxtrace_cache(dso), offset); } static inline bool intel_pt_guest_kernel_ip(uint64_t ip) @@ -764,6 +766,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, addr_location__init(&al); 
intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; if (to_ip && *ip == to_ip) goto out_no_cache; @@ -820,8 +823,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, } dso = map__dso(al.map); - if (dso->data.status == DSO_DATA_STATUS_ERROR && - dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) { + if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) { ret = -ENOENT; goto out_ret; } @@ -854,7 +857,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, /* Load maps to ensure dso->is_64_bit has been updated */ map__load(al.map); - x86_64 = dso->is_64_bit; + x86_64 = dso__is_64_bit(dso); while (1) { len = dso__data_read_offset(dso, machine, @@ -898,6 +901,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (to_ip && *ip == to_ip) { intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; goto out_no_cache; } @@ -1008,7 +1012,7 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) offset = map__map_ip(al.map, ip); - res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, map__dso(al.map)->long_name); + res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, dso__long_name(map__dso(al.map))); addr_location__exit(&al); return res; } @@ -1762,12 +1766,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct dummy_branch_stack { u64 nr; u64 hw_idx; struct branch_entry entries; } dummy_bs; + int ret; if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; @@ -1775,6 +1780,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_b_sample(pt, ptq, event, &sample); sample.id = ptq->pt->branches_id; @@ -1804,8 +1810,10 @@ 
static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; } - return intel_pt_deliver_synth_event(pt, event, &sample, + ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->branches_sample_type); + perf_sample__exit(&sample); + return ret; } static void intel_pt_prep_sample(struct intel_pt *pt, @@ -1833,11 +1841,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->instructions_id; @@ -1857,16 +1867,19 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->instructions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->instructions_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; u64 period = 0; + int ret; if (ptq->sample_ipc) period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt; @@ -1874,6 +1887,7 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) if (!period || intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cycles_id; @@ -1885,25 +1899,31 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt; ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt; - return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); + ret = 
intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->transactions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->transactions_sample_type); + perf_sample__exit(&sample); + return ret; } static void intel_pt_prep_p_sample(struct intel_pt *pt, @@ -1951,15 +1971,17 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_cbr raw; u32 flags; + int ret; if (intel_pt_skip_cbr_event(pt)) return 0; ptq->cbr_seen = ptq->state->cbr; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cbr_id; @@ -1973,20 +1995,24 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_psb raw; + int ret; 
if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->psb_id; @@ -1999,20 +2025,24 @@ static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_mwait raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->mwait_id; @@ -2024,20 +2054,24 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwre raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwre_id; @@ -2049,20 +2083,24 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - 
return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_exstop raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->exstop_id; @@ -2074,20 +2112,24 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwrx raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwrx_id; @@ -2099,8 +2141,10 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } /* @@ -2230,19 +2274,160 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int 
intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */ +static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */ +static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + 
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel pebs_set_tlb_lock() */ +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Based on kernel __grt_latency_data() */ +static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk, + const u64 *pebs_data_source) +{ + u64 val; + + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; + val = pebs_data_source[dse]; + + pebs_set_tlb_lock(&val, tlb, lock); + + if (blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +/* Default value for data 
source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) + +enum DATA_SRC_FORMAT { + DATA_SRC_FORMAT_ERR = -1, + DATA_SRC_FORMAT_NA = 0, + DATA_SRC_FORMAT_GRT = 1, + DATA_SRC_FORMAT_CMT = 2, +}; + +/* Based on kernel grt_latency_data() and cmt_latency_data */ +static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt) +{ + switch (data_src_fmt) { + case DATA_SRC_FORMAT_GRT: { + union { + u64 val; + struct { + unsigned int dse:4; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:25; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_grt); + } + case DATA_SRC_FORMAT_CMT: { + union { + u64 val; + struct { + unsigned int dse:5; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:24; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_cmt); + } + default: + return PERF_MEM_NA; + } +} + +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, + u64 id, int data_src_fmt) { const struct intel_pt_blk_items *items = &ptq->state->items; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; u64 sample_type = evsel->core.attr.sample_type; u8 cpumode; - u64 regs[8 * sizeof(sample.intr_regs.mask)]; + u64 regs[8 * sizeof(sample.intr_regs->mask)]; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_a_sample(ptq, event, &sample); sample.id = id; @@ -2289,15 +2474,16 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse items->mask[INTEL_PT_XMM_POS])) { u64 regs_mask = 
evsel->core.attr.sample_regs_intr; u64 *pos; + struct regs_dump *intr_regs = perf_sample__intr_regs(&sample); - sample.intr_regs.abi = items->is_32_bit ? + intr_regs->abi = items->is_32_bit ? PERF_SAMPLE_REGS_ABI_32 : PERF_SAMPLE_REGS_ABI_64; - sample.intr_regs.regs = regs; + intr_regs->regs = regs; - pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + pos = intel_pt_add_gp_regs(intr_regs, regs, items, regs_mask); - intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); + intel_pt_add_xmm(intr_regs, pos, items, regs_mask); } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -2348,6 +2534,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse } } + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (items->has_mem_aux_info && data_src_fmt) { + if (data_src_fmt < 0) { + pr_err("Intel PT missing data_src info\n"); + return -1; + } + sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt); + } else { + sample.data_src = PERF_MEM_NA; + } + } + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { u64 ax = items->has_rax ? 
items->rax : 0; /* Refer kernel's intel_hsw_transaction() */ @@ -2359,16 +2557,19 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse sample.transaction = txn; } - return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; + int data_src_fmt = pt->pebs_data_src_fmt; u64 id = evsel->core.id[0]; - return intel_pt_do_synth_pebs_sample(ptq, evsel, id); + return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt); } static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) @@ -2393,7 +2594,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) hw_id); return intel_pt_synth_single_pebs_sample(ptq); } - err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt); if (err) return err; } @@ -2405,16 +2606,17 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct { struct perf_synth_intel_evt cfe; struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS]; } raw; - int i; + int i, ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->evt_id; @@ -2436,20 +2638,24 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->evt_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->evt_sample_type); + 
perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_iflag_chg raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->iflag_chg_id; @@ -2469,8 +2675,10 @@ static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->iflag_chg_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->iflag_chg_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, @@ -3353,6 +3561,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +/* + * Events with data_src are identified by L1_Hit_Indication + * refer https://github.com/intel/perfmon + */ +static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel) +{ + struct perf_env *env = pt->machine->env; + int fmt = DATA_SRC_FORMAT_NA; + + if (!env->cpuid) + return DATA_SRC_FORMAT_ERR; + + /* + * PEBS-via-PT is only supported on E-core non-hybrid. Of those only + * Gracemont and Crestmont have data_src. 
Check for: + * Alderlake N (Gracemont) + * Sierra Forest (Crestmont) + * Grand Ridge (Crestmont) + */ + + if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19)) + fmt = DATA_SRC_FORMAT_GRT; + + if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) || + !strncmp(env->cpuid, "GenuineIntel,6,182,", 19)) + fmt = DATA_SRC_FORMAT_CMT; + + if (fmt == DATA_SRC_FORMAT_NA) + return fmt; + + /* + * Only data_src events are: + * mem-loads event=0xd0,umask=0x5 + * mem-stores event=0xd0,umask=0x6 + */ + if (evsel->core.attr.type == PERF_TYPE_RAW && + ((evsel->core.attr.config & 0xffff) == 0x5d0 || + (evsel->core.attr.config & 0xffff) == 0x6d0)) + return fmt; + + return DATA_SRC_FORMAT_NA; +} + static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) @@ -3373,6 +3624,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, ptq->pebs[hw_id].evsel = evsel; ptq->pebs[hw_id].id = sample->id; + ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel); return 0; } @@ -3416,7 +3668,7 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) } dso = map__dso(al.map); - if (!dso || !dso->auxtrace_cache) + if (!dso || !dso__auxtrace_cache(dso)) continue; offset = map__map_ip(al.map, addr); @@ -3436,7 +3688,7 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) } else { intel_pt_cache_invalidate(dso, machine, offset); intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", - dso->long_name, addr); + dso__long_name(dso), addr); } } out: @@ -3447,7 +3699,7 @@ out: static int intel_pt_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -3531,7 +3783,7 @@ static int intel_pt_process_event(struct perf_session *session, return err; } -static int intel_pt_flush(struct 
perf_session *session, struct perf_tool *tool) +static int intel_pt_flush(struct perf_session *session, const struct perf_tool *tool) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -3598,7 +3850,7 @@ static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, static int intel_pt_process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -3657,37 +3909,15 @@ static int intel_pt_queue_data(struct perf_session *session, data_offset, timestamp); } -struct intel_pt_synth { - struct perf_tool dummy_tool; - struct perf_session *session; -}; - -static int intel_pt_event_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - struct intel_pt_synth *intel_pt_synth = - container_of(tool, struct intel_pt_synth, dummy_tool); - - return perf_session__deliver_synth_event(intel_pt_synth->session, event, - NULL); -} - static int intel_pt_synth_event(struct perf_session *session, const char *name, struct perf_event_attr *attr, u64 id) { - struct intel_pt_synth intel_pt_synth; int err; pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n", name, id, (u64)attr->sample_type); - memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); - intel_pt_synth.session = session; - - err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, - &id, intel_pt_event_synth); + err = perf_session__deliver_synth_attr_event(session, attr, id); if (err) pr_err("%s: failed to synthesize '%s' event type\n", __func__, name); @@ -3944,6 +4174,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) } pt->single_pebs = true; pt->sample_pebs = true; + pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel); pt->pebs_evsel = evsel; 
} } @@ -4130,7 +4361,7 @@ static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", |