Diffstat (limited to 'arch/x86/events')
29 files changed, 1695 insertions, 816 deletions
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c index 780acd3dff22..06f35a6b58a5 100644 --- a/arch/x86/events/amd/brs.c +++ b/arch/x86/events/amd/brs.c @@ -44,12 +44,12 @@ static inline unsigned int brs_to(int idx) static __always_inline void set_debug_extn_cfg(u64 val) { /* bits[4:3] must always be set to 11b */ - __wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32); + native_wrmsrq(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3); } static __always_inline u64 get_debug_extn_cfg(void) { - return __rdmsr(MSR_AMD_DBG_EXTN_CFG); + return native_rdmsrq(MSR_AMD_DBG_EXTN_CFG); } static bool __init amd_brs_detect(void) @@ -187,7 +187,7 @@ void amd_brs_reset(void) /* * Mark first entry as poisoned */ - wrmsrl(brs_to(0), BRS_POISON); + wrmsrq(brs_to(0), BRS_POISON); } int __init amd_brs_init(void) @@ -325,7 +325,7 @@ void amd_brs_drain(void) u32 brs_idx = tos - i; u64 from, to; - rdmsrl(brs_to(brs_idx), to); + rdmsrq(brs_to(brs_idx), to); /* Entry does not belong to us (as marked by kernel) */ if (to == BRS_POISON) @@ -341,7 +341,7 @@ void amd_brs_drain(void) if (!amd_brs_match_plm(event, to)) continue; - rdmsrl(brs_from(brs_idx), from); + rdmsrq(brs_from(brs_idx), from); perf_clear_branch_entry_bitfields(br+nr); @@ -371,7 +371,7 @@ static void amd_brs_poison_buffer(void) idx = amd_brs_get_tos(&cfg); /* Poison target of entry */ - wrmsrl(brs_to(idx), BRS_POISON); + wrmsrq(brs_to(idx), BRS_POISON); } /* @@ -381,7 +381,8 @@ static void amd_brs_poison_buffer(void) * On ctxswin, sched_in = true, called after the PMU has started * On ctxswout, sched_in = false, called before the PMU is stopped */ -void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 30d6ceb4c8ad..b20661b8621d 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -9,6 +9,7 @@ #include <linux/jiffies.h> #include <asm/apicdef.h> #include <asm/apic.h> +#include <asm/msr.h> #include <asm/nmi.h> #include "../perf_event.h" @@ -563,13 +564,13 @@ static void amd_pmu_cpu_reset(int cpu) return; /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */ - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); + wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); /* * Clear freeze and overflow bits i.e. 
PerfCntrGLobalStatus.LbrFreeze * and PerfCntrGLobalStatus.PerfCntrOvfl */ - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, + wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask); } @@ -651,7 +652,7 @@ static void amd_pmu_cpu_dead(int cpu) static __always_inline void amd_pmu_set_global_ctl(u64 ctl) { - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); + wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); } static inline u64 amd_pmu_get_global_status(void) @@ -659,7 +660,7 @@ static inline u64 amd_pmu_get_global_status(void) u64 status; /* PerfCntrGlobalStatus is read-only */ - rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); + rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); return status; } @@ -672,14 +673,14 @@ static inline void amd_pmu_ack_global_status(u64 status) * clears the same bit in PerfCntrGlobalStatus */ - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); + wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); } static bool amd_pmu_test_overflow_topbit(int idx) { u64 counter; - rdmsrl(x86_pmu_event_addr(idx), counter); + rdmsrq(x86_pmu_event_addr(idx), counter); return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1)); } @@ -1003,8 +1004,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } /* diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index e7a8b8758e08..112f43b23ebf 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -15,6 +15,7 @@ #include <linux/sched/clock.h> #include <asm/apic.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -26,10 +27,7 @@ static u32 ibs_caps; #include <linux/hardirq.h> #include <asm/nmi.h> -#include <asm/amd-ibs.h> - -#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) -#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT +#include <asm/amd/ibs.h> /* attr.config2 */ #define IBS_SW_FILTER_MASK 1 @@ -89,6 +87,7 @@ struct perf_ibs { u64 cnt_mask; u64 enable_mask; u64 valid_mask; + u16 min_period; u64 max_period; unsigned long offset_mask[1]; int offset_max; @@ -270,11 +269,19 @@ static int validate_group(struct perf_event *event) return 0; } +static bool perf_ibs_ldlat_event(struct perf_ibs *perf_ibs, + struct perf_event *event) +{ + return perf_ibs == &perf_ibs_op && + (ibs_caps & IBS_CAPS_OPLDLAT) && + (event->attr.config1 & 0xFFF); +} + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct perf_ibs *perf_ibs; - u64 max_cnt, config; + u64 config; int ret; perf_ibs = get_ibs_pmu(event->attr.type); @@ -310,25 +317,47 @@ static int perf_ibs_init(struct perf_event *event) if (config & perf_ibs->cnt_mask) /* raw max_cnt may not be set */ return -EINVAL; - if (!event->attr.sample_freq && hwc->sample_period & 0x0f) - /* - * lower 4 bits can not be set in ibs max cnt, - * but allowing it in case we adjust the - * sample period to set a frequency. - */ - return -EINVAL; - hwc->sample_period &= ~0x0FULL; - if (!hwc->sample_period) - hwc->sample_period = 0x10; + + if (event->attr.freq) { + hwc->sample_period = perf_ibs->min_period; + } else { + /* Silently mask off lower nibble. IBS hw mandates it. 
*/ + hwc->sample_period &= ~0x0FULL; + if (hwc->sample_period < perf_ibs->min_period) + return -EINVAL; + } } else { - max_cnt = config & perf_ibs->cnt_mask; + u64 period = 0; + + if (event->attr.freq) + return -EINVAL; + + if (perf_ibs == &perf_ibs_op) { + period = (config & IBS_OP_MAX_CNT) << 4; + if (ibs_caps & IBS_CAPS_OPCNTEXT) + period |= config & IBS_OP_MAX_CNT_EXT_MASK; + } else { + period = (config & IBS_FETCH_MAX_CNT) << 4; + } + config &= ~perf_ibs->cnt_mask; - event->attr.sample_period = max_cnt << 4; - hwc->sample_period = event->attr.sample_period; + event->attr.sample_period = period; + hwc->sample_period = period; + + if (hwc->sample_period < perf_ibs->min_period) + return -EINVAL; } - if (!hwc->sample_period) - return -EINVAL; + if (perf_ibs_ldlat_event(perf_ibs, event)) { + u64 ldlat = event->attr.config1 & 0xFFF; + + if (ldlat < 128 || ldlat > 2048) + return -EINVAL; + ldlat >>= 7; + + config |= (ldlat - 1) << 59; + config |= IBS_OP_L3MISSONLY | IBS_OP_LDLAT_EN; + } /* * If we modify hwc->sample_period, we also need to update @@ -349,7 +378,8 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs, int overflow; /* ignore lower 4 bits in min count: */ - overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); + overflow = perf_event_set_period(hwc, perf_ibs->min_period, + perf_ibs->max_period, period); local64_set(&hwc->prev_count, 0); return overflow; @@ -395,7 +425,7 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, * prev count manually on overflow. */ while (!perf_event_try_update(event, count, 64)) { - rdmsrl(event->hw.config_base, *config); + rdmsrq(event->hw.config_base, *config); count = perf_ibs->get_count(*config); } } @@ -406,9 +436,9 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, u64 tmp = hwc->config | config; if (perf_ibs->fetch_count_reset_broken) - wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); + wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask); - wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); + wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask); } /* @@ -423,9 +453,9 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, { config &= ~perf_ibs->cnt_mask; if (boot_cpu_data.x86 == 0x10) - wrmsrl(hwc->config_base, config); + wrmsrq(hwc->config_base, config); config &= ~perf_ibs->enable_mask; - wrmsrl(hwc->config_base, config); + wrmsrq(hwc->config_base, config); } /* @@ -447,6 +477,9 @@ static void perf_ibs_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; + if (event->attr.freq && hwc->sample_period < perf_ibs->min_period) + hwc->sample_period = perf_ibs->min_period; + perf_ibs_set_period(perf_ibs, hwc, &period); if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) { config |= period & IBS_OP_MAX_CNT_EXT_MASK; @@ -481,7 +514,7 @@ static void perf_ibs_stop(struct perf_event *event, int flags) if (!stopping && (hwc->state & PERF_HES_UPTODATE)) return; - rdmsrl(hwc->config_base, config); + rdmsrq(hwc->config_base, config); if (stopping) { /* @@ -554,6 +587,28 @@ static void perf_ibs_del(struct perf_event *event, int flags) static void perf_ibs_read(struct perf_event *event) { } +static int perf_ibs_check_period(struct perf_event *event, u64 value) +{ + struct perf_ibs *perf_ibs; + u64 low_nibble; + + if (event->attr.freq) + return 0; + + perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + low_nibble = value & 0xFULL; + + /* + * This contradicts with perf_ibs_init() 
which allows sample period + * with lower nibble bits set but silently masks them off. Whereas + * this returns error. + */ + if (low_nibble || value < perf_ibs->min_period) + return -EINVAL; + + return 0; +} + /* * We need to initialize with empty group if all attributes in the * group are dynamic. @@ -572,7 +627,10 @@ PMU_FORMAT_ATTR(cnt_ctl, "config:19"); PMU_FORMAT_ATTR(swfilt, "config2:0"); PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59"); PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16"); +PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_format, "config1:0-11"); PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1"); +PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_cap, "1"); +PMU_EVENT_ATTR_STRING(dtlb_pgsize, ibs_op_dtlb_pgsize_cap, "1"); static umode_t zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i) @@ -580,6 +638,18 @@ zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0; } +static umode_t +ibs_op_ldlat_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_OPLDLAT ? attr->mode : 0; +} + +static umode_t +ibs_op_dtlb_pgsize_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_OPDTLBPGSIZE ? attr->mode : 0; +} + static struct attribute *fetch_attrs[] = { &format_attr_rand_en.attr, &format_attr_swfilt.attr, @@ -596,6 +666,16 @@ static struct attribute *zen4_ibs_extensions_attrs[] = { NULL, }; +static struct attribute *ibs_op_ldlat_cap_attrs[] = { + &ibs_op_ldlat_cap.attr.attr, + NULL, +}; + +static struct attribute *ibs_op_dtlb_pgsize_cap_attrs[] = { + &ibs_op_dtlb_pgsize_cap.attr.attr, + NULL, +}; + static struct attribute_group group_fetch_formats = { .name = "format", .attrs = fetch_attrs, @@ -613,6 +693,18 @@ static struct attribute_group group_zen4_ibs_extensions = { .is_visible = zen4_ibs_extensions_is_visible, }; +static struct attribute_group group_ibs_op_ldlat_cap = { + .name = "caps", + .attrs = ibs_op_ldlat_cap_attrs, + .is_visible = ibs_op_ldlat_is_visible, +}; + +static struct attribute_group group_ibs_op_dtlb_pgsize_cap = { + .name = "caps", + .attrs = ibs_op_dtlb_pgsize_cap_attrs, + .is_visible = ibs_op_dtlb_pgsize_is_visible, +}; + static const struct attribute_group *fetch_attr_groups[] = { &group_fetch_formats, &empty_caps_group, @@ -651,6 +743,11 @@ static struct attribute_group group_op_formats = { .attrs = op_attrs, }; +static struct attribute *ibs_op_ldlat_format_attrs[] = { + &ibs_op_ldlat_format.attr.attr, + NULL, +}; + static struct attribute_group group_cnt_ctl = { .name = "format", .attrs = cnt_ctl_attrs, @@ -669,10 +766,19 @@ static const struct attribute_group *op_attr_groups[] = { NULL, }; +static struct attribute_group group_ibs_op_ldlat_format = { + .name = "format", + .attrs = ibs_op_ldlat_format_attrs, + .is_visible = ibs_op_ldlat_is_visible, +}; + static const struct attribute_group *op_attr_update[] = { &group_cnt_ctl, &group_op_l3missonly, &group_zen4_ibs_extensions, + &group_ibs_op_ldlat_cap, + &group_ibs_op_ldlat_format, + &group_ibs_op_dtlb_pgsize_cap, NULL, }; @@ -686,12 +792,14 @@ static struct perf_ibs perf_ibs_fetch = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, + .check_period = perf_ibs_check_period, }, .msr = MSR_AMD64_IBSFETCHCTL, - .config_mask = IBS_FETCH_CONFIG_MASK, + .config_mask = IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN, .cnt_mask = IBS_FETCH_MAX_CNT, .enable_mask = 
IBS_FETCH_ENABLE, .valid_mask = IBS_FETCH_VAL, + .min_period = 0x10, .max_period = IBS_FETCH_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, @@ -709,13 +817,15 @@ static struct perf_ibs perf_ibs_op = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, + .check_period = perf_ibs_check_period, }, .msr = MSR_AMD64_IBSOPCTL, - .config_mask = IBS_OP_CONFIG_MASK, + .config_mask = IBS_OP_MAX_CNT, .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | IBS_OP_CUR_CNT_RAND, .enable_mask = IBS_OP_ENABLE, .valid_mask = IBS_OP_VAL, + .min_period = 0x90, .max_period = IBS_OP_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, .offset_max = MSR_AMD64_IBSOP_REG_COUNT, @@ -917,6 +1027,10 @@ static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3, if (!op_data3->dc_lin_addr_valid) return; + if ((ibs_caps & IBS_CAPS_OPDTLBPGSIZE) && + !op_data3->dc_phy_addr_valid) + return; + if (!op_data3->dc_l1tlb_miss) { data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; return; @@ -941,6 +1055,8 @@ static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3, data_src->mem_lock = PERF_MEM_LOCK_LOCKED; } +/* Be careful. Works only for contiguous MSRs. */ +#define ibs_fetch_msr_idx(msr) (msr - MSR_AMD64_IBSFETCHCTL) #define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL) static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, @@ -1021,21 +1137,92 @@ static void perf_ibs_parse_ld_st_data(__u64 sample_type, } } -static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, +static bool perf_ibs_is_mem_sample_type(struct perf_ibs *perf_ibs, + struct perf_event *event) +{ + u64 sample_type = event->attr.sample_type; + + return perf_ibs == &perf_ibs_op && + sample_type & (PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_WEIGHT_TYPE | + PERF_SAMPLE_ADDR | + PERF_SAMPLE_PHYS_ADDR); +} + +static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, + struct perf_event *event, int check_rip) { - if (sample_type & PERF_SAMPLE_RAW || - (perf_ibs == &perf_ibs_op && - (sample_type & PERF_SAMPLE_DATA_SRC || - sample_type & PERF_SAMPLE_WEIGHT_TYPE || - sample_type & PERF_SAMPLE_ADDR || - sample_type & PERF_SAMPLE_PHYS_ADDR))) + if (event->attr.sample_type & PERF_SAMPLE_RAW || + perf_ibs_is_mem_sample_type(perf_ibs, event) || + perf_ibs_ldlat_event(perf_ibs, event)) return perf_ibs->offset_max; else if (check_rip) return 3; return 1; } +static bool perf_ibs_is_kernel_data_addr(struct perf_event *event, + struct perf_ibs_data *ibs_data) +{ + u64 sample_type_mask = PERF_SAMPLE_ADDR | PERF_SAMPLE_RAW; + union ibs_op_data3 op_data3; + u64 dc_lin_addr; + + op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + dc_lin_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; + + return unlikely((event->attr.sample_type & sample_type_mask) && + op_data3.dc_lin_addr_valid && kernel_ip(dc_lin_addr)); +} + +static bool perf_ibs_is_kernel_br_target(struct perf_event *event, + struct perf_ibs_data *ibs_data, + int br_target_idx) +{ + union ibs_op_data op_data; + u64 br_target; + + op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; + br_target = ibs_data->regs[br_target_idx]; + + return unlikely((event->attr.sample_type & PERF_SAMPLE_RAW) && + op_data.op_brn_ret && kernel_ip(br_target)); +} + +static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event, + struct pt_regs *regs, struct perf_ibs_data *ibs_data, + int br_target_idx) +{ + if (perf_exclude_event(event, regs)) + return true; 
+ + if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel) + return false; + + if (perf_ibs_is_kernel_data_addr(event, ibs_data)) + return true; + + if (br_target_idx != -1 && + perf_ibs_is_kernel_br_target(event, ibs_data, br_target_idx)) + return true; + + return false; +} + +static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs, + struct perf_ibs_data *ibs_data) +{ + if (perf_ibs == &perf_ibs_op) { + ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18); + ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0; + return; + } + + ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52); + ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0; +} + static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) { struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); @@ -1048,6 +1235,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) int offset, size, check_rip, offset_max, throttle = 0; unsigned int msr; u64 *buf, *config, period, new_config = 0; + int br_target_idx = -1; if (!test_bit(IBS_STARTED, pcpu->state)) { fail: @@ -1069,7 +1257,7 @@ fail: hwc = &event->hw; msr = hwc->config_base; buf = ibs_data.regs; - rdmsrl(msr, *buf); + rdmsrq(msr, *buf); if (!(*buf++ & perf_ibs->valid_mask)) goto fail; @@ -1084,15 +1272,31 @@ fail: offset = 1; check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); - offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip); + offset_max = perf_ibs_get_offset_max(perf_ibs, event, check_rip); do { - rdmsrl(msr + offset, *buf++); + rdmsrq(msr + offset, *buf++); size++; offset = find_next_bit(perf_ibs->offset_mask, perf_ibs->offset_max, offset + 1); } while (offset < offset_max); + + if (perf_ibs_ldlat_event(perf_ibs, event)) { + union ibs_op_data3 op_data3; + + op_data3.val = ibs_data.regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + /* + * Opening event is errored out if load latency threshold is + * outside of [128, 2048] range. Since the event has reached + * interrupt handler, we can safely assume the threshold is + * within [128, 2048] range. + */ + if (!op_data3.ld_op || !op_data3.dc_miss || + op_data3.dc_miss_lat <= (event->attr.config1 & 0xFFF)) + goto out; + } + /* * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately * depending on their availability. @@ -1101,16 +1305,17 @@ fail: if (event->attr.sample_type & PERF_SAMPLE_RAW) { if (perf_ibs == &perf_ibs_op) { if (ibs_caps & IBS_CAPS_BRNTRGT) { - rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); + rdmsrq(MSR_AMD64_IBSBRTARGET, *buf++); + br_target_idx = size; size++; } if (ibs_caps & IBS_CAPS_OPDATA4) { - rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + rdmsrq(MSR_AMD64_IBSOPDATA4, *buf++); size++; } } if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) { - rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++); + rdmsrq(MSR_AMD64_ICIBSEXTDCTL, *buf++); size++; } } @@ -1129,10 +1334,19 @@ fail: } if ((event->attr.config2 & IBS_SW_FILTER_MASK) && - perf_exclude_event(event, ®s)) { + perf_ibs_swfilt_discard(perf_ibs, event, ®s, &ibs_data, br_target_idx)) { throttle = perf_event_account_interrupt(event); goto out; } + /* + * Prevent leaking physical addresses to unprivileged users. Skip + * PERF_SAMPLE_PHYS_ADDR check since generic code prevents it for + * unprivileged users. 
+ */ + if ((event->attr.sample_type & PERF_SAMPLE_RAW) && + perf_allow_kernel()) { + perf_ibs_phyaddr_clear(perf_ibs, &ibs_data); + } if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw = (struct perf_raw_record){ @@ -1155,10 +1369,12 @@ fail: perf_sample_save_callchain(&data, event, iregs); throttle = perf_event_overflow(event, &data, ®s); + + if (event->attr.freq && hwc->sample_period < perf_ibs->min_period) + hwc->sample_period = perf_ibs->min_period; + out: - if (throttle) { - perf_ibs_stop(event, 0); - } else { + if (!throttle) { if (perf_ibs == &perf_ibs_op) { if (ibs_caps & IBS_CAPS_OPCNTEXT) { new_config = period & IBS_OP_MAX_CNT_EXT_MASK; @@ -1244,7 +1460,8 @@ static __init int perf_ibs_op_init(void) if (ibs_caps & IBS_CAPS_OPCNTEXT) { perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK; perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK; - perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; + perf_ibs_op.cnt_mask |= (IBS_OP_MAX_CNT_EXT_MASK | + IBS_OP_CUR_CNT_EXT_MASK); } if (ibs_caps & IBS_CAPS_ZEN4) @@ -1347,7 +1564,7 @@ static inline int ibs_eilvt_valid(void) preempt_disable(); - rdmsrl(MSR_AMD64_IBSCTL, val); + rdmsrq(MSR_AMD64_IBSCTL, val); offset = val & IBSCTL_LVT_OFFSET_MASK; if (!(val & IBSCTL_LVT_OFFSET_VALID)) { @@ -1462,7 +1679,7 @@ static inline int get_ibs_lvt_offset(void) { u64 val; - rdmsrl(MSR_AMD64_IBSCTL, val); + rdmsrq(MSR_AMD64_IBSCTL, val); if (!(val & IBSCTL_LVT_OFFSET_VALID)) return -EINVAL; diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index b15f7b950d2e..a721da9987dd 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -16,6 +16,8 @@ #include <linux/slab.h> #include <linux/amd-iommu.h> +#include <asm/msr.h> + #include "../perf_event.h" #include "iommu.h" @@ -30,7 +32,7 @@ #define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL) #define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL) -#define IOMMU_NAME_SIZE 16 +#define IOMMU_NAME_SIZE 24 struct perf_amd_iommu { struct list_head list; diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 19c7b76e21bc..d24da377df77 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/perf_event.h> +#include <asm/msr.h> #include <asm/perf_event.h> #include "../perf_event.h" @@ -61,19 +62,19 @@ struct branch_entry { static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val) { - wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); + wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val); } static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val) { - wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); + wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); } static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx) { u64 val; - rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); + rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val); return val; } @@ -82,7 +83,7 @@ static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx) { u64 val; - rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); + rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); return val; } @@ -333,7 +334,7 @@ void amd_pmu_lbr_reset(void) cpuc->last_task_ctx = NULL; cpuc->last_log_id = 0; - wrmsrl(MSR_AMD64_LBR_SELECT, 0); + wrmsrq(MSR_AMD64_LBR_SELECT, 0); } void amd_pmu_lbr_add(struct perf_event *event) @@ -371,7 +372,8 @@ void amd_pmu_lbr_del(struct perf_event *event) perf_sched_cb_dec(event->pmu); } -void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +void 
amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -395,16 +397,16 @@ void amd_pmu_lbr_enable_all(void) /* Set hardware branch filter */ if (cpuc->lbr_select) { lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK; - wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); + wrmsrq(MSR_AMD64_LBR_SELECT, lbr_select); } if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); } - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); + rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } void amd_pmu_lbr_disable_all(void) diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 37d5b380516e..dad42790cf7d 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "../perf_event.h" /* Event code: LSB 8 bits, passed in attr->config any other bit is reserved. */ @@ -48,8 +49,8 @@ static void event_update(struct perf_event *event) prev_pwr_acc = hwc->pwr_acc; prev_ptsc = hwc->ptsc; - rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc); - rdmsrl(MSR_F15H_PTSC, new_ptsc); + rdmsrq(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc); + rdmsrq(MSR_F15H_PTSC, new_ptsc); /* * Calculate the CU power consumption over a time period, the unit of @@ -75,8 +76,8 @@ static void __pmu_event_start(struct perf_event *event) event->hw.state = 0; - rdmsrl(MSR_F15H_PTSC, event->hw.ptsc); - rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc); + rdmsrq(MSR_F15H_PTSC, event->hw.ptsc); + rdmsrq(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc); } static void pmu_event_start(struct perf_event *event, int mode) @@ -272,7 +273,7 @@ static int __init amd_power_pmu_init(void) cpu_pwr_sample_ratio = cpuid_ecx(0x80000007); - if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) { + if (rdmsrq_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) { pr_err("Failed to read max compute unit power accumulator MSR\n"); return -ENODEV; } diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 49c26ce2b115..e8b6af199c73 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -21,6 +21,7 @@ #define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_L2 4 #define NUM_COUNTERS_L3 6 +#define NUM_COUNTERS_MAX 64 #define RDPMC_BASE_NB 6 #define RDPMC_BASE_LLC 10 @@ -38,7 +39,10 @@ struct amd_uncore_ctx { int refcnt; int cpu; struct perf_event **events; - struct hlist_node node; + unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)]; + int nr_active; + struct hrtimer hrtimer; + u64 hrtimer_duration; }; struct amd_uncore_pmu { @@ -83,11 +87,51 @@ struct amd_uncore { static struct amd_uncore uncores[UNCORE_TYPE_MAX]; +/* Interval for hrtimer, defaults to 60000 milliseconds */ +static unsigned int update_interval = 60 * MSEC_PER_SEC; +module_param(update_interval, uint, 0444); + static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) { return container_of(event->pmu, struct amd_uncore_pmu, pmu); } +static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer 
*hrtimer) +{ + struct amd_uncore_ctx *ctx; + struct perf_event *event; + int bit; + + ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer); + + if (!ctx->nr_active || ctx->cpu != smp_processor_id()) + return HRTIMER_NORESTART; + + for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) { + event = ctx->events[bit]; + event->pmu->read(event); + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration)); + return HRTIMER_RESTART; +} + +static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx) +{ + hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration), + HRTIMER_MODE_REL_PINNED_HARD); +} + +static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx) +{ + hrtimer_cancel(&ctx->hrtimer); +} + +static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx) +{ + hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); +} + static void amd_uncore_read(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -106,9 +150,9 @@ static void amd_uncore_read(struct perf_event *event) * read counts directly from the corresponding PERF_CTR. */ if (hwc->event_base_rdpmc < 0) - rdmsrl(hwc->event_base, new); + rdmsrq(hwc->event_base, new); else - rdpmcl(hwc->event_base_rdpmc, new); + new = rdpmc(hwc->event_base_rdpmc); local64_set(&hwc->prev_count, new); delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); @@ -118,27 +162,40 @@ static void amd_uncore_read(struct perf_event *event) static void amd_uncore_start(struct perf_event *event, int flags) { + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; + if (!ctx->nr_active++) + amd_uncore_start_hrtimer(ctx); + if (flags & PERF_EF_RELOAD) - wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); + wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count)); hwc->state = 0; - wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE)); + __set_bit(hwc->idx, ctx->active_mask); + wrmsrq(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE)); perf_event_update_userpage(event); } static void amd_uncore_stop(struct perf_event *event, int flags) { + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrq(hwc->config_base, hwc->config); hwc->state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { event->pmu->read(event); hwc->state |= PERF_HES_UPTODATE; } + + if (!--ctx->nr_active) + amd_uncore_cancel_hrtimer(ctx); + + __clear_bit(hwc->idx, ctx->active_mask); } static int amd_uncore_add(struct perf_event *event, int flags) @@ -491,6 +548,9 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) goto fail; } + amd_uncore_init_hrtimer(curr); + curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC; + cpumask_set_cpu(cpu, &pmu->active_mask); } @@ -880,16 +940,55 @@ static int amd_uncore_umc_event_init(struct perf_event *event) static void amd_uncore_umc_start(struct perf_event *event, int flags) { + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; + if (!ctx->nr_active++) + amd_uncore_start_hrtimer(ctx); + if (flags & PERF_EF_RELOAD) - wrmsrl(hwc->event_base, 
(u64)local64_read(&hwc->prev_count)); + wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count)); hwc->state = 0; - wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC)); + __set_bit(hwc->idx, ctx->active_mask); + wrmsrq(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC)); perf_event_update_userpage(event); } +static void amd_uncore_umc_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new, shift; + s64 delta; + + shift = COUNTER_SHIFT + 1; + prev = local64_read(&hwc->prev_count); + + /* + * UMC counters do not have RDPMC assignments. Read counts directly + * from the corresponding PERF_CTR. + */ + rdmsrl(hwc->event_base, new); + + /* + * Unlike the other uncore counters, UMC counters saturate and set the + * Overflow bit (bit 48) on overflow. Since they do not roll over, + * proactively reset the corresponding PERF_CTR when bit 47 is set so + * that the counter never gets a chance to saturate. + */ + if (new & BIT_ULL(63 - COUNTER_SHIFT)) { + wrmsrl(hwc->event_base, 0); + local64_set(&hwc->prev_count, 0); + } else { + local64_set(&hwc->prev_count, new); + } + + delta = (new << shift) - (prev << shift); + delta >>= shift; + local64_add(delta, &event->count); +} + static void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { @@ -968,7 +1067,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) .del = amd_uncore_del, .start = amd_uncore_umc_start, .stop = amd_uncore_stop, - .read = amd_uncore_read, + .read = amd_uncore_umc_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, .module = THIS_MODULE, }; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2092d615333d..7610f26dfbd9 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -32,6 +32,7 @@ #include <asm/apic.h> #include <asm/stacktrace.h> +#include <asm/msr.h> #include <asm/nmi.h> #include <asm/smp.h> #include <asm/alternative.h> @@ -87,13 +88,19 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling); DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling); DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task); -DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter); +DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup); + +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all); + /* * This one is magic, it will get called even when PMU init fails (because * there is no PMU), in which case it should simply return NULL. 
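Note on the conversion pattern: the bulk of the mechanical churn in these hunks is the MSR/PMC accessor rename. rdmsrl()/wrmsrl() become rdmsrq()/wrmsrq() with the same two-argument shape (the q suffix denotes a 64-bit quantity), rdmsrl_safe()/wrmsrl_safe() become rdmsrq_safe()/wrmsrq_safe(), and rdpmcl() becomes a value-returning rdpmc(). A minimal sketch of the new convention as used at the converted call sites, assuming kernel context; demo_read_counter() is a hypothetical name, not a kernel function:

	#include <asm/msr.h>

	/* Sketch only: mirrors the converted call sites above. */
	static u64 demo_read_counter(struct hw_perf_event *hwc)
	{
		u64 config, count;

		rdmsrq(hwc->config_base, config);	/* was rdmsrl() */
		count = rdpmc(hwc->event_base_rdpmc);	/* was rdpmcl(idx, count) */
		wrmsrq(hwc->config_base, config);	/* was wrmsrl() */
		return count;
	}
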
@@ -133,7 +140,7 @@ u64 x86_perf_event_update(struct perf_event *event) */ prev_raw_count = local64_read(&hwc->prev_count); do { - rdpmcl(hwc->event_base_rdpmc, new_raw_count); + new_raw_count = rdpmc(hwc->event_base_rdpmc); } while (!local64_try_cmpxchg(&hwc->prev_count, &prev_raw_count, new_raw_count)); @@ -268,7 +275,7 @@ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask, */ for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) { reg = x86_pmu_config_addr(i); - ret = rdmsrl_safe(reg, &val); + ret = rdmsrq_safe(reg, &val); if (ret) goto msr_fail; if (val & ARCH_PERFMON_EVENTSEL_ENABLE) { @@ -282,7 +289,7 @@ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask, if (*(u64 *)fixed_cntr_mask) { reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - ret = rdmsrl_safe(reg, &val); + ret = rdmsrq_safe(reg, &val); if (ret) goto msr_fail; for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) { @@ -313,11 +320,11 @@ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask, * (qemu/kvm) that don't trap on the MSR access and always return 0s. */ reg = x86_pmu_event_addr(reg_safe); - if (rdmsrl_safe(reg, &val)) + if (rdmsrq_safe(reg, &val)) goto msr_fail; val ^= 0xffffUL; - ret = wrmsrl_safe(reg, val); - ret |= rdmsrl_safe(reg, &val_new); + ret = wrmsrq_safe(reg, val); + ret |= rdmsrq_safe(reg, &val_new); if (ret || val != val_new) goto msr_fail; @@ -628,7 +635,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (!event->attr.freq && x86_pmu.limit_period) { + if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) @@ -673,6 +680,7 @@ static int __x86_pmu_event_init(struct perf_event *event) event->hw.idx = -1; event->hw.last_cpu = -1; event->hw.last_tag = ~0ULL; + event->hw.dyn_constraint = ~0ULL; /* mark unused */ event->hw.extra_reg.idx = EXTRA_REG_NONE; @@ -692,13 +700,13 @@ void x86_pmu_disable_all(void) if (!test_bit(idx, cpuc->active_mask)) continue; - rdmsrl(x86_pmu_config_addr(idx), val); + rdmsrq(x86_pmu_config_addr(idx), val); if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) continue; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(x86_pmu_config_addr(idx), val); + wrmsrq(x86_pmu_config_addr(idx), val); if (is_counter_pair(hwc)) - wrmsrl(x86_pmu_config_addr(idx + 1), 0); + wrmsrq(x86_pmu_config_addr(idx + 1), 0); } } @@ -753,17 +761,18 @@ void x86_pmu_enable_all(int added) } } -static inline int is_x86_event(struct perf_event *event) +int is_x86_event(struct perf_event *event) { - int i; - - if (!is_hybrid()) - return event->pmu == &pmu; - - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (event->pmu == &x86_pmu.hybrid_pmu[i].pmu) - return true; - } + /* + * For a non-hybrid platforms, the type of X86 pmu is + * always PERF_TYPE_RAW. + * For a hybrid platform, the PERF_PMU_CAP_EXTENDED_HW_TYPE + * is a unique capability for the X86 PMU. + * Use them to detect a X86 event. + */ + if (event->pmu->type == PERF_TYPE_RAW || + event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE) + return true; return false; } @@ -1298,6 +1307,15 @@ static void x86_pmu_enable(struct pmu *pmu) if (cpuc->n_added) { int n_running = cpuc->n_events - cpuc->n_added; + + /* + * The late setup (after counters are scheduled) + * is required for some cases, e.g., PEBS counters + * snapshotting. Because an accurate counter index + * is needed. 
+ */ + static_call_cond(x86_pmu_late_setup)(); + /* * apply assignment obtained either from * hw_perf_group_sched_in() or x86_pmu_enable() @@ -1410,14 +1428,14 @@ int x86_perf_event_set_period(struct perf_event *event) */ local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + wrmsrq(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* * Sign extend the Merge event counter's upper 16 bits since * we currently declare a 48-bit counter width */ if (is_counter_pair(hwc)) - wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff); + wrmsrq(x86_pmu_event_addr(idx + 1), 0xffff); perf_event_update_userpage(event); @@ -1540,10 +1558,10 @@ void perf_event_print_debug(void) return; if (x86_pmu.version >= 2) { - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + rdmsrq(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); + rdmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); pr_info("\n"); pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); @@ -1551,19 +1569,19 @@ void perf_event_print_debug(void) pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); if (pebs_constraints) { - rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); + rdmsrq(MSR_IA32_PEBS_ENABLE, pebs); pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); } if (x86_pmu.lbr_nr) { - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl); } } pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) { - rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); - rdmsrl(x86_pmu_event_addr(idx), pmc_count); + rdmsrq(x86_pmu_config_addr(idx), pmc_ctrl); + rdmsrq(x86_pmu_event_addr(idx), pmc_count); prev_left = per_cpu(pmc_prev_left[idx], cpu); @@ -1577,7 +1595,7 @@ void perf_event_print_debug(void) for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) { if (fixed_counter_disabled(idx, cpuc->pmu)) continue; - rdmsrl(x86_pmu_fixed_ctr_addr(idx), pmc_count); + rdmsrq(x86_pmu_fixed_ctr_addr(idx), pmc_count); pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", cpu, idx, pmc_count); @@ -1673,6 +1691,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_events *cpuc; struct perf_event *event; int idx, handled = 0; + u64 last_period; u64 val; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1692,6 +1711,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) continue; event = cpuc->events[idx]; + last_period = event->hw.last_period; val = static_call(x86_pmu_update)(event); if (val & (1ULL << (x86_pmu.cntval_bits - 1))) @@ -1705,12 +1725,11 @@ int x86_pmu_handle_irq(struct pt_regs *regs) if (!static_call(x86_pmu_set_period)(event)) continue; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } if (handled) @@ -2028,13 +2047,19 @@ static void x86_pmu_static_call_update(void) static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling); static_call_update(x86_pmu_sched_task, x86_pmu.sched_task); - static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx); static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs); 
static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases); static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs); static_call_update(x86_pmu_filter, x86_pmu.filter); + + static_call_update(x86_pmu_late_setup, x86_pmu.late_setup); + + static_call_update(x86_pmu_pebs_enable, x86_pmu.pebs_enable); + static_call_update(x86_pmu_pebs_disable, x86_pmu.pebs_disable); + static_call_update(x86_pmu_pebs_enable_all, x86_pmu.pebs_enable_all); + static_call_update(x86_pmu_pebs_disable_all, x86_pmu.pebs_disable_all); } static void _x86_pmu_read(struct perf_event *event) @@ -2485,9 +2510,9 @@ void perf_clear_dirty_counters(void) if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask))) continue; - wrmsrl(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0); + wrmsrq(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0); } else { - wrmsrl(x86_pmu_event_addr(i), 0); + wrmsrq(x86_pmu_event_addr(i), 0); } } @@ -2625,15 +2650,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { NULL, }; -static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) -{ - static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in); -} - -static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc) +static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { - static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc); + static_call_cond(x86_pmu_sched_task)(pmu_ctx, task, sched_in); } void perf_check_microcode(void) @@ -2700,7 +2720,6 @@ static struct pmu pmu = { .event_idx = x86_pmu_event_idx, .sched_task = x86_pmu_sched_task, - .swap_task_ctx = x86_pmu_swap_task_ctx, .check_period = x86_pmu_check_period, .aux_output_match = x86_pmu_aux_output_match, @@ -2798,8 +2817,15 @@ static unsigned long get_segment_base(unsigned int segment) #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; + /* + * If we're not in a valid context with a real (not just lazy) + * user mm, then don't even try. + */ + if (!nmi_uaccess_okay()) + return 0; + /* IRQs are off, so this synchronizes with smp_store_release */ - ldt = READ_ONCE(current->active_mm->context.ldt); + ldt = smp_load_acquire(¤t->mm->context.ldt); if (!ldt || idx >= ldt->nr_entries) return 0; @@ -2844,7 +2870,7 @@ static bool is_uprobe_at_func_entry(struct pt_regs *regs) return true; /* endbr64 (64-bit only) */ - if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn)) + if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn)) return true; return false; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 8f78b0c900ef..61da6b8a3d51 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -17,6 +17,7 @@ #include <linux/sizes.h> #include <asm/perf_event.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -36,7 +37,7 @@ enum { BTS_STATE_ACTIVE, }; -static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); +static struct bts_ctx __percpu *bts_ctx; #define BTS_RECORD_SIZE 24 #define BTS_SAFETY_MARGIN 4080 @@ -58,7 +59,7 @@ struct bts_buffer { local_t head; unsigned long end; void **data_pages; - struct bts_phys buf[]; + struct bts_phys buf[] __counted_by(nr_bufs); }; static struct pmu bts_pmu; @@ -80,54 +81,54 @@ static void * bts_buffer_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { - struct bts_buffer *buf; + struct bts_buffer *bb; struct page *page; int cpu = event->cpu; int node = (cpu == -1) ? 
cpu : cpu_to_node(cpu); unsigned long offset; size_t size = nr_pages << PAGE_SHIFT; - int pg, nbuf, pad; + int pg, nr_buf, pad; /* count all the high order buffers */ - for (pg = 0, nbuf = 0; pg < nr_pages;) { + for (pg = 0, nr_buf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); pg += buf_nr_pages(page); - nbuf++; + nr_buf++; } /* * to avoid interrupts in overwrite mode, only allow one physical */ - if (overwrite && nbuf > 1) + if (overwrite && nr_buf > 1) return NULL; - buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node); - if (!buf) + bb = kzalloc_node(struct_size(bb, buf, nr_buf), GFP_KERNEL, node); + if (!bb) return NULL; - buf->nr_pages = nr_pages; - buf->nr_bufs = nbuf; - buf->snapshot = overwrite; - buf->data_pages = pages; - buf->real_size = size - size % BTS_RECORD_SIZE; + bb->nr_pages = nr_pages; + bb->nr_bufs = nr_buf; + bb->snapshot = overwrite; + bb->data_pages = pages; + bb->real_size = size - size % BTS_RECORD_SIZE; - for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { + for (pg = 0, nr_buf = 0, offset = 0, pad = 0; nr_buf < bb->nr_bufs; nr_buf++) { unsigned int __nr_pages; page = virt_to_page(pages[pg]); __nr_pages = buf_nr_pages(page); - buf->buf[nbuf].page = page; - buf->buf[nbuf].offset = offset; - buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; - pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; - buf->buf[nbuf].size -= pad; + bb->buf[nr_buf].page = page; + bb->buf[nr_buf].offset = offset; + bb->buf[nr_buf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); + bb->buf[nr_buf].size = buf_size(page) - bb->buf[nr_buf].displacement; + pad = bb->buf[nr_buf].size % BTS_RECORD_SIZE; + bb->buf[nr_buf].size -= pad; pg += __nr_pages; offset += __nr_pages << PAGE_SHIFT; } - return buf; + return bb; } static void bts_buffer_free_aux(void *data) @@ -135,25 +136,25 @@ static void bts_buffer_free_aux(void *data) kfree(data); } -static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) +static unsigned long bts_buffer_offset(struct bts_buffer *bb, unsigned int idx) { - return buf->buf[idx].offset + buf->buf[idx].displacement; + return bb->buf[idx].offset + bb->buf[idx].displacement; } static void -bts_config_buffer(struct bts_buffer *buf) +bts_config_buffer(struct bts_buffer *bb) { int cpu = raw_smp_processor_id(); struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - struct bts_phys *phys = &buf->buf[buf->cur_buf]; + struct bts_phys *phys = &bb->buf[bb->cur_buf]; unsigned long index, thresh = 0, end = phys->size; struct page *page = phys->page; - index = local_read(&buf->head); + index = local_read(&bb->head); - if (!buf->snapshot) { - if (buf->end < phys->offset + buf_size(page)) - end = buf->end - phys->offset - phys->displacement; + if (!bb->snapshot) { + if (bb->end < phys->offset + buf_size(page)) + end = bb->end - phys->offset - phys->displacement; index -= phys->offset + phys->displacement; @@ -168,7 +169,7 @@ bts_config_buffer(struct bts_buffer *buf) ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement; ds->bts_index = ds->bts_buffer_base + index; ds->bts_absolute_maximum = ds->bts_buffer_base + end; - ds->bts_interrupt_threshold = !buf->snapshot + ds->bts_interrupt_threshold = !bb->snapshot ? 
ds->bts_buffer_base + thresh : ds->bts_absolute_maximum + BTS_RECORD_SIZE; } @@ -184,16 +185,16 @@ static void bts_update(struct bts_ctx *bts) { int cpu = raw_smp_processor_id(); struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_buffer *bb = perf_get_aux(&bts->handle); unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head; - if (!buf) + if (!bb) return; - head = index + bts_buffer_offset(buf, buf->cur_buf); - old = local_xchg(&buf->head, head); + head = index + bts_buffer_offset(bb, bb->cur_buf); + old = local_xchg(&bb->head, head); - if (!buf->snapshot) { + if (!bb->snapshot) { if (old == head) return; @@ -205,9 +206,9 @@ static void bts_update(struct bts_ctx *bts) * old and head are always in the same physical buffer, so we * can subtract them to get the data size. */ - local_add(head - old, &buf->data_size); + local_add(head - old, &bb->data_size); } else { - local_set(&buf->data_size, head); + local_set(&bb->data_size, head); } /* @@ -218,7 +219,7 @@ static void bts_update(struct bts_ctx *bts) } static int -bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); +bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle); /* * Ordering PMU callbacks wrt themselves and the PMI is done by means @@ -231,18 +232,18 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); static void __bts_event_start(struct perf_event *event) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); + struct bts_buffer *bb = perf_get_aux(&bts->handle); u64 config = 0; - if (!buf->snapshot) + if (!bb->snapshot) config |= ARCH_PERFMON_EVENTSEL_INT; if (!event->attr.exclude_kernel) config |= ARCH_PERFMON_EVENTSEL_OS; if (!event->attr.exclude_user) config |= ARCH_PERFMON_EVENTSEL_USR; - bts_config_buffer(buf); + bts_config_buffer(bb); /* * local barrier to make sure that ds configuration made it @@ -260,14 +261,14 @@ static void __bts_event_start(struct perf_event *event) static void bts_event_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf; + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); + struct bts_buffer *bb; - buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) + bb = perf_aux_output_begin(&bts->handle, event); + if (!bb) goto fail_stop; - if (bts_buffer_reset(buf, &bts->handle)) + if (bts_buffer_reset(bb, &bts->handle)) goto fail_end_stop; bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; @@ -290,7 +291,7 @@ fail_stop: static void __bts_event_stop(struct perf_event *event, int state) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */ WRITE_ONCE(bts->state, state); @@ -305,28 +306,28 @@ static void __bts_event_stop(struct perf_event *event, int state) static void bts_event_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = NULL; + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); + struct bts_buffer *bb = NULL; int state = READ_ONCE(bts->state); if (state == BTS_STATE_ACTIVE) __bts_event_stop(event, BTS_STATE_STOPPED); if (state != BTS_STATE_STOPPED) - buf = perf_get_aux(&bts->handle); + bb = 
perf_get_aux(&bts->handle); event->hw.state |= PERF_HES_STOPPED; if (flags & PERF_EF_UPDATE) { bts_update(bts); - if (buf) { - if (buf->snapshot) + if (bb) { + if (bb->snapshot) bts->handle.head = - local_xchg(&buf->data_size, - buf->nr_pages << PAGE_SHIFT); + local_xchg(&bb->data_size, + bb->nr_pages << PAGE_SHIFT); perf_aux_output_end(&bts->handle, - local_xchg(&buf->data_size, 0)); + local_xchg(&bb->data_size, 0)); } cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; @@ -338,9 +339,14 @@ static void bts_event_stop(struct perf_event *event, int flags) void intel_bts_enable_local(void) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - int state = READ_ONCE(bts->state); + struct bts_ctx *bts; + int state; + + if (!bts_ctx) + return; + bts = this_cpu_ptr(bts_ctx); + state = READ_ONCE(bts->state); /* * Here we transition from INACTIVE to ACTIVE; * if we instead are STOPPED from the interrupt handler, @@ -358,7 +364,12 @@ void intel_bts_enable_local(void) void intel_bts_disable_local(void) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts; + + if (!bts_ctx) + return; + + bts = this_cpu_ptr(bts_ctx); /* * Here we transition from ACTIVE to INACTIVE; @@ -372,19 +383,19 @@ void intel_bts_disable_local(void) } static int -bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) +bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle) { unsigned long head, space, next_space, pad, gap, skip, wakeup; unsigned int next_buf; struct bts_phys *phys, *next_phys; int ret; - if (buf->snapshot) + if (bb->snapshot) return 0; - head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); + head = handle->head & ((bb->nr_pages << PAGE_SHIFT) - 1); - phys = &buf->buf[buf->cur_buf]; + phys = &bb->buf[bb->cur_buf]; space = phys->offset + phys->displacement + phys->size - head; pad = space; if (space > handle->size) { @@ -393,10 +404,10 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) } if (space <= BTS_SAFETY_MARGIN) { /* See if next phys buffer has more space */ - next_buf = buf->cur_buf + 1; - if (next_buf >= buf->nr_bufs) + next_buf = bb->cur_buf + 1; + if (next_buf >= bb->nr_bufs) next_buf = 0; - next_phys = &buf->buf[next_buf]; + next_phys = &bb->buf[next_buf]; gap = buf_size(phys->page) - phys->displacement - phys->size + next_phys->displacement; skip = pad + gap; @@ -421,8 +432,8 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) * anymore, so we must not be racing with * bts_update(). */ - buf->cur_buf = next_buf; - local_set(&buf->head, head); + bb->cur_buf = next_buf; + local_set(&bb->head, head); } } } @@ -435,7 +446,7 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) space -= space % BTS_RECORD_SIZE; } - buf->end = head + space; + bb->end = head + space; /* * If we have no space, the lost notification would have been sent when @@ -450,12 +461,17 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) int intel_bts_interrupt(void) { struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct perf_event *event = bts->handle.event; - struct bts_buffer *buf; + struct bts_ctx *bts; + struct perf_event *event; + struct bts_buffer *bb; s64 old_head; int err = -ENOSPC, handled = 0; + if (!bts_ctx) + return 0; + + bts = this_cpu_ptr(bts_ctx); + event = bts->handle.event; /* * The only surefire way of knowing if this NMI is ours is by checking * the write ptr against the PMI threshold. 
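Note on the bts_ctx change in the hunks above: it follows the usual static-to-dynamic per-CPU conversion. DEFINE_PER_CPU(struct bts_ctx, bts_ctx) becomes a __percpu pointer allocated once in bts_init(), so machines without BTS no longer carry the per-CPU footprint, and every entry point that can run before (or without) initialization now guards against a NULL pointer. A sketch of the pattern with hypothetical demo_* names, assuming kernel context:

	#include <linux/percpu.h>

	struct demo_ctx { int state; };

	static struct demo_ctx __percpu *demo_ctx;	/* was DEFINE_PER_CPU() */

	static int __init demo_init(void)
	{
		demo_ctx = alloc_percpu(struct demo_ctx);
		return demo_ctx ? 0 : -ENOMEM;
	}

	void demo_enable_local(void)
	{
		struct demo_ctx *ctx;

		if (!demo_ctx)		/* feature was never initialized */
			return;

		ctx = this_cpu_ptr(demo_ctx);
		ctx->state = 1;
	}
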
@@ -470,8 +486,8 @@ int intel_bts_interrupt(void) if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) return handled; - buf = perf_get_aux(&bts->handle); - if (!buf) + bb = perf_get_aux(&bts->handle); + if (!bb) return handled; /* @@ -479,26 +495,26 @@ int intel_bts_interrupt(void) * there's no other way of telling, because the pointer will * keep moving */ - if (buf->snapshot) + if (bb->snapshot) return 0; - old_head = local_read(&buf->head); + old_head = local_read(&bb->head); bts_update(bts); /* no new data */ - if (old_head == local_read(&buf->head)) + if (old_head == local_read(&bb->head)) return handled; - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0)); + perf_aux_output_end(&bts->handle, local_xchg(&bb->data_size, 0)); - buf = perf_aux_output_begin(&bts->handle, event); - if (buf) - err = bts_buffer_reset(buf, &bts->handle); + bb = perf_aux_output_begin(&bts->handle, event); + if (bb) + err = bts_buffer_reset(bb, &bts->handle); if (err) { WRITE_ONCE(bts->state, BTS_STATE_STOPPED); - if (buf) { + if (bb) { /* * BTS_STATE_STOPPED should be visible before * cleared handle::event @@ -518,7 +534,7 @@ static void bts_event_del(struct perf_event *event, int mode) static int bts_event_add(struct perf_event *event, int mode) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -559,7 +575,7 @@ static int bts_event_init(struct perf_event *event) * to the user in a zero-copy fashion. */ if (event->attr.exclude_kernel) { - ret = perf_allow_kernel(&event->attr); + ret = perf_allow_kernel(); if (ret) return ret; } @@ -584,7 +600,11 @@ static void bts_event_read(struct perf_event *event) static __init int bts_init(void) { - if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) + if (!boot_cpu_has(X86_FEATURE_DTES64)) + return -ENODEV; + + x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); + if (!x86_pmu.bts) return -ENODEV; if (boot_cpu_has(X86_FEATURE_PTI)) { @@ -605,6 +625,10 @@ static __init int bts_init(void) return -ENODEV; } + bts_ctx = alloc_percpu(struct bts_ctx); + if (!bts_ctx) + return -ENOMEM; + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index cdb19e3ba3aa..c2fb729c270e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -23,6 +23,7 @@ #include <asm/intel_pt.h> #include <asm/apic.h> #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -2224,6 +2225,18 @@ static struct extra_reg intel_cmt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_skt, "event=0x9c,umask=0x01"); +EVENT_ATTR_STR(topdown-retiring, td_retiring_skt, "event=0xc2,umask=0x02"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound_skt, "event=0xa4,umask=0x02"); + +static struct attribute *skt_events_attrs[] = { + EVENT_PTR(td_fe_bound_skt), + EVENT_PTR(td_retiring_skt), + EVENT_PTR(td_bad_spec_cmt), + EVENT_PTR(td_be_bound_skt), + NULL, +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -2285,7 +2298,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 
0); if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); @@ -2294,7 +2307,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts) static __always_inline void intel_pmu_disable_all(void) { __intel_pmu_disable_all(true); - intel_pmu_pebs_disable_all(); + static_call_cond(x86_pmu_pebs_disable_all)(); intel_pmu_lbr_disable_all(); } @@ -2306,11 +2319,11 @@ static void __intel_pmu_enable_all(int added, bool pmi) intel_pmu_lbr_enable_all(pmi); if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) { - wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val); + wrmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val); cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val; } - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, intel_ctrl & ~cpuc->intel_ctrl_guest_mask); if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { @@ -2326,7 +2339,7 @@ static void __intel_pmu_enable_all(int added, bool pmi) static void intel_pmu_enable_all(int added) { - intel_pmu_pebs_enable_all(); + static_call_cond(x86_pmu_pebs_enable_all)(); __intel_pmu_enable_all(added, false); } @@ -2426,12 +2439,12 @@ static void intel_pmu_nhm_workaround(void) } for (i = 0; i < 4; i++) { - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); - wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); + wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); + wrmsrq(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); } - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); for (i = 0; i < 4; i++) { event = cpuc->events[i]; @@ -2441,7 +2454,7 @@ static void intel_pmu_nhm_workaround(void) __x86_pmu_enable_event(&event->hw, ARCH_PERFMON_EVENTSEL_ENABLE); } else - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); + wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); } } @@ -2458,7 +2471,7 @@ static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on) if (cpuc->tfa_shadow != val) { cpuc->tfa_shadow = val; - wrmsrl(MSR_TSX_FORCE_ABORT, val); + wrmsrq(MSR_TSX_FORCE_ABORT, val); } } @@ -2489,14 +2502,14 @@ static inline u64 intel_pmu_get_status(void) { u64 status; - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status); return status; } static inline void intel_pmu_ack_status(u64 ack) { - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); + wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } static inline bool event_is_checkpointed(struct perf_event *event) @@ -2583,7 +2596,7 @@ static void intel_pmu_disable_event(struct perf_event *event) * so we don't trigger the event without PEBS bit set. */ if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_disable(event); + static_call(x86_pmu_pebs_disable)(event); } static void intel_pmu_assign_event(struct perf_event *event, int idx) @@ -2603,6 +2616,9 @@ static void intel_pmu_del_event(struct perf_event *event) intel_pmu_lbr_del(event); if (event->attr.precise_ip) intel_pmu_pebs_del(event); + if (is_pebs_counter_event_group(event) || + is_acr_event_group(event)) + this_cpu_ptr(&cpu_hw_events)->n_late_setup--; } static int icl_set_topdown_event_period(struct perf_event *event) @@ -2619,15 +2635,15 @@ static int icl_set_topdown_event_period(struct perf_event *event) * Don't need to clear them again. 
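The intel_pmu_pebs_*() calls converted above now go through static calls, so PMUs without PEBS pay no indirect-call cost for the hooks. The declarations and updates are assumed to live with the other x86_pmu static calls in the shared x86 core rather than in these hunks; a rough sketch of that convention:

/* Defaults to a no-op until a handler is installed. */
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);

/* Installed while the PMU is set up, alongside the other x86_pmu static calls. */
static_call_update(x86_pmu_pebs_enable_all, x86_pmu.pebs_enable_all);

/* Call site: the _cond form silently does nothing while the target is still NULL. */
static_call_cond(x86_pmu_pebs_enable_all)();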
*/ if (left == x86_pmu.max_period) { - wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0); - wrmsrl(MSR_PERF_METRICS, 0); + wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0); + wrmsrq(MSR_PERF_METRICS, 0); hwc->saved_slots = 0; hwc->saved_metric = 0; } if ((hwc->saved_slots) && is_slots_event(event)) { - wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots); - wrmsrl(MSR_PERF_METRICS, hwc->saved_metric); + wrmsrq(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots); + wrmsrq(MSR_PERF_METRICS, hwc->saved_metric); } perf_event_update_userpage(event); @@ -2714,7 +2730,7 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots, * modify by a NMI. PMU has to be disabled before calling this function. */ -static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) +static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *other; @@ -2722,13 +2738,24 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) bool reset = true; int idx; - /* read Fixed counter 3 */ - rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots); - if (!slots) - return 0; + if (!val) { + /* read Fixed counter 3 */ + slots = rdpmc(3 | INTEL_PMC_FIXED_RDPMC_BASE); + if (!slots) + return 0; - /* read PERF_METRICS */ - rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics); + /* read PERF_METRICS */ + metrics = rdpmc(INTEL_PMC_FIXED_RDPMC_METRICS); + } else { + slots = val[0]; + metrics = val[1]; + /* + * Don't reset the PERF_METRICS and Fixed counter 3 + * for each PEBS record read. Utilize the RDPMC metrics + * clear mode. + */ + reset = false; + } for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) { if (!is_topdown_idx(idx)) @@ -2762,8 +2789,8 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) if (reset) { /* The fixed counter 3 has to be written before the PERF_METRICS. */ - wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0); - wrmsrl(MSR_PERF_METRICS, 0); + wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0); + wrmsrq(MSR_PERF_METRICS, 0); if (event) update_saved_topdown_regs(event, 0, 0, metric_end); } @@ -2771,36 +2798,47 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) return slots; } -static u64 icl_update_topdown_event(struct perf_event *event) +static u64 icl_update_topdown_event(struct perf_event *event, u64 *val) { return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE + - x86_pmu.num_topdown_events - 1); + x86_pmu.num_topdown_events - 1, + val); } -DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); +DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update); -static void intel_pmu_read_topdown_event(struct perf_event *event) +static void intel_pmu_read_event(struct perf_event *event) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN) || + is_pebs_counter_event_group(event)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + bool pmu_enabled = cpuc->enabled; - /* Only need to call update_topdown_event() once for group read. */ - if ((cpuc->txn_flags & PERF_PMU_TXN_READ) && - !is_slots_event(event)) - return; + /* Only need to call update_topdown_event() once for group read. 
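For context on the topdown update above: SLOTS and PERF_METRICS are read either via RDPMC or, with the new val[] path, from a PEBS counter-snapshot record, and each topdown metric occupies an 8-bit field of PERF_METRICS expressing a fraction of SLOTS. A simplified sketch of that decode, paraphrasing the existing in-file scaling logic:

/* metric_idx is the 0-based topdown metric index, not the PMC index. */
static u64 example_metric_count(u64 metrics, u64 slots, int metric_idx)
{
	u32 frac = (metrics >> (metric_idx * 8)) & 0xff;

	/* the 8-bit fields are scaled so that they sum to 0xff across all metrics */
	return mul_u64_u32_div(slots, frac, 0xff);
}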
*/ + if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ)) + return; - perf_pmu_disable(event->pmu); - static_call(intel_pmu_update_topdown_event)(event); - perf_pmu_enable(event->pmu); -} + cpuc->enabled = 0; + if (pmu_enabled) + intel_pmu_disable_all(); -static void intel_pmu_read_event(struct perf_event *event) -{ - if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) - intel_pmu_auto_reload_read(event); - else if (is_topdown_count(event)) - intel_pmu_read_topdown_event(event); - else - x86_perf_event_update(event); + /* + * If the PEBS counters snapshotting is enabled, + * the topdown event is available in PEBS records. + */ + if (is_topdown_count(event) && !is_pebs_counter_event_group(event)) + static_call(intel_pmu_update_topdown_event)(event, NULL); + else + intel_pmu_drain_pebs_buffer(); + + cpuc->enabled = pmu_enabled; + if (pmu_enabled) + intel_pmu_enable_all(0); + + return; + } + + x86_perf_event_update(event); } static void intel_pmu_enable_fixed(struct perf_event *event) @@ -2858,6 +2896,54 @@ static void intel_pmu_enable_fixed(struct perf_event *event) cpuc->fixed_ctrl_val |= bits; } +static void intel_pmu_config_acr(int idx, u64 mask, u32 reload) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int msr_b, msr_c; + int msr_offset; + + if (!mask && !cpuc->acr_cfg_b[idx]) + return; + + if (idx < INTEL_PMC_IDX_FIXED) { + msr_b = MSR_IA32_PMC_V6_GP0_CFG_B; + msr_c = MSR_IA32_PMC_V6_GP0_CFG_C; + msr_offset = x86_pmu.addr_offset(idx, false); + } else { + msr_b = MSR_IA32_PMC_V6_FX0_CFG_B; + msr_c = MSR_IA32_PMC_V6_FX0_CFG_C; + msr_offset = x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false); + } + + if (cpuc->acr_cfg_b[idx] != mask) { + wrmsrl(msr_b + msr_offset, mask); + cpuc->acr_cfg_b[idx] = mask; + } + /* Only need to update the reload value when there is a valid config value. */ + if (mask && cpuc->acr_cfg_c[idx] != reload) { + wrmsrl(msr_c + msr_offset, reload); + cpuc->acr_cfg_c[idx] = reload; + } +} + +static void intel_pmu_enable_acr(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!is_acr_event_group(event) || !event->attr.config2) { + /* + * The disable doesn't clear the ACR CFG register. + * Check and clear the ACR CFG register. + */ + intel_pmu_config_acr(hwc->idx, 0, 0); + return; + } + + intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period); +} + +DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr); + static void intel_pmu_enable_event(struct perf_event *event) { u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE; @@ -2865,16 +2951,19 @@ static void intel_pmu_enable_event(struct perf_event *event) int idx = hwc->idx; if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_enable(event); + static_call(x86_pmu_pebs_enable)(event); switch (idx) { case 0 ... INTEL_PMC_IDX_FIXED - 1: if (branch_sample_counters(event)) enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR; intel_set_masks(event, idx); + static_call_cond(intel_pmu_enable_acr_event)(event); __x86_pmu_enable_event(hwc, enable_mask); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + static_call_cond(intel_pmu_enable_acr_event)(event); + fallthrough; case INTEL_PMC_IDX_METRIC_BASE ... 
INTEL_PMC_IDX_METRIC_END: intel_pmu_enable_fixed(event); break; @@ -2892,12 +2981,51 @@ static void intel_pmu_enable_event(struct perf_event *event) } } +static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc) +{ + struct perf_event *event, *leader; + int i, j, idx; + + for (i = 0; i < cpuc->n_events; i++) { + leader = cpuc->event_list[i]; + if (!is_acr_event_group(leader)) + continue; + + /* The ACR events must be contiguous. */ + for (j = i; j < cpuc->n_events; j++) { + event = cpuc->event_list[j]; + if (event->group_leader != leader->group_leader) + break; + for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { + if (WARN_ON_ONCE(i + idx > cpuc->n_events)) + return; + __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1); + } + } + i = j - 1; + } +} + +void intel_pmu_late_setup(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!cpuc->n_late_setup) + return; + + intel_pmu_pebs_late_setup(cpuc); + intel_pmu_acr_late_setup(cpuc); +} + static void intel_pmu_add_event(struct perf_event *event) { if (event->attr.precise_ip) intel_pmu_pebs_add(event); if (intel_pmu_needs_branch_stack(event)) intel_pmu_lbr_add(event); + if (is_pebs_counter_event_group(event) || + is_acr_event_group(event)) + this_cpu_ptr(&cpu_hw_events)->n_late_setup++; } /* @@ -2915,7 +3043,7 @@ int intel_pmu_save_and_restart(struct perf_event *event) */ if (unlikely(event_is_checkpointed(event))) { /* No race with NMIs because the counter should not be armed */ - wrmsrl(event->hw.event_base, 0); + wrmsrq(event->hw.event_base, 0); local64_set(&event->hw.prev_count, 0); } return static_call(x86_pmu_set_period)(event); @@ -2932,7 +3060,7 @@ static int intel_pmu_set_period(struct perf_event *event) static u64 intel_pmu_update(struct perf_event *event) { if (unlikely(is_topdown_count(event))) - return static_call(intel_pmu_update_topdown_event)(event); + return static_call(intel_pmu_update_topdown_event)(event, NULL); return x86_perf_event_update(event); } @@ -2954,13 +3082,13 @@ static void intel_pmu_reset(void) pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) { - wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); - wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); + wrmsrq_safe(x86_pmu_config_addr(idx), 0ull); + wrmsrq_safe(x86_pmu_event_addr(idx), 0ull); } for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) { if (fixed_counter_disabled(idx, cpuc->pmu)) continue; - wrmsrl_safe(x86_pmu_fixed_ctr_addr(idx), 0ull); + wrmsrq_safe(x86_pmu_fixed_ctr_addr(idx), 0ull); } if (ds) @@ -2969,7 +3097,7 @@ static void intel_pmu_reset(void) /* Ack all overflows and disable fixed counters */ if (x86_pmu.version >= 2) { intel_pmu_ack_status(intel_pmu_get_status()); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0); } /* Reset LBRs and LBR freezing */ @@ -3013,8 +3141,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs, continue; perf_sample_data_init(data, 0, event->hw.last_period); - if (perf_event_overflow(event, data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, data, regs); /* Inject one fake event is enough. 
*/ break; @@ -3027,7 +3154,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int bit; int handled = 0; - u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); inc_irq_stat(apic_perf_irqs); @@ -3070,8 +3196,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) handled++; x86_pmu_handle_guest_pebs(regs, &data); - x86_pmu.drain_pebs(regs, &data); - status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; + static_call(x86_pmu_drain_pebs)(regs, &data); /* * PMI throttle may be triggered, which stops the PEBS event. @@ -3081,7 +3206,16 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) * Update the MSR if pebs_enabled is changed. */ if (pebs_enabled != cpuc->pebs_enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + + /* + * Above PEBS handler (PEBS counters snapshotting) has updated fixed + * counter 3 and perf metrics counts if they are in counter group, + * unnecessary to update again. + */ + if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] && + is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS])) + status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT; } /* @@ -3098,9 +3232,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) { handled++; - static_call(intel_pmu_update_topdown_event)(NULL); + static_call(intel_pmu_update_topdown_event)(NULL, NULL); } + status &= hybrid(cpuc->pmu, intel_ctrl); + /* * Checkpointed counters can lead to 'spurious' PMIs because the * rollback caused by the PMI will have cleared the overflow status @@ -3110,22 +3246,45 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; + u64 last_period; handled++; if (!test_bit(bit, cpuc->active_mask)) continue; + /* + * There may be unprocessed PEBS records in the PEBS buffer, + * which still stores the previous values. + * Process those records first before handling the latest value. + * For example, + * A is a regular counter + * B is a PEBS event which reads A + * C is a PEBS event + * + * The following can happen: + * B-assist A=1 + * C A=2 + * B-assist A=3 + * A-overflow-PMI A=4 + * C-assist-PMI (PEBS buffer) A=5 + * + * The PEBS buffer has to be drained before handling the A-PMI + */ + if (is_pebs_counter_event_group(event)) + x86_pmu.drain_pebs(regs, &data); + + last_period = event->hw.last_period; + if (!intel_pmu_save_and_restart(event)) continue; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); if (has_branch_stack(event)) intel_pmu_lbr_save_brstack(&data, cpuc, event); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } return handled; @@ -3687,10 +3846,9 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, if (cpuc->excl_cntrs) return intel_get_excl_constraints(cpuc, event, idx, c2); - /* Not all counters support the branch counter feature. 
*/ - if (branch_sample_counters(event)) { + if (event->hw.dyn_constraint != ~0ULL) { c2 = dyn_constraint(cpuc, c2, idx); - c2->idxmsk64 &= x86_pmu.lbr_counters; + c2->idxmsk64 &= event->hw.dyn_constraint; c2->weight = hweight64(c2->idxmsk64); } @@ -4031,6 +4189,39 @@ end: return start; } +static inline bool intel_pmu_has_acr(struct pmu *pmu) +{ + return !!hybrid(pmu, acr_cause_mask64); +} + +static bool intel_pmu_is_acr_group(struct perf_event *event) +{ + /* The group leader has the ACR flag set */ + if (is_acr_event_group(event)) + return true; + + /* The acr_mask is set */ + if (event->attr.config2) + return true; + + return false; +} + +static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event, + u64 *cause_mask, int *num) +{ + event->hw.dyn_constraint &= hybrid(event->pmu, acr_cntr_mask64); + *cause_mask |= event->attr.config2; + *num += 1; +} + +static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event, + int idx, u64 cause_mask) +{ + if (test_bit(idx, (unsigned long *)&cause_mask)) + event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -4092,15 +4283,19 @@ static int intel_pmu_hw_config(struct perf_event *event) leader = event->group_leader; if (branch_sample_call_stack(leader)) return -EINVAL; - if (branch_sample_counters(leader)) + if (branch_sample_counters(leader)) { num++; + leader->hw.dyn_constraint &= x86_pmu.lbr_counters; + } leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS; for_each_sibling_event(sibling, leader) { if (branch_sample_call_stack(sibling)) return -EINVAL; - if (branch_sample_counters(sibling)) + if (branch_sample_counters(sibling)) { num++; + sibling->hw.dyn_constraint &= x86_pmu.lbr_counters; + } } if (num > fls(x86_pmu.lbr_counters)) @@ -4148,6 +4343,101 @@ static int intel_pmu_hw_config(struct perf_event *event) event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT; } + if ((event->attr.sample_type & PERF_SAMPLE_READ) && + (x86_pmu.intel_cap.pebs_format >= 6) && + x86_pmu.intel_cap.pebs_baseline && + is_sampling_event(event) && + event->attr.precise_ip) + event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR; + + if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) { + struct perf_event *sibling, *leader = event->group_leader; + struct pmu *pmu = event->pmu; + bool has_sw_event = false; + int num = 0, idx = 0; + u64 cause_mask = 0; + + /* Not support perf metrics */ + if (is_metric_event(event)) + return -EINVAL; + + /* Not support freq mode */ + if (event->attr.freq) + return -EINVAL; + + /* PDist is not supported */ + if (event->attr.config2 && event->attr.precise_ip > 2) + return -EINVAL; + + /* The reload value cannot exceeds the max period */ + if (event->attr.sample_period > x86_pmu.max_period) + return -EINVAL; + /* + * The counter-constraints of each event cannot be finalized + * unless the whole group is scanned. However, it's hard + * to know whether the event is the last one of the group. + * Recalculate the counter-constraints for each event when + * adding a new event. + * + * The group is traversed twice, which may be optimized later. + * In the first round, + * - Find all events which do reload when other events + * overflow and set the corresponding counter-constraints + * - Add all events, which can cause other events reload, + * in the cause_mask + * - Error out if the number of events exceeds the HW limit + * - The ACR events must be contiguous. 
+ * Error out if there are non-X86 events between ACR events. + * This is not a HW limit, but a SW limit. + * With the assumption, the intel_pmu_acr_late_setup() can + * easily convert the event idx to counter idx without + * traversing the whole event list. + */ + if (!is_x86_event(leader)) + return -EINVAL; + + if (leader->attr.config2) + intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num); + + if (leader->nr_siblings) { + for_each_sibling_event(sibling, leader) { + if (!is_x86_event(sibling)) { + has_sw_event = true; + continue; + } + if (!sibling->attr.config2) + continue; + if (has_sw_event) + return -EINVAL; + intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num); + } + } + if (leader != event && event->attr.config2) { + if (has_sw_event) + return -EINVAL; + intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num); + } + + if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) || + num > hweight64(hybrid(event->pmu, acr_cntr_mask64))) + return -EINVAL; + /* + * In the second round, apply the counter-constraints for + * the events which can cause other events reload. + */ + intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask); + + if (leader->nr_siblings) { + for_each_sibling_event(sibling, leader) + intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask); + } + + if (leader != event) + intel_pmu_set_acr_caused_constr(event, idx, cause_mask); + + leader->hw.flags |= PERF_X86_EVENT_ACR; + } + if ((event->attr.type == PERF_TYPE_HARDWARE) || (event->attr.type == PERF_TYPE_HW_CACHE)) return 0; @@ -4247,7 +4537,7 @@ static int intel_pmu_hw_config(struct perf_event *event) if (x86_pmu.version < 3) return -EINVAL; - ret = perf_allow_cpu(&event->attr); + ret = perf_allow_cpu(); if (ret) return ret; @@ -4295,7 +4585,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask, }; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return arr; /* @@ -4336,7 +4626,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) arr[pebs_enable] = (struct perf_guest_switch_msr){ .msr = MSR_IA32_PEBS_ENABLE, .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, - .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable, }; if (arr[pebs_enable].host) { @@ -4685,9 +4975,9 @@ static int adl_hw_config(struct perf_event *event) return -EOPNOTSUPP; } -static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) +static enum intel_cpu_type adl_get_hybrid_cpu_type(void) { - return HYBRID_INTEL_CORE; + return INTEL_CPU_TYPE_CORE; } static inline bool erratum_hsw11(struct perf_event *event) @@ -4893,7 +5183,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) goto err; } - if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) { + if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) { size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); @@ -4982,7 +5272,7 @@ static inline bool intel_pmu_broken_perf_cap(void) return false; } -static void update_pmu_cap(struct x86_hybrid_pmu *pmu) +static void update_pmu_cap(struct pmu *pmu) { unsigned int cntr, fixed_cntr, ecx, edx; union cpuid35_eax eax; @@ -4991,20 +5281,30 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx); if (ebx.split.umask2) - pmu->config_mask |= 
ARCH_PERFMON_EVENTSEL_UMASK2; + hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2; if (ebx.split.eq) - pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ; + hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ; if (eax.split.cntr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, &cntr, &fixed_cntr, &ecx, &edx); - pmu->cntr_mask64 = cntr; - pmu->fixed_cntr_mask64 = fixed_cntr; + hybrid(pmu, cntr_mask64) = cntr; + hybrid(pmu, fixed_cntr_mask64) = fixed_cntr; + } + + if (eax.split.acr_subleaf) { + cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF, + &cntr, &fixed_cntr, &ecx, &edx); + /* The mask of the counters which can be reloaded */ + hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED); + + /* The mask of the counters which can cause a reload of reloadable counters */ + hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED); } if (!intel_pmu_broken_perf_cap()) { /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ - rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); + rdmsrq(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities); } } @@ -5032,7 +5332,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) { - u8 cpu_type = get_this_hybrid_cpu_type(); + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + enum intel_cpu_type cpu_type = c->topo.intel_type; int i; /* @@ -5041,7 +5342,7 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) * on it. There should be a fixup function provided for these * troublesome CPUs (->get_hybrid_cpu_type). */ - if (cpu_type == HYBRID_INTEL_NONE) { + if (cpu_type == INTEL_CPU_TYPE_UNKNOWN) { if (x86_pmu.get_hybrid_cpu_type) cpu_type = x86_pmu.get_hybrid_cpu_type(); else @@ -5058,16 +5359,16 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; u32 native_id; - if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big) + if (cpu_type == INTEL_CPU_TYPE_CORE && pmu_type == hybrid_big) return &x86_pmu.hybrid_pmu[i]; - if (cpu_type == HYBRID_INTEL_ATOM) { + if (cpu_type == INTEL_CPU_TYPE_ATOM) { if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small) return &x86_pmu.hybrid_pmu[i]; - native_id = get_this_hybrid_cpu_native_id(); - if (native_id == skt_native_id && pmu_type == hybrid_small) + native_id = c->topo.intel_native_model_id; + if (native_id == INTEL_ATOM_SKT_NATIVE_ID && pmu_type == hybrid_small) return &x86_pmu.hybrid_pmu[i]; - if (native_id == cmt_native_id && pmu_type == hybrid_tiny) + if (native_id == INTEL_ATOM_CMT_NATIVE_ID && pmu_type == hybrid_tiny) return &x86_pmu.hybrid_pmu[i]; } } @@ -5090,7 +5391,7 @@ static bool init_hybrid_pmu(int cpu) goto end; if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) - update_pmu_cap(pmu); + update_pmu_cap(&pmu->pmu); intel_pmu_check_hybrid_pmus(pmu); @@ -5151,7 +5452,7 @@ static void intel_pmu_cpu_starting(int cpu) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) { union perf_capabilities perf_cap; - rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities); + rdmsrq(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities); if (!perf_cap.perf_metrics) { x86_pmu.intel_cap.perf_metrics = 0; x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); @@ -5244,16 +5545,10 @@ static void intel_pmu_cpu_dead(int cpu) } static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, - bool sched_in) + struct task_struct 
*task, bool sched_in) { intel_pmu_pebs_sched_task(pmu_ctx, sched_in); - intel_pmu_lbr_sched_task(pmu_ctx, sched_in); -} - -static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc) -{ - intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc); + intel_pmu_lbr_sched_task(pmu_ctx, task, sched_in); } static int intel_pmu_check_period(struct perf_event *event, u64 value) @@ -5424,7 +5719,6 @@ static __initconst const struct x86_pmu intel_pmu = { .guest_get_msrs = intel_guest_get_msrs, .sched_task = intel_pmu_sched_task, - .swap_task_ctx = intel_pmu_swap_task_ctx, .check_period = intel_pmu_check_period, @@ -5471,7 +5765,7 @@ static __init void intel_clovertown_quirk(void) * these chips. */ pr_warn("PEBS disabled due to CPU errata\n"); - x86_pmu.pebs = 0; + x86_pmu.ds_pebs = 0; x86_pmu.pebs_constraints = NULL; } @@ -5566,24 +5860,24 @@ static bool check_msr(unsigned long msr, u64 mask) * matches, this is needed to detect certain hardware emulators * (qemu/kvm) that don't trap on the MSR access and always return 0s. */ - if (rdmsrl_safe(msr, &val_old)) + if (rdmsrq_safe(msr, &val_old)) return false; /* - * Only change the bits which can be updated by wrmsrl. + * Only change the bits which can be updated by wrmsrq. */ val_tmp = val_old ^ mask; if (is_lbr_from(msr)) val_tmp = lbr_from_signext_quirk_wr(val_tmp); - if (wrmsrl_safe(msr, val_tmp) || - rdmsrl_safe(msr, &val_new)) + if (wrmsrq_safe(msr, val_tmp) || + rdmsrq_safe(msr, &val_new)) return false; /* - * Quirk only affects validation in wrmsr(), so wrmsrl()'s value - * should equal rdmsrl()'s even with the quirk. + * Quirk only affects validation in wrmsr(), so wrmsrq()'s value + * should equal rdmsrq()'s even with the quirk. */ if (val_new != val_tmp) return false; @@ -5594,7 +5888,7 @@ static bool check_msr(unsigned long msr, u64 mask) /* Here it's sure that the MSR can be safely accessed. * Restore the old value and return. */ - wrmsrl(msr, val_old); + wrmsrq(msr, val_old); return true; } @@ -5959,7 +6253,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) static umode_t pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) { - return x86_pmu.pebs ? attr->mode : 0; + return x86_pmu.ds_pebs ? attr->mode : 0; } static umode_t @@ -5990,6 +6284,21 @@ td_is_visible(struct kobject *kobj, struct attribute *attr, int i) return attr->mode; } +PMU_FORMAT_ATTR(acr_mask, "config2:0-63"); + +static struct attribute *format_acr_attrs[] = { + &format_attr_acr_mask.attr, + NULL +}; + +static umode_t +acr_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + + return intel_pmu_has_acr(dev_get_drvdata(dev)) ? 
attr->mode : 0; +} + static struct attribute_group group_events_td = { .name = "events", .is_visible = td_is_visible, @@ -6032,6 +6341,12 @@ static struct attribute_group group_format_evtsel_ext = { .is_visible = evtsel_ext_is_visible, }; +static struct attribute_group group_format_acr = { + .name = "format", + .attrs = format_acr_attrs, + .is_visible = acr_is_visible, +}; + static struct attribute_group group_default = { .attrs = intel_pmu_attrs, .is_visible = default_is_visible, @@ -6046,6 +6361,7 @@ static const struct attribute_group *attr_update[] = { &group_format_extra, &group_format_extra_skl, &group_format_evtsel_ext, + &group_format_acr, &group_default, NULL, }; @@ -6330,6 +6646,7 @@ static const struct attribute_group *hybrid_attr_update[] = { &group_caps_lbr, &hybrid_group_format_extra, &group_format_evtsel_ext, + &group_format_acr, &group_default, &hybrid_group_cpus, NULL, @@ -6522,6 +6839,7 @@ static __always_inline void intel_pmu_init_skt(struct pmu *pmu) intel_pmu_init_grt(pmu); hybrid(pmu, event_constraints) = intel_skt_event_constraints; hybrid(pmu, extra_regs) = intel_cmt_extra_regs; + static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); } __init int intel_pmu_init(void) @@ -6540,15 +6858,21 @@ __init int intel_pmu_init(void) char *name; struct x86_hybrid_pmu *pmu; + /* Architectural Perfmon was introduced starting with Core "Yonah" */ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { - case 0x6: - return p6_pmu_init(); - case 0xb: + case 6: + if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH) + return p6_pmu_init(); + break; + case 11: return knc_pmu_init(); - case 0xf: + case 15: return p4_pmu_init(); } + + pr_cont("unsupported CPU family %d model %d ", + boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; } @@ -6576,6 +6900,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64); x86_pmu.pebs_capable = PEBS_COUNTER_MASK; + x86_pmu.config_mask = X86_RAW_EVENT_MASK; /* * Quirk: v2 perfmon does not report fixed-purpose events, so @@ -6592,7 +6917,7 @@ __init int intel_pmu_init(void) if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; - rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); + rdmsrq(MSR_IA32_PERF_CAPABILITIES, capabilities); x86_pmu.intel_cap.capabilities = capabilities; } @@ -6604,7 +6929,7 @@ __init int intel_pmu_init(void) if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) intel_pmu_arch_lbr_init(); - intel_ds_init(); + intel_pebs_init(); x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ @@ -6615,6 +6940,12 @@ __init int intel_pmu_init(void) } /* + * Many features on and after V6 require dynamic constraint, + * e.g., Arch PEBS, ACR. 
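With acr_mask exported as config2 above and the enable/late-setup paths wired up, an auto-counter-reload group pairs a leader with siblings whose config2 bits name which group members' overflows trigger their reload. A hypothetical two-event sketch; event codes and periods are invented for illustration:

struct perf_event_attr leader = {
	.type		= PERF_TYPE_RAW,
	.config		= 0x00c4,	/* hypothetical branch-type event */
	.sample_period	= 1000000,
};

struct perf_event_attr sibling = {
	.type		= PERF_TYPE_RAW,
	.config		= 0x00c5,	/* hypothetical second event */
	.sample_period	= 500000,	/* reload value programmed for this counter */
	.config2	= 0x1,		/* bit 0: reload when group member 0 (the leader) overflows */
};

With matching tool-side support this is expected to be expressible as something like perf stat -e '{cpu/event=0xc4,period=1000000/,cpu/event=0xc5,period=500000,acr_mask=0x1/}', though that command-line syntax is an assumption here.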
+ */ + if (version >= 6) + x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT; + /* * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_vfm) { @@ -6696,7 +7027,7 @@ __init int intel_pmu_init(void) case INTEL_ATOM_SILVERMONT_D: case INTEL_ATOM_SILVERMONT_MID: case INTEL_ATOM_AIRMONT: - case INTEL_ATOM_AIRMONT_MID: + case INTEL_ATOM_SILVERMONT_MID2: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -6825,6 +7156,18 @@ __init int intel_pmu_init(void) name = "crestmont"; break; + case INTEL_ATOM_DARKMONT_X: + intel_pmu_init_skt(NULL); + intel_pmu_pebs_data_source_cmt(); + x86_pmu.pebs_latency_data = cmt_latency_data; + x86_pmu.get_event_constraints = cmt_get_event_constraints; + td_attr = skt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = cmt_format_attr; + pr_cont("Darkmont events, "); + name = "darkmont"; + break; + case INTEL_WESTMERE: case INTEL_WESTMERE_EP: case INTEL_WESTMERE_EX: @@ -7255,8 +7598,17 @@ __init int intel_pmu_init(void) name = "meteorlake_hybrid"; break; + case INTEL_PANTHERLAKE_L: + pr_cont("Pantherlake Hybrid events, "); + name = "pantherlake_hybrid"; + goto lnl_common; + case INTEL_LUNARLAKE_M: case INTEL_ARROWLAKE: + pr_cont("Lunarlake Hybrid events, "); + name = "lunarlake_hybrid"; + + lnl_common: intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = lnl_latency_data; @@ -7278,8 +7630,6 @@ __init int intel_pmu_init(void) intel_pmu_init_skt(&pmu->pmu); intel_pmu_pebs_data_source_lnl(); - pr_cont("Lunarlake Hybrid events, "); - name = "lunarlake_hybrid"; break; case INTEL_ARROWLAKE_H: @@ -7367,6 +7717,18 @@ __init int intel_pmu_init(void) x86_pmu.attr_update = hybrid_attr_update; } + /* + * The archPerfmonExt (0x23) includes an enhanced enumeration of + * PMU architectural features with a per-core view. For non-hybrid, + * each core has the same PMU capabilities. It's good enough to + * update the x86_pmu from the booting CPU. For hybrid, the x86_pmu + * is used to keep the common capabilities. Still keep the values + * from the leaf 0xa. The core specific update will be done later + * when a new type is online. 
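The reason update_pmu_cap() can now be handed a NULL pmu, as in the non-hybrid call below, is the hybrid(pmu, field) accessor used throughout the rewritten function: with a NULL pmu, or on non-hybrid systems, it resolves to the global x86_pmu fields, otherwise to the hybrid PMU's own copy. A rough functional equivalent of the accessor (the real one is a macro in the local perf_event.h):

static u64 *example_config_mask_ptr(struct pmu *pmu)
{
	if (is_hybrid() && pmu)
		return &hybrid_pmu(pmu)->config_mask;	/* per hybrid-PMU copy */

	return &x86_pmu.config_mask;			/* global copy; covers the NULL case */
}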
+ */ + if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) + update_pmu_cap(NULL); + intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64, &x86_pmu.fixed_cntr_mask64, &x86_pmu.intel_ctrl); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index ae4ec16156bb..ec753e39b007 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -111,6 +111,7 @@ #include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> +#include <asm/msr.h> #include "../perf_event.h" #include "../probe.h" @@ -320,7 +321,7 @@ static inline u64 cstate_pmu_read_counter(struct perf_event *event) { u64 val; - rdmsrl(event->hw.event_base, val); + rdmsrq(event->hw.event_base, val); return val; } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index f122882ef278..c0b7ac1c7594 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -10,6 +10,7 @@ #include <asm/tlbflush.h> #include <asm/insn.h> #include <asm/io.h> +#include <asm/msr.h> #include <asm/timer.h> #include "../perf_event.h" @@ -624,7 +625,7 @@ static int alloc_pebs_buffer(int cpu) int max, node = cpu_to_node(cpu); void *buffer, *insn_buff, *cea; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return 0; buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); @@ -659,7 +660,7 @@ static void release_pebs_buffer(int cpu) struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); void *cea; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return; kfree(per_cpu(insn_buffer, cpu)); @@ -734,7 +735,7 @@ void release_ds_buffers(void) { int cpu; - if (!x86_pmu.bts && !x86_pmu.pebs) + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; for_each_possible_cpu(cpu) @@ -750,7 +751,8 @@ void release_ds_buffers(void) } for_each_possible_cpu(cpu) { - release_pebs_buffer(cpu); + if (x86_pmu.ds_pebs) + release_pebs_buffer(cpu); release_bts_buffer(cpu); } } @@ -761,15 +763,17 @@ void reserve_ds_buffers(void) int cpu; x86_pmu.bts_active = 0; - x86_pmu.pebs_active = 0; - if (!x86_pmu.bts && !x86_pmu.pebs) + if (x86_pmu.ds_pebs) + x86_pmu.pebs_active = 0; + + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; if (!x86_pmu.bts) bts_err = 1; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) pebs_err = 1; for_each_possible_cpu(cpu) { @@ -781,7 +785,8 @@ void reserve_ds_buffers(void) if (!bts_err && alloc_bts_buffer(cpu)) bts_err = 1; - if (!pebs_err && alloc_pebs_buffer(cpu)) + if (x86_pmu.ds_pebs && !pebs_err && + alloc_pebs_buffer(cpu)) pebs_err = 1; if (bts_err && pebs_err) @@ -793,7 +798,7 @@ void reserve_ds_buffers(void) release_bts_buffer(cpu); } - if (pebs_err) { + if (x86_pmu.ds_pebs && pebs_err) { for_each_possible_cpu(cpu) release_pebs_buffer(cpu); } @@ -805,7 +810,7 @@ void reserve_ds_buffers(void) if (x86_pmu.bts && !bts_err) x86_pmu.bts_active = 1; - if (x86_pmu.pebs && !pebs_err) + if (x86_pmu.ds_pebs && !pebs_err) x86_pmu.pebs_active = 1; for_each_possible_cpu(cpu) { @@ -953,11 +958,11 @@ unlock: return 1; } -static inline void intel_pmu_drain_pebs_buffer(void) +void intel_pmu_drain_pebs_buffer(void) { struct perf_sample_data data; - x86_pmu.drain_pebs(NULL, &data); + static_call(x86_pmu_drain_pebs)(NULL, &data); } /* @@ -1294,6 +1299,19 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) ds->pebs_interrupt_threshold = threshold; } +#define PEBS_DATACFG_CNTRS(x) \ + ((x >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK) + +#define PEBS_DATACFG_CNTR_BIT(x) \ + (((1ULL << x) & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT) + +#define PEBS_DATACFG_FIX(x) \ + 
((x >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK) + +#define PEBS_DATACFG_FIX_BIT(x) \ + (((1ULL << (x)) & PEBS_DATACFG_FIX_MASK) \ + << PEBS_DATACFG_FIX_SHIFT) + static void adaptive_pebs_record_size_update(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1308,10 +1326,57 @@ static void adaptive_pebs_record_size_update(void) sz += sizeof(struct pebs_xmm); if (pebs_data_cfg & PEBS_DATACFG_LBRS) sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry); + if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) { + sz += sizeof(struct pebs_cntr_header); + + /* Metrics base and Metrics Data */ + if (pebs_data_cfg & PEBS_DATACFG_METRICS) + sz += 2 * sizeof(u64); + + if (pebs_data_cfg & PEBS_DATACFG_CNTR) { + sz += (hweight64(PEBS_DATACFG_CNTRS(pebs_data_cfg)) + + hweight64(PEBS_DATACFG_FIX(pebs_data_cfg))) * + sizeof(u64); + } + } cpuc->pebs_record_size = sz; } +static void __intel_pmu_pebs_update_cfg(struct perf_event *event, + int idx, u64 *pebs_data_cfg) +{ + if (is_metric_event(event)) { + *pebs_data_cfg |= PEBS_DATACFG_METRICS; + return; + } + + *pebs_data_cfg |= PEBS_DATACFG_CNTR; + + if (idx >= INTEL_PMC_IDX_FIXED) + *pebs_data_cfg |= PEBS_DATACFG_FIX_BIT(idx - INTEL_PMC_IDX_FIXED); + else + *pebs_data_cfg |= PEBS_DATACFG_CNTR_BIT(idx); +} + + +void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc) +{ + struct perf_event *event; + u64 pebs_data_cfg = 0; + int i; + + for (i = 0; i < cpuc->n_events; i++) { + event = cpuc->event_list[i]; + if (!is_pebs_counter_event_group(event)) + continue; + __intel_pmu_pebs_update_cfg(event, cpuc->assign[i], &pebs_data_cfg); + } + + if (pebs_data_cfg & ~cpuc->pebs_data_cfg) + cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW; +} + #define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_WEIGHT_TYPE | \ @@ -1338,8 +1403,10 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) * + precise_ip < 2 for the non event IP * + For RTM TSX weight we need GPRs for the abort code. 
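The gprs test reworked just below extends adaptive-PEBS GPR capture to PERF_SAMPLE_REGS_USER in addition to PERF_SAMPLE_REGS_INTR. A sketch of how a consumer would request user-space registers with a precise event; the register selection is an arbitrary example:

struct perf_event_attr attr = {
	.type			= PERF_TYPE_HARDWARE,
	.config			= PERF_COUNT_HW_CPU_CYCLES,
	.sample_period		= 100003,
	.precise_ip		= 2,			/* request PEBS */
	.sample_type		= PERF_SAMPLE_IP | PERF_SAMPLE_REGS_USER,
	.sample_regs_user	= (1ULL << PERF_REG_X86_AX) | (1ULL << PERF_REG_X86_SP),
};

From the perf tool this roughly corresponds to using the --user-regs option of perf record together with a :p/:pp event.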
*/ - gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_GP_REGS); + gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS)) || + ((sample_type & PERF_SAMPLE_REGS_USER) && + (attr->sample_regs_user & PEBS_GP_REGS)); tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && ((attr->config & INTEL_ARCH_EVENT_MASK) == @@ -1454,7 +1521,7 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) else value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; } - wrmsrl(base + idx, value); + wrmsrq(base + idx, value); } static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc) @@ -1491,7 +1558,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) */ intel_pmu_drain_pebs_buffer(); adaptive_pebs_record_size_update(); - wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg); + wrmsrq(MSR_PEBS_DATA_CFG, pebs_data_cfg); cpuc->active_pebs_data_cfg = pebs_data_cfg; } } @@ -1554,7 +1621,7 @@ void intel_pmu_pebs_disable(struct perf_event *event) intel_pmu_pebs_via_pt_disable(event); if (cpuc->enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; } @@ -1564,7 +1631,7 @@ void intel_pmu_pebs_enable_all(void) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); } void intel_pmu_pebs_disable_all(void) @@ -1765,8 +1832,6 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, perf_sample_data_init(data, 0, event->hw.last_period); - data->period = event->hw.last_period; - /* * Use latency for weight (only avail with PEBS-LL) */ @@ -1914,12 +1979,89 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs, #endif } +static void intel_perf_event_update_pmc(struct perf_event *event, u64 pmc) +{ + int shift = 64 - x86_pmu.cntval_bits; + struct hw_perf_event *hwc; + u64 delta, prev_pmc; + + /* + * A recorded counter may not have an assigned event in the + * following cases. The value should be dropped. + * - An event is deleted. There is still an active PEBS event. + * The PEBS record doesn't shrink on pmu::del(). + * If the counter of the deleted event once occurred in a PEBS + * record, PEBS still records the counter until the counter is + * reassigned. + * - An event is stopped for some reason, e.g., throttled. + * During this period, another event is added and takes the + * counter of the stopped event. The stopped event is assigned + * to another new and uninitialized counter, since the + * x86_pmu_start(RELOAD) is not invoked for a stopped event. + * The PEBS__DATA_CFG is updated regardless of the event state. + * The uninitialized counter can be recorded in a PEBS record. + * But the cpuc->events[uninitialized_counter] is always NULL, + * because the event is stopped. The uninitialized value is + * safely dropped. 
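The helper below folds a counter value taken from a PEBS record into event->count by left-shifting both the previous and new raw values so the counter's top bit lands at bit 63 before subtracting; a counter that wrapped between records then still yields the right delta. A worked example, assuming a 48-bit counter:

/*
 * shift = 64 - 48 = 16
 * prev_pmc = 0x0000fffffffffff0, pmc = 0x0000000000000010 (counter wrapped)
 * (pmc << 16) - (prev_pmc << 16) = 0x200000   (mod 2^64)
 * delta = 0x200000 >> 16 = 0x20, i.e. 32 counts across the wrap
 */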
+ */ + if (!event) + return; + + hwc = &event->hw; + prev_pmc = local64_read(&hwc->prev_count); + + /* Only update the count when the PMU is disabled */ + WARN_ON(this_cpu_read(cpu_hw_events.enabled)); + local64_set(&hwc->prev_count, pmc); + + delta = (pmc << shift) - (prev_pmc << shift); + delta >>= shift; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); +} + +static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc, + struct perf_event *event, + struct pebs_cntr_header *cntr, + void *next_record) +{ + int bit; + + for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) { + intel_perf_event_update_pmc(cpuc->events[bit], *(u64 *)next_record); + next_record += sizeof(u64); + } + + for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) { + /* The slots event will be handled with perf_metric later */ + if ((cntr->metrics == INTEL_CNTR_METRICS) && + (bit + INTEL_PMC_IDX_FIXED == INTEL_PMC_IDX_FIXED_SLOTS)) { + next_record += sizeof(u64); + continue; + } + intel_perf_event_update_pmc(cpuc->events[bit + INTEL_PMC_IDX_FIXED], + *(u64 *)next_record); + next_record += sizeof(u64); + } + + /* HW will reload the value right after the overflow. */ + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period); + + if (cntr->metrics == INTEL_CNTR_METRICS) { + static_call(intel_pmu_update_topdown_event) + (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS], + (u64 *)next_record); + next_record += 2 * sizeof(u64); + } +} + #define PEBS_LATENCY_MASK 0xffff /* * With adaptive PEBS the layout depends on what fields are configured. */ - static void setup_pebs_adaptive_sample_data(struct perf_event *event, struct pt_regs *iregs, void *__pebs, struct perf_sample_data *data, @@ -1942,7 +2084,6 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, sample_type = event->attr.sample_type; format_group = basic->format_group; perf_sample_data_init(data, 0, event->hw.last_period); - data->period = event->hw.last_period; setup_pebs_time(event, data, basic->tsc); @@ -1985,7 +2126,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, regs->flags &= ~PERF_EFLAGS_EXACT; } - if (sample_type & PERF_SAMPLE_REGS_INTR) + if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) adaptive_pebs_save_regs(regs, gprs); } @@ -2049,6 +2190,28 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, } } + if (format_group & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) { + struct pebs_cntr_header *cntr = next_record; + unsigned int nr; + + next_record += sizeof(struct pebs_cntr_header); + /* + * The PEBS_DATA_CFG is a global register, which is the + * superset configuration for all PEBS events. + * For the PEBS record of non-sample-read group, ignore + * the counter snapshot fields. 
+ */ + if (is_pebs_counter_event_group(event)) { + __setup_pebs_counter_group(cpuc, event, cntr, next_record); + data->sample_flags |= PERF_SAMPLE_READ; + } + + nr = hweight32(cntr->cntr) + hweight32(cntr->fixed); + if (cntr->metrics == INTEL_CNTR_METRICS) + nr += 2; + next_record += nr * sizeof(u64); + } + WARN_ONCE(next_record != __pebs + basic->format_size, "PEBS record size %u, expected %llu, config %llx\n", basic->format_size, @@ -2094,15 +2257,6 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) return NULL; } -void intel_pmu_auto_reload_read(struct perf_event *event) -{ - WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)); - - perf_pmu_disable(event->pmu); - intel_pmu_drain_pebs_buffer(); - perf_pmu_enable(event->pmu); -} - /* * Special variant of intel_pmu_save_and_restart() for auto-reload. */ @@ -2123,7 +2277,7 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) WARN_ON(this_cpu_read(cpu_hw_events.enabled)); prev_raw_count = local64_read(&hwc->prev_count); - rdpmcl(hwc->event_base_rdpmc, new_raw_count); + new_raw_count = rdpmc(hwc->event_base_rdpmc); local64_set(&hwc->prev_count, new_raw_count); /* @@ -2206,20 +2360,44 @@ __intel_pmu_pebs_last_event(struct perf_event *event, * All but the last records are processed. * The last one is left to be able to call the overflow handler. */ - if (perf_event_overflow(event, data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, data, regs); } if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { + if ((is_pebs_counter_event_group(event))) { + /* + * The value of each sample has been updated when setup + * the corresponding sample data. + */ + perf_event_update_userpage(event); + } else { + /* + * Now, auto-reload is only enabled in fixed period mode. + * The reload value is always hwc->sample_period. + * May need to change it, if auto-reload is enabled in + * freq mode later. + */ + intel_pmu_save_and_restart_reload(event, count); + } + } else { /* - * Now, auto-reload is only enabled in fixed period mode. - * The reload value is always hwc->sample_period. - * May need to change it, if auto-reload is enabled in - * freq mode later. + * For a non-precise event, it's possible the + * counters-snapshotting records a positive value for the + * overflowed event. Then the HW auto-reload mechanism + * reset the counter to 0 immediately, because the + * pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD + * is not set. The counter backwards may be observed in a + * PMI handler. + * + * Since the event value has been updated when processing the + * counters-snapshotting record, only needs to set the new + * period for the counter. */ - intel_pmu_save_and_restart_reload(event, count); - } else - intel_pmu_save_and_restart(event); + if (is_pebs_counter_event_group(event)) + static_call(x86_pmu_set_period)(event); + else + intel_pmu_save_and_restart(event); + } } static __always_inline void @@ -2287,8 +2465,9 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_ setup_pebs_fixed_sample_data); } -static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask) { + u64 pebs_enabled = cpuc->pebs_enabled & mask; struct perf_event *event; int bit; @@ -2299,7 +2478,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int * It needs to call intel_pmu_save_and_restart_reload() to * update the event->count for this case. 
*/ - for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { + for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) { event = cpuc->events[bit]; if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) intel_pmu_save_and_restart_reload(event, 0); @@ -2334,7 +2513,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d } if (unlikely(base >= top)) { - intel_pmu_pebs_event_update_no_drain(cpuc, size); + intel_pmu_pebs_event_update_no_drain(cpuc, mask); return; } @@ -2410,8 +2589,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d if (error[bit]) { perf_log_lost_samples(event, error[bit]); - if (iregs && perf_event_account_interrupt(event)) - x86_pmu_stop(event, 0); + if (iregs) + perf_event_account_interrupt(event); } if (counts[bit]) { @@ -2448,7 +2627,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED); if (unlikely(base >= top)) { - intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX); + intel_pmu_pebs_event_update_no_drain(cpuc, mask); return; } @@ -2491,10 +2670,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d } /* - * BTS, PEBS probe and setup + * PEBS probe and setup */ -void __init intel_ds_init(void) +void __init intel_pebs_init(void) { /* * No support for 32bit formats @@ -2502,13 +2681,12 @@ void __init intel_ds_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64)) return; - x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); - x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; if (x86_pmu.version <= 4) x86_pmu.pebs_no_isolation = 1; - if (x86_pmu.pebs) { + if (x86_pmu.ds_pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? 
'+' : '-'; char *pebs_qual = ""; int format = x86_pmu.intel_cap.pebs_format; @@ -2516,6 +2694,11 @@ void __init intel_ds_init(void) if (format < 4) x86_pmu.intel_cap.pebs_baseline = 0; + x86_pmu.pebs_enable = intel_pmu_pebs_enable; + x86_pmu.pebs_disable = intel_pmu_pebs_disable; + x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all; + x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all; + switch (format) { case 0: pr_cont("PEBS fmt0%c, ", pebs_type); @@ -2552,6 +2735,11 @@ void __init intel_ds_init(void) break; case 6: + if (x86_pmu.intel_cap.pebs_baseline) { + x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ; + x86_pmu.late_setup = intel_pmu_late_setup; + } + fallthrough; case 5: x86_pmu.pebs_ept = 1; fallthrough; @@ -2576,7 +2764,7 @@ void __init intel_ds_init(void) PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); } - pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); + pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual); /* * The PEBS-via-PT is not supported on hybrid platforms, @@ -2595,7 +2783,7 @@ void __init intel_ds_init(void) default: pr_cont("no PEBS fmt%d%c, ", format, pebs_type); - x86_pmu.pebs = 0; + x86_pmu.ds_pebs = 0; } } } @@ -2604,8 +2792,8 @@ void perf_restore_debug_store(void) { struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); - if (!x86_pmu.bts && !x86_pmu.pebs) + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; - wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds); + wrmsrq(MSR_IA32_DS_AREA, (unsigned long)ds); } diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c index 034a1f6a457c..e614baf42926 100644 --- a/arch/x86/events/intel/knc.c +++ b/arch/x86/events/intel/knc.c @@ -5,6 +5,7 @@ #include <linux/types.h> #include <asm/hardirq.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -159,18 +160,18 @@ static void knc_pmu_disable_all(void) { u64 val; - rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); - wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); } static void knc_pmu_enable_all(int added) { u64 val; - rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); - wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); } static inline void @@ -182,7 +183,7 @@ knc_pmu_disable_event(struct perf_event *event) val = hwc->config; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); + (void)wrmsrq_safe(hwc->config_base + hwc->idx, val); } static void knc_pmu_enable_event(struct perf_event *event) @@ -193,21 +194,21 @@ static void knc_pmu_enable_event(struct perf_event *event) val = hwc->config; val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); + (void)wrmsrq_safe(hwc->config_base + hwc->idx, val); } static inline u64 knc_pmu_get_status(void) { u64 status; - rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); + rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); return status; } static inline void knc_pmu_ack_status(u64 ack) { - wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); + wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); } static int knc_pmu_handle_irq(struct pt_regs *regs) @@ -241,19 +242,20 @@ again: for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; + u64 last_period; handled++; if (!test_bit(bit, cpuc->active_mask)) continue; + last_period = 
event->hw.last_period; if (!intel_pmu_save_and_restart(event)) continue; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } /* diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index dc641b50814e..7aa59966e7c3 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -137,9 +137,9 @@ static void __intel_pmu_lbr_enable(bool pmi) if (cpuc->lbr_sel) lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel) - wrmsrl(MSR_LBR_SELECT, lbr_select); + wrmsrq(MSR_LBR_SELECT, lbr_select); - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); orig_debugctl = debugctl; if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) @@ -155,10 +155,10 @@ static void __intel_pmu_lbr_enable(bool pmi) debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; if (orig_debugctl != debugctl) - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); if (static_cpu_has(X86_FEATURE_ARCH_LBR)) - wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); + wrmsrq(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); } void intel_pmu_lbr_reset_32(void) @@ -166,7 +166,7 @@ void intel_pmu_lbr_reset_32(void) int i; for (i = 0; i < x86_pmu.lbr_nr; i++) - wrmsrl(x86_pmu.lbr_from + i, 0); + wrmsrq(x86_pmu.lbr_from + i, 0); } void intel_pmu_lbr_reset_64(void) @@ -174,17 +174,17 @@ void intel_pmu_lbr_reset_64(void) int i; for (i = 0; i < x86_pmu.lbr_nr; i++) { - wrmsrl(x86_pmu.lbr_from + i, 0); - wrmsrl(x86_pmu.lbr_to + i, 0); + wrmsrq(x86_pmu.lbr_from + i, 0); + wrmsrq(x86_pmu.lbr_to + i, 0); if (x86_pmu.lbr_has_info) - wrmsrl(x86_pmu.lbr_info + i, 0); + wrmsrq(x86_pmu.lbr_info + i, 0); } } static void intel_pmu_arch_lbr_reset(void) { /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */ - wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); + wrmsrq(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); } void intel_pmu_lbr_reset(void) @@ -199,7 +199,7 @@ void intel_pmu_lbr_reset(void) cpuc->last_task_ctx = NULL; cpuc->last_log_id = 0; if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select) - wrmsrl(MSR_LBR_SELECT, 0); + wrmsrq(MSR_LBR_SELECT, 0); } /* @@ -209,7 +209,7 @@ static inline u64 intel_pmu_lbr_tos(void) { u64 tos; - rdmsrl(x86_pmu.lbr_tos, tos); + rdmsrq(x86_pmu.lbr_tos, tos); return tos; } @@ -282,17 +282,17 @@ static u64 lbr_from_signext_quirk_rd(u64 val) static __always_inline void wrlbr_from(unsigned int idx, u64 val) { val = lbr_from_signext_quirk_wr(val); - wrmsrl(x86_pmu.lbr_from + idx, val); + wrmsrq(x86_pmu.lbr_from + idx, val); } static __always_inline void wrlbr_to(unsigned int idx, u64 val) { - wrmsrl(x86_pmu.lbr_to + idx, val); + wrmsrq(x86_pmu.lbr_to + idx, val); } static __always_inline void wrlbr_info(unsigned int idx, u64 val) { - wrmsrl(x86_pmu.lbr_info + idx, val); + wrmsrq(x86_pmu.lbr_info + idx, val); } static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr) @@ -302,7 +302,7 @@ static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr) if (lbr) return lbr->from; - rdmsrl(x86_pmu.lbr_from + idx, val); + rdmsrq(x86_pmu.lbr_from + idx, val); return lbr_from_signext_quirk_rd(val); } @@ -314,7 +314,7 @@ static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr) if (lbr) return lbr->to; - rdmsrl(x86_pmu.lbr_to + idx, val); + rdmsrq(x86_pmu.lbr_to + 
idx, val); return val; } @@ -326,7 +326,7 @@ static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr) if (lbr) return lbr->info; - rdmsrl(x86_pmu.lbr_info + idx, val); + rdmsrq(x86_pmu.lbr_info + idx, val); return val; } @@ -380,10 +380,10 @@ void intel_pmu_lbr_restore(void *ctx) wrlbr_info(lbr_idx, 0); } - wrmsrl(x86_pmu.lbr_tos, tos); + wrmsrq(x86_pmu.lbr_tos, tos); if (cpuc->lbr_select) - wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); + wrmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel); } static void intel_pmu_arch_lbr_restore(void *ctx) @@ -422,11 +422,17 @@ static __always_inline bool lbr_is_reset_in_cstate(void *ctx) return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL); } +static inline bool has_lbr_callstack_users(void *ctx) +{ + return task_context_opt(ctx)->lbr_callstack_users || + x86_pmu.lbr_callstack_users; +} + static void __intel_pmu_lbr_restore(void *ctx) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (task_context_opt(ctx)->lbr_callstack_users == 0 || + if (!has_lbr_callstack_users(ctx) || task_context_opt(ctx)->lbr_stack_state == LBR_NONE) { intel_pmu_lbr_reset(); return; @@ -469,7 +475,7 @@ void intel_pmu_lbr_save(void *ctx) task_ctx->tos = tos; if (cpuc->lbr_select) - rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); + rdmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel); } static void intel_pmu_arch_lbr_save(void *ctx) @@ -503,7 +509,7 @@ static void __intel_pmu_lbr_save(void *ctx) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (task_context_opt(ctx)->lbr_callstack_users == 0) { + if (!has_lbr_callstack_users(ctx)) { task_context_opt(ctx)->lbr_stack_state = LBR_NONE; return; } @@ -516,32 +522,11 @@ static void __intel_pmu_lbr_save(void *ctx) cpuc->last_log_id = ++task_context_opt(ctx)->log_id; } -void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc) -{ - void *prev_ctx_data, *next_ctx_data; - - swap(prev_epc->task_ctx_data, next_epc->task_ctx_data); - - /* - * Architecture specific synchronization makes sense in case - * both prev_epc->task_ctx_data and next_epc->task_ctx_data - * pointers are allocated. - */ - - prev_ctx_data = next_epc->task_ctx_data; - next_ctx_data = prev_epc->task_ctx_data; - - if (!prev_ctx_data || !next_ctx_data) - return; - - swap(task_context_opt(prev_ctx_data)->lbr_callstack_users, - task_context_opt(next_ctx_data)->lbr_callstack_users); -} - -void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_ctx_data *ctx_data; void *task_ctx; if (!cpuc->lbr_users) @@ -552,14 +537,18 @@ void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched * the task was scheduled out, restore the stack. Otherwise flush * the LBR stack. */ - task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL; + rcu_read_lock(); + ctx_data = rcu_dereference(task->perf_ctx_data); + task_ctx = ctx_data ? 
ctx_data->data : NULL; if (task_ctx) { if (sched_in) __intel_pmu_lbr_restore(task_ctx); else __intel_pmu_lbr_save(task_ctx); + rcu_read_unlock(); return; } + rcu_read_unlock(); /* * Since a context switch can flip the address space and LBR entries @@ -588,9 +577,19 @@ void intel_pmu_lbr_add(struct perf_event *event) cpuc->br_sel = event->hw.branch_reg.reg; - if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data) - task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++; + if (branch_user_callstack(cpuc->br_sel)) { + if (event->attach_state & PERF_ATTACH_TASK) { + struct task_struct *task = event->hw.target; + struct perf_ctx_data *ctx_data; + rcu_read_lock(); + ctx_data = rcu_dereference(task->perf_ctx_data); + if (ctx_data) + task_context_opt(ctx_data->data)->lbr_callstack_users++; + rcu_read_unlock(); + } else + x86_pmu.lbr_callstack_users++; + } /* * Request pmu::sched_task() callback, which will fire inside the * regular perf event scheduling, so that call will: @@ -664,9 +663,19 @@ void intel_pmu_lbr_del(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - if (branch_user_callstack(cpuc->br_sel) && - event->pmu_ctx->task_ctx_data) - task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--; + if (branch_user_callstack(cpuc->br_sel)) { + if (event->attach_state & PERF_ATTACH_TASK) { + struct task_struct *task = event->hw.target; + struct perf_ctx_data *ctx_data; + + rcu_read_lock(); + ctx_data = rcu_dereference(task->perf_ctx_data); + if (ctx_data) + task_context_opt(ctx_data->data)->lbr_callstack_users--; + rcu_read_unlock(); + } else + x86_pmu.lbr_callstack_users--; + } if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) cpuc->lbr_select = 0; @@ -743,7 +752,7 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) u64 lbr; } msr_lastbranch; - rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); + rdmsrq(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); perf_clear_branch_entry_bitfields(br); @@ -1593,7 +1602,7 @@ void __init intel_pmu_arch_lbr_init(void) goto clear_arch_lbr; /* Apply the max depth of Arch LBR */ - if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr)) + if (wrmsrq_safe(MSR_ARCH_LBR_DEPTH, lbr_nr)) goto clear_arch_lbr; x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask; @@ -1609,7 +1618,7 @@ void __init intel_pmu_arch_lbr_init(void) x86_pmu.lbr_nr = lbr_nr; if (!!x86_pmu.lbr_counters) - x86_pmu.flags |= PMU_FL_BR_CNTR; + x86_pmu.flags |= PMU_FL_BR_CNTR | PMU_FL_DYN_CONSTRAINT; if (x86_pmu.lbr_mispred) static_branch_enable(&x86_lbr_mispred); diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index 844bc4fc4724..e5fd7367e45d 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -10,8 +10,10 @@ #include <linux/perf_event.h> #include <asm/perf_event_p4.h> +#include <asm/cpu_device_id.h> #include <asm/hardirq.h> #include <asm/apic.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -732,9 +734,9 @@ static bool p4_event_match_cpu_model(unsigned int event_idx) { /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ if (event_idx == P4_EVENT_INSTR_COMPLETED) { - if (boot_cpu_data.x86_model != 3 && - boot_cpu_data.x86_model != 4 && - boot_cpu_data.x86_model != 6) + if (boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT && + boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT_2M && + boot_cpu_data.x86_vfm != INTEL_P4_CEDARMILL) return false; } @@ -776,7 +778,7 @@ static int p4_validate_raw_event(struct perf_event *event) * the user needs special permissions to be able to use it */ if (p4_ht_active() 
&& p4_event_bind_map[v].shared) { - v = perf_allow_cpu(&event->attr); + v = perf_allow_cpu(); if (v) return v; } @@ -858,9 +860,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) u64 v; /* an official way for overflow indication */ - rdmsrl(hwc->config_base, v); + rdmsrq(hwc->config_base, v); if (v & P4_CCCR_OVF) { - wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF); + wrmsrq(hwc->config_base, v & ~P4_CCCR_OVF); return 1; } @@ -871,7 +873,7 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) * the counter has reached zero value and continued counting before * real NMI signal was received: */ - rdmsrl(hwc->event_base, v); + rdmsrq(hwc->event_base, v); if (!(v & ARCH_P4_UNFLAGGED_BIT)) return 1; @@ -896,8 +898,8 @@ static void p4_pmu_disable_pebs(void) * So at moment let leave metrics turned on forever -- it's * ok for now but need to be revisited! * - * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0); - * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0); + * (void)wrmsrq_safe(MSR_IA32_PEBS_ENABLE, 0); + * (void)wrmsrq_safe(MSR_P4_PEBS_MATRIX_VERT, 0); */ } @@ -910,7 +912,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) * state we need to clear P4_CCCR_OVF, otherwise interrupt get * asserted again and again */ - (void)wrmsrl_safe(hwc->config_base, + (void)wrmsrq_safe(hwc->config_base, p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); } @@ -943,8 +945,8 @@ static void p4_pmu_enable_pebs(u64 config) bind = &p4_pebs_bind_map[idx]; - (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); - (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); + (void)wrmsrq_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); + (void)wrmsrq_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); } static void __p4_pmu_enable_event(struct perf_event *event) @@ -978,8 +980,8 @@ static void __p4_pmu_enable_event(struct perf_event *event) */ p4_pmu_enable_pebs(hwc->config); - (void)wrmsrl_safe(escr_addr, escr_conf); - (void)wrmsrl_safe(hwc->config_base, + (void)wrmsrq_safe(escr_addr, escr_conf); + (void)wrmsrq_safe(hwc->config_base, (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); } @@ -1023,7 +1025,7 @@ static int p4_pmu_set_period(struct perf_event *event) * * the former idea is taken from OProfile code */ - wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + wrmsrq(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); } return ret; @@ -1071,8 +1073,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) continue; - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } if (handled) @@ -1397,7 +1398,7 @@ __init int p4_pmu_init(void) */ for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { reg = x86_pmu_config_addr(i); - wrmsrl_safe(reg, 0ULL); + wrmsrq_safe(reg, 0ULL); } return 0; diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index a6cffb4f4ef5..6e41de355bd8 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -2,6 +2,9 @@ #include <linux/perf_event.h> #include <linux/types.h> +#include <asm/cpu_device_id.h> +#include <asm/msr.h> + #include "../perf_event.h" /* @@ -140,9 +143,9 @@ static void p6_pmu_disable_all(void) u64 val; /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); + rdmsrq(MSR_P6_EVNTSEL0, val); val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); + wrmsrq(MSR_P6_EVNTSEL0, val); } static void p6_pmu_enable_all(int added) @@ -150,9 +153,9 @@ 
static void p6_pmu_enable_all(int added) unsigned long val; /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); + rdmsrq(MSR_P6_EVNTSEL0, val); val |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); + wrmsrq(MSR_P6_EVNTSEL0, val); } static inline void @@ -161,7 +164,7 @@ p6_pmu_disable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 val = P6_NOP_EVENT; - (void)wrmsrl_safe(hwc->config_base, val); + (void)wrmsrq_safe(hwc->config_base, val); } static void p6_pmu_enable_event(struct perf_event *event) @@ -178,7 +181,7 @@ static void p6_pmu_enable_event(struct perf_event *event) * to actually enable the events. */ - (void)wrmsrl_safe(hwc->config_base, val); + (void)wrmsrq_safe(hwc->config_base, val); } PMU_FORMAT_ATTR(event, "config:0-7" ); @@ -248,30 +251,8 @@ __init int p6_pmu_init(void) { x86_pmu = p6_pmu; - switch (boot_cpu_data.x86_model) { - case 1: /* Pentium Pro */ + if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO) x86_add_quirk(p6_pmu_rdpmc_quirk); - break; - - case 3: /* Pentium II - Klamath */ - case 5: /* Pentium II - Deschutes */ - case 6: /* Pentium II - Mendocino */ - break; - - case 7: /* Pentium III - Katmai */ - case 8: /* Pentium III - Coppermine */ - case 10: /* Pentium III Xeon */ - case 11: /* Pentium III - Tualatin */ - break; - - case 9: /* Pentium M - Banias */ - case 13: /* Pentium M - Dothan */ - break; - - default: - pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); - return -ENODEV; - } memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index fa37565f6418..e8cf29d2b10c 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -18,12 +18,13 @@ #include <linux/slab.h> #include <linux/device.h> -#include <asm/cpuid.h> +#include <asm/cpuid/api.h> #include <asm/perf_event.h> #include <asm/insn.h> #include <asm/io.h> #include <asm/intel_pt.h> #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "../perf_event.h" #include "pt.h" @@ -194,7 +195,7 @@ static int __init pt_pmu_hw_init(void) int ret; long i; - rdmsrl(MSR_PLATFORM_INFO, reg); + rdmsrq(MSR_PLATFORM_INFO, reg); pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8; /* @@ -230,7 +231,7 @@ static int __init pt_pmu_hw_init(void) * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace * post-VMXON. */ - rdmsrl(MSR_IA32_VMX_MISC, reg); + rdmsrq(MSR_IA32_VMX_MISC, reg); if (reg & BIT(14)) pt_pmu.vmx = true; } @@ -426,7 +427,7 @@ static void pt_config_start(struct perf_event *event) if (READ_ONCE(pt->vmx_on)) perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL); else - wrmsrl(MSR_IA32_RTIT_CTL, ctl); + wrmsrq(MSR_IA32_RTIT_CTL, ctl); WRITE_ONCE(event->hw.aux_config, ctl); } @@ -485,12 +486,12 @@ static u64 pt_config_filters(struct perf_event *event) /* avoid redundant msr writes */ if (pt->filters.filter[range].msr_a != filter->msr_a) { - wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a); + wrmsrq(pt_address_ranges[range].msr_a, filter->msr_a); pt->filters.filter[range].msr_a = filter->msr_a; } if (pt->filters.filter[range].msr_b != filter->msr_b) { - wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b); + wrmsrq(pt_address_ranges[range].msr_b, filter->msr_b); pt->filters.filter[range].msr_b = filter->msr_b; } @@ -509,7 +510,7 @@ static void pt_config(struct perf_event *event) /* First round: clear STATUS, in particular the PSB byte counter. 
*/ if (!event->hw.aux_config) { perf_event_itrace_started(event); - wrmsrl(MSR_IA32_RTIT_STATUS, 0); + wrmsrq(MSR_IA32_RTIT_STATUS, 0); } reg = pt_config_filters(event); @@ -569,7 +570,7 @@ static void pt_config_stop(struct perf_event *event) ctl &= ~RTIT_CTL_TRACEEN; if (!READ_ONCE(pt->vmx_on)) - wrmsrl(MSR_IA32_RTIT_CTL, ctl); + wrmsrq(MSR_IA32_RTIT_CTL, ctl); WRITE_ONCE(event->hw.aux_config, ctl); @@ -658,13 +659,13 @@ static void pt_config_buffer(struct pt_buffer *buf) reg = virt_to_phys(base); if (pt->output_base != reg) { pt->output_base = reg; - wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg); + wrmsrq(MSR_IA32_RTIT_OUTPUT_BASE, reg); } reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32); if (pt->output_mask != reg) { pt->output_mask = reg; - wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg); + wrmsrq(MSR_IA32_RTIT_OUTPUT_MASK, reg); } } @@ -926,7 +927,7 @@ static void pt_handle_status(struct pt *pt) int advance = 0; u64 status; - rdmsrl(MSR_IA32_RTIT_STATUS, status); + rdmsrq(MSR_IA32_RTIT_STATUS, status); if (status & RTIT_STATUS_ERROR) { pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n"); @@ -970,7 +971,7 @@ static void pt_handle_status(struct pt *pt) if (advance) pt_buffer_advance(buf); - wrmsrl(MSR_IA32_RTIT_STATUS, status); + wrmsrq(MSR_IA32_RTIT_STATUS, status); } /** @@ -985,12 +986,12 @@ static void pt_read_offset(struct pt_buffer *buf) struct topa_page *tp; if (!buf->single) { - rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base); + rdmsrq(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base); tp = phys_to_virt(pt->output_base); buf->cur = &tp->topa; } - rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask); + rdmsrq(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask); /* offset within current output region */ buf->output_off = pt->output_mask >> 32; /* index of current output region within this table */ @@ -1585,7 +1586,7 @@ void intel_pt_handle_vmx(int on) /* Turn PTs back on */ if (!on && event) - wrmsrl(MSR_IA32_RTIT_CTL, event->hw.aux_config); + wrmsrq(MSR_IA32_RTIT_CTL, event->hw.aux_config); local_irq_restore(flags); } @@ -1611,7 +1612,7 @@ static void pt_event_start(struct perf_event *event, int mode) * PMI might have just cleared these, so resume_allowed * must be checked again also. 
*/ - rdmsrl(MSR_IA32_RTIT_STATUS, status); + rdmsrq(MSR_IA32_RTIT_STATUS, status); if (!(status & (RTIT_STATUS_TRIGGEREN | RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED)) && @@ -1839,7 +1840,7 @@ static __init int pt_init(void) for_each_online_cpu(cpu) { u64 ctl; - ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl); + ret = rdmsrq_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl); if (!ret && (ctl & RTIT_CTL_TRACEEN)) prior_warn++; } @@ -1863,6 +1864,8 @@ static __init int pt_init(void) if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; + else + pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE; pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE | diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 60b3078b7502..e0815a12db90 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -3,6 +3,7 @@ #include <asm/cpu_device_id.h> #include <asm/intel-family.h> +#include <asm/msr.h> #include "uncore.h" #include "uncore_discovery.h" @@ -150,7 +151,7 @@ u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *eve { u64 count; - rdmsrl(event->hw.event_base, count); + rdmsrq(event->hw.event_base, count); return count; } @@ -305,17 +306,11 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) { struct intel_uncore_box *box; struct perf_event *event; - unsigned long flags; int bit; box = container_of(hrtimer, struct intel_uncore_box, hrtimer); if (!box->n_active || box->cpu != smp_processor_id()) return HRTIMER_NORESTART; - /* - * disable local interrupt to prevent uncore_pmu_event_start/stop - * to interrupt the update process - */ - local_irq_save(flags); /* * handle boxes with an active event list as opposed to active @@ -328,8 +323,6 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) uncore_perf_event_update(box, box->events[bit]); - local_irq_restore(flags); - hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); return HRTIMER_RESTART; } @@ -337,7 +330,7 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) { hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), - HRTIMER_MODE_REL_PINNED); + HRTIMER_MODE_REL_PINNED_HARD); } void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) @@ -347,8 +340,7 @@ void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) { - hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - box->hrtimer.function = uncore_pmu_hrtimer; + hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); } static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 571e44b49691..18a3022f26a0 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -5,6 +5,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <asm/msr.h> #include "uncore.h" #include "uncore_discovery.h" @@ -441,17 +442,17 @@ static u64 intel_generic_uncore_box_ctl(struct intel_uncore_box *box) void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box) { - wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); + 
wrmsrq(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); } void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box) { - wrmsrl(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); + wrmsrq(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); } void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(intel_generic_uncore_box_ctl(box), 0); + wrmsrq(intel_generic_uncore_box_ctl(box), 0); } static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box, @@ -459,7 +460,7 @@ static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box, { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrq(hwc->config_base, hwc->config); } static void intel_generic_uncore_msr_disable_event(struct intel_uncore_box *box, @@ -467,7 +468,7 @@ static void intel_generic_uncore_msr_disable_event(struct intel_uncore_box *box, { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, 0); + wrmsrq(hwc->config_base, 0); } static struct intel_uncore_ops generic_uncore_msr_ops = { diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 466833478e81..8962e7cb21e3 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Nehalem-EX/Westmere-EX uncore support */ #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "uncore.h" /* NHM-EX event control */ @@ -200,12 +201,12 @@ DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) { - wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); + wrmsrq(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); } static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box) { - wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0); + wrmsrq(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0); } static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) @@ -214,12 +215,12 @@ static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) u64 config; if (msr) { - rdmsrl(msr, config); + rdmsrq(msr, config); config &= ~((1ULL << uncore_num_counters(box)) - 1); /* WBox has a fixed counter */ if (uncore_msr_fixed_ctl(box)) config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; - wrmsrl(msr, config); + wrmsrq(msr, config); } } @@ -229,18 +230,18 @@ static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) u64 config; if (msr) { - rdmsrl(msr, config); + rdmsrq(msr, config); config |= (1ULL << uncore_num_counters(box)) - 1; /* WBox has a fixed counter */ if (uncore_msr_fixed_ctl(box)) config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; - wrmsrl(msr, config); + wrmsrq(msr, config); } } static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) { - wrmsrl(event->hw.config_base, 0); + wrmsrq(event->hw.config_base, 0); } static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -248,11 +249,11 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p struct hw_perf_event *hwc = &event->hw; if (hwc->idx == UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); + wrmsrq(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); + wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); else - wrmsrl(hwc->config_base, 
hwc->config | NHMEX_PMON_CTL_EN_BIT0); + wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); } #define NHMEX_UNCORE_OPS_COMMON_INIT() \ @@ -382,10 +383,10 @@ static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct per struct hw_perf_event_extra *reg2 = &hwc->branch_reg; if (reg1->idx != EXTRA_REG_NONE) { - wrmsrl(reg1->reg, reg1->config); - wrmsrl(reg1->reg + 1, reg2->config); + wrmsrq(reg1->reg, reg1->config); + wrmsrq(reg1->reg + 1, reg2->config); } - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + wrmsrq(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); } @@ -467,12 +468,12 @@ static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct per struct hw_perf_event_extra *reg2 = &hwc->branch_reg; if (reg1->idx != EXTRA_REG_NONE) { - wrmsrl(reg1->reg, 0); - wrmsrl(reg1->reg + 1, reg1->config); - wrmsrl(reg1->reg + 2, reg2->config); - wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); + wrmsrq(reg1->reg, 0); + wrmsrq(reg1->reg + 1, reg1->config); + wrmsrq(reg1->reg + 2, reg2->config); + wrmsrq(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); } - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); + wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); } static struct attribute *nhmex_uncore_sbox_formats_attr[] = { @@ -842,25 +843,25 @@ static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct per idx = __BITS_VALUE(reg1->idx, 0, 8); if (idx != 0xff) - wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), + wrmsrq(__BITS_VALUE(reg1->reg, 0, 16), nhmex_mbox_shared_reg_config(box, idx)); idx = __BITS_VALUE(reg1->idx, 1, 8); if (idx != 0xff) - wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), + wrmsrq(__BITS_VALUE(reg1->reg, 1, 16), nhmex_mbox_shared_reg_config(box, idx)); if (reg2->idx != EXTRA_REG_NONE) { - wrmsrl(reg2->reg, 0); + wrmsrq(reg2->reg, 0); if (reg2->config != ~0ULL) { - wrmsrl(reg2->reg + 1, + wrmsrq(reg2->reg + 1, reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); - wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & + wrmsrq(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); - wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); + wrmsrq(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); } } - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); + wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); } DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); @@ -1121,31 +1122,31 @@ static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct per switch (idx % 6) { case 0: - wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); + wrmsrq(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); break; case 1: - wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); + wrmsrq(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); break; case 2: case 3: - wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port), + wrmsrq(NHMEX_R_MSR_PORTN_QLX_CFG(port), uncore_shared_reg_config(box, 2 + (idx / 6) * 5)); break; case 4: - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), hwc->config >> 32); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); break; case 5: - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), hwc->config >> 32); - 
wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); break; } - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + wrmsrq(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); } diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index edb7fd50efe0..a1a96833e30e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ +#include <asm/msr.h> #include "uncore.h" #include "uncore_discovery.h" @@ -260,34 +261,34 @@ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct per struct hw_perf_event *hwc = &event->hw; if (hwc->idx < UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); else - wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); + wrmsrq(hwc->config_base, SNB_UNC_CTL_EN); } static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) { - wrmsrl(event->hw.config_base, 0); + wrmsrq(event->hw.config_base, 0); } static void snb_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) { - wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + wrmsrq(SNB_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); } } static void snb_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + wrmsrq(SNB_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); } static void snb_uncore_msr_exit_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(SNB_UNC_PERF_GLOBAL_CTL, 0); } static struct uncore_event_desc snb_uncore_events[] = { @@ -372,7 +373,7 @@ void snb_uncore_cpu_init(void) static void skl_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) { - wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); } @@ -383,14 +384,14 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box) static void skl_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); } static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, 0); } static struct intel_uncore_ops skl_uncore_msr_ops = { @@ -504,7 +505,7 @@ static int icl_get_cbox_num(void) { u64 num_boxes; - rdmsrl(ICL_UNC_CBO_CONFIG, num_boxes); + rdmsrq(ICL_UNC_CBO_CONFIG, num_boxes); return num_boxes & ICL_UNC_NUM_CBO_MASK; } @@ -525,7 +526,7 @@ static struct intel_uncore_type *tgl_msr_uncores[] = { static void rkl_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); + wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); } void tgl_uncore_cpu_init(void) @@ -541,24 +542,24 @@ void tgl_uncore_cpu_init(void) static void adl_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); + 
wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); } static void adl_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); + wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); } static void adl_uncore_msr_disable_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, 0); } static void adl_uncore_msr_exit_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, 0); } static struct intel_uncore_ops adl_uncore_msr_ops = { @@ -691,7 +692,7 @@ static struct intel_uncore_type mtl_uncore_hac_cbox = { static void mtl_uncore_msr_init_box(struct intel_uncore_box *box) { - wrmsrl(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN); + wrmsrq(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN); } static struct intel_uncore_ops mtl_uncore_msr_ops = { @@ -758,7 +759,7 @@ static struct intel_uncore_type *lnl_msr_uncores[] = { static void lnl_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(LNL_UNC_MSR_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); + wrmsrq(LNL_UNC_MSR_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); } static struct intel_uncore_ops lnl_uncore_msr_ops = { @@ -1306,12 +1307,12 @@ int skl_uncore_pci_init(void) /* Nehalem uncore support */ static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) { - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(NHM_UNC_PERF_GLOBAL_CTL, 0); } static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); + wrmsrq(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); } static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -1319,9 +1320,9 @@ static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct per struct hw_perf_event *hwc = &event->hw; if (hwc->idx < UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); else - wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); + wrmsrq(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); } static struct attribute *nhm_uncore_formats_attr[] = { diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 60973c209c0e..2824dc9950be 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* SandyBridge-EP/IvyTown uncore support */ #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "uncore.h" #include "uncore_discovery.h" @@ -618,9 +619,9 @@ static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) msr = uncore_msr_box_ctl(box); if (msr) { - rdmsrl(msr, config); + rdmsrq(msr, config); config |= SNBEP_PMON_BOX_CTL_FRZ; - wrmsrl(msr, config); + wrmsrq(msr, config); } } @@ -631,9 +632,9 @@ static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) msr = uncore_msr_box_ctl(box); if (msr) { - rdmsrl(msr, config); + rdmsrq(msr, config); config &= ~SNBEP_PMON_BOX_CTL_FRZ; - wrmsrl(msr, config); + wrmsrq(msr, config); } } @@ -643,9 +644,9 @@ static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct p struct hw_perf_event_extra *reg1 = &hwc->extra_reg; if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0)); + 
wrmsrq(reg1->reg, uncore_shared_reg_config(box, 0)); - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, @@ -653,7 +654,7 @@ static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrq(hwc->config_base, hwc->config); } static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) @@ -661,7 +662,7 @@ static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) unsigned msr = uncore_msr_box_ctl(box); if (msr) - wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); + wrmsrq(msr, SNBEP_PMON_BOX_CTL_INT); } static struct attribute *snbep_uncore_formats_attr[] = { @@ -1532,7 +1533,7 @@ static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box) { unsigned msr = uncore_msr_box_ctl(box); if (msr) - wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT); + wrmsrq(msr, IVBEP_PMON_BOX_CTL_INT); } static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box) @@ -1783,11 +1784,11 @@ static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_ev if (reg1->idx != EXTRA_REG_NONE) { u64 filter = uncore_shared_reg_config(box, 0); - wrmsrl(reg1->reg, filter & 0xffffffff); - wrmsrl(reg1->reg + 6, filter >> 32); + wrmsrq(reg1->reg, filter & 0xffffffff); + wrmsrq(reg1->reg + 6, filter >> 32); } - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static struct intel_uncore_ops ivbep_uncore_cbox_ops = { @@ -2767,11 +2768,11 @@ static void hswep_cbox_enable_event(struct intel_uncore_box *box, if (reg1->idx != EXTRA_REG_NONE) { u64 filter = uncore_shared_reg_config(box, 0); - wrmsrl(reg1->reg, filter & 0xffffffff); - wrmsrl(reg1->reg + 1, filter >> 32); + wrmsrq(reg1->reg, filter & 0xffffffff); + wrmsrq(reg1->reg + 1, filter >> 32); } - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static struct intel_uncore_ops hswep_uncore_cbox_ops = { @@ -2816,7 +2817,7 @@ static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box) for_each_set_bit(i, (unsigned long *)&init, 64) { flags |= (1ULL << i); - wrmsrl(msr, flags); + wrmsrq(msr, flags); } } } @@ -3708,7 +3709,7 @@ static void skx_iio_enable_event(struct intel_uncore_box *box, { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static struct intel_uncore_ops skx_uncore_iio_ops = { @@ -3765,7 +3766,7 @@ static int skx_msr_cpu_bus_read(int cpu, u64 *topology) { u64 msr_value; - if (rdmsrl_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &msr_value) || + if (rdmsrq_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &msr_value) || !(msr_value & SKX_MSR_CPU_BUS_VALID_BIT)) return -ENXIO; @@ -4655,9 +4656,9 @@ static void snr_cha_enable_event(struct intel_uncore_box *box, struct hw_perf_event_extra *reg1 = &hwc->extra_reg; if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, reg1->config); + wrmsrq(reg1->reg, reg1->config); - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static struct intel_uncore_ops snr_uncore_chabox_ops = { @@ -4891,28 +4892,28 @@ static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), /* Free-Running IIO BANDWIDTH 
IN Counters */ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), { /* end: all zeroes */ }, }; @@ -5485,37 +5486,6 @@ static struct freerunning_counters icx_iio_freerunning[] = { [ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets }, }; -static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - 
INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type icx_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 9, @@ -5523,7 +5493,7 @@ static struct intel_uncore_type icx_uncore_iio_free_running = { .num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX, .freerunning = icx_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = icx_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; @@ -5913,9 +5883,9 @@ static void spr_uncore_msr_enable_event(struct intel_uncore_box *box, struct hw_perf_event_extra *reg1 = &hwc->extra_reg; if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, reg1->config); + wrmsrq(reg1->reg, reg1->config); - wrmsrl(hwc->config_base, hwc->config); + wrmsrq(hwc->config_base, hwc->config); } static void spr_uncore_msr_disable_event(struct intel_uncore_box *box, @@ -5925,9 +5895,9 @@ static void spr_uncore_msr_disable_event(struct intel_uncore_box *box, struct hw_perf_event_extra *reg1 = &hwc->extra_reg; if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, 0); + wrmsrq(reg1->reg, 0); - wrmsrl(hwc->config_base, 0); + wrmsrq(hwc->config_base, 0); } static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) @@ -6320,69 +6290,13 @@ static struct freerunning_counters spr_iio_freerunning[] = { [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 }, }; -static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - /* Free-Running IIO BANDWIDTH OUT Counters */ - INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), - 
INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type spr_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 17, .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, .freerunning = spr_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = spr_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; @@ -6572,7 +6486,7 @@ void spr_uncore_cpu_init(void) * of UNCORE_SPR_CHA) is incorrect on some SPR variants because of a * firmware bug. Using the value from SPR_MSR_UNC_CBO_CONFIG to replace it. */ - rdmsrl(SPR_MSR_UNC_CBO_CONFIG, num_cbo); + rdmsrq(SPR_MSR_UNC_CBO_CONFIG, num_cbo); /* * The MSR doesn't work on the EMR XCC, but the firmware bug doesn't impact * the EMR XCC. Don't let the value from the MSR replace the existing value. diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 45b1866ff051..7f5007a4752a 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -3,6 +3,8 @@ #include <linux/sysfs.h> #include <linux/nospec.h> #include <asm/cpu_device_id.h> +#include <asm/msr.h> + #include "probe.h" enum perf_msr_id { @@ -231,7 +233,7 @@ static inline u64 msr_read_counter(struct perf_event *event) u64 now; if (event->hw.event_base) - rdmsrl(event->hw.event_base, now); + rdmsrq(event->hw.event_base, now); else now = rdtsc_ordered(); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 31c2771545a6..2b969386dcdd 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -17,6 +17,7 @@ #include <asm/fpu/xstate.h> #include <asm/intel_ds.h> #include <asm/cpu.h> +#include <asm/msr.h> /* To enable MSR tracing please use the generic trace points. */ @@ -110,9 +111,26 @@ static inline bool is_topdown_event(struct perf_event *event) return is_metric_event(event) || is_slots_event(event); } +int is_x86_event(struct perf_event *event); + +static inline bool check_leader_group(struct perf_event *leader, int flags) +{ + return is_x86_event(leader) ? 
!!(leader->hw.flags & flags) : false; +} + static inline bool is_branch_counters_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS; + return check_leader_group(event->group_leader, PERF_X86_EVENT_BRANCH_COUNTERS); +} + +static inline bool is_pebs_counter_event_group(struct perf_event *event) +{ + return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR); +} + +static inline bool is_acr_event_group(struct perf_event *event) +{ + return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR); } struct amd_nb { @@ -256,6 +274,7 @@ struct cpu_hw_events { struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; int n_excl; /* the number of exclusive events */ + int n_late_setup; /* the num of events needs late setup */ unsigned int txn_flags; int is_fake; @@ -281,6 +300,10 @@ struct cpu_hw_events { u64 fixed_ctrl_val; u64 active_fixed_ctrl_val; + /* Intel ACR configuration */ + u64 acr_cfg_b[X86_PMC_IDX_MAX]; + u64 acr_cfg_c[X86_PMC_IDX_MAX]; + /* * Intel LBR bits */ @@ -669,18 +692,6 @@ enum { #define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) -/* - * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture - * of the core. Bits 31-24 indicates its core type (Core or Atom) - * and Bits [23:0] indicates the native model ID of the core. - * Core type and native model ID are defined in below enumerations. - */ -enum hybrid_cpu_type { - HYBRID_INTEL_NONE, - HYBRID_INTEL_ATOM = 0x20, - HYBRID_INTEL_CORE = 0x40, -}; - #define X86_HYBRID_PMU_ATOM_IDX 0 #define X86_HYBRID_PMU_CORE_IDX 1 #define X86_HYBRID_PMU_TINY_IDX 2 @@ -697,11 +708,6 @@ enum hybrid_pmu_type { hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny, }; -enum atom_native_id { - cmt_native_id = 0x2, /* Crestmont */ - skt_native_id = 0x3, /* Skymont */ -}; - struct x86_hybrid_pmu { struct pmu pmu; const char *name; @@ -719,6 +725,15 @@ struct x86_hybrid_pmu { u64 fixed_cntr_mask64; unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; }; + + union { + u64 acr_cntr_mask64; + unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cause_mask64; + unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; struct event_constraint unconstrained; u64 hw_cache_event_ids @@ -800,6 +815,11 @@ struct x86_pmu { u64 (*update)(struct perf_event *event); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); + void (*late_setup)(void); + void (*pebs_enable)(struct perf_event *event); + void (*pebs_disable)(struct perf_event *event); + void (*pebs_enable_all)(void); + void (*pebs_disable_all)(void); unsigned eventsel; unsigned perfctr; unsigned fixedctr; @@ -816,6 +836,14 @@ struct x86_pmu { u64 fixed_cntr_mask64; unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; }; + union { + u64 acr_cntr_mask64; + unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cause_mask64; + unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; int cntval_bits; u64 cntval_mask; union { @@ -869,7 +897,7 @@ struct x86_pmu { void (*check_microcode)(void); void (*sched_task)(struct perf_event_pmu_context *pmu_ctx, - bool sched_in); + struct task_struct *task, bool sched_in); /* * Intel Arch Perfmon v2+ @@ -882,7 +910,7 @@ struct x86_pmu { */ unsigned int bts :1, bts_active :1, - pebs :1, + ds_pebs :1, pebs_active :1, pebs_broken :1, pebs_prec_dist :1, @@ -914,6 
+942,7 @@ struct x86_pmu {
 		const int	*lbr_sel_map;		   /* lbr_select mappings */
 		int		*lbr_ctl_map;		   /* LBR_CTL mappings */
 	};
+	u64		lbr_callstack_users;	   /* lbr callstack system wide users */
 	bool		lbr_double_abort;	   /* duplicated lbr aborts */
 	bool		lbr_pt_coexist;		   /* (LBR|BTS) may coexist with PT */
@@ -952,14 +981,6 @@ struct x86_pmu {
 	int		num_topdown_events;
 
 	/*
-	 * perf task context (i.e. struct perf_event_pmu_context::task_ctx_data)
-	 * switch helper to bridge calls from perf/core to perf/x86.
-	 * See struct pmu::swap_task_ctx() usage for examples;
-	 */
-	void		(*swap_task_ctx)(struct perf_event_pmu_context *prev_epc,
-					 struct perf_event_pmu_context *next_epc);
-
-	/*
 	 * AMD bits
 	 */
 	unsigned int	amd_nb_constraints : 1;
@@ -994,7 +1015,7 @@ struct x86_pmu {
 	 */
 	int		num_hybrid_pmus;
 	struct x86_hybrid_pmu	*hybrid_pmu;
-	enum hybrid_cpu_type (*get_hybrid_cpu_type)	(void);
+	enum intel_cpu_type (*get_hybrid_cpu_type)	(void);
 };
 
 struct x86_perf_task_context_opt {
@@ -1060,6 +1081,7 @@ do { \
 #define PMU_FL_MEM_LOADS_AUX	0x100 /* Require an auxiliary event for the complete memory info */
 #define PMU_FL_RETIRE_LATENCY	0x200 /* Support Retire Latency in PEBS */
 #define PMU_FL_BR_CNTR		0x400 /* Support branch counter logging */
+#define PMU_FL_DYN_CONSTRAINT	0x800 /* Needs dynamic constraint */
 
 #define EVENT_VAR(_id) event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1102,11 +1124,18 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
 	.pmu_type	= _pmu,						\
 }
 
+int is_x86_event(struct perf_event *event);
 struct pmu *x86_get_pmu(unsigned int cpu);
 extern struct x86_pmu x86_pmu __read_mostly;
 
 DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
 DECLARE_STATIC_CALL(x86_pmu_update,     *x86_pmu.update);
+DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
+DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup);
+DECLARE_STATIC_CALL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable);
+DECLARE_STATIC_CALL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable);
+DECLARE_STATIC_CALL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);
+DECLARE_STATIC_CALL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all);
 
 static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
 {
@@ -1148,6 +1177,12 @@ extern u64 __read_mostly hw_cache_extra_regs
 
 u64 x86_perf_event_update(struct perf_event *event);
 
+static inline u64 intel_pmu_topdown_event_update(struct perf_event *event, u64 *val)
+{
+	return x86_perf_event_update(event);
+}
+DECLARE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update);
+
 static inline unsigned int x86_pmu_config_addr(int index)
 {
 	return x86_pmu.eventsel + (x86_pmu.addr_offset ?
@@ -1208,16 +1243,16 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 	u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
 
 	if (hwc->extra_reg.reg)
-		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
+		wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config);
 
 	/*
 	 * Add enabled Merge event on next counter
 	 * if large increment event being enabled on this counter
 	 */
 	if (is_counter_pair(hwc))
-		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
+		wrmsrq(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en);
 
-	wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
+	wrmsrq(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
 }
 
 void x86_pmu_enable_all(int added);
@@ -1233,10 +1268,10 @@ static inline void x86_pmu_disable_event(struct perf_event *event)
 	u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
 	struct hw_perf_event *hwc = &event->hw;
 
-	wrmsrl(hwc->config_base, hwc->config & ~disable_mask);
+	wrmsrq(hwc->config_base, hwc->config & ~disable_mask);
 
 	if (is_counter_pair(hwc))
-		wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
+		wrmsrq(x86_pmu_config_addr(hwc->idx + 1), 0);
 }
 
 void x86_pmu_enable_event(struct perf_event *event);
@@ -1394,7 +1429,8 @@ void amd_pmu_lbr_reset(void);
 void amd_pmu_lbr_read(void);
 void amd_pmu_lbr_add(struct perf_event *event);
 void amd_pmu_lbr_del(struct perf_event *event);
-void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
+			    struct task_struct *task, bool sched_in);
 void amd_pmu_lbr_enable_all(void);
 void amd_pmu_lbr_disable_all(void);
 int amd_pmu_lbr_hw_config(struct perf_event *event);
@@ -1403,12 +1439,12 @@ static __always_inline void __amd_pmu_lbr_disable(void)
 {
 	u64 dbg_ctl, dbg_extn_cfg;
 
-	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
-	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
+	rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+	wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
 
 	if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
-		rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-		wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+		rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+		wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
 	}
 }
 
@@ -1448,7 +1484,8 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
 	perf_sched_cb_dec(event->pmu);
 }
 
-void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx,
+			    struct task_struct *task, bool sched_in);
 #else
 static inline int amd_brs_init(void)
 {
@@ -1473,7 +1510,8 @@ static inline void amd_pmu_brs_del(struct perf_event *event)
 {
 }
 
-static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
+static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx,
+					  struct task_struct *task, bool sched_in)
 {
 }
 
@@ -1538,21 +1576,21 @@ static inline bool intel_pmu_has_bts(struct perf_event *event)
 
 static __always_inline void __intel_pmu_pebs_disable_all(void)
 {
-	wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+	wrmsrq(MSR_IA32_PEBS_ENABLE, 0);
 }
 
 static __always_inline void __intel_pmu_arch_lbr_disable(void)
 {
-	wrmsrl(MSR_ARCH_LBR_CTL, 0);
+	wrmsrq(MSR_ARCH_LBR_CTL, 0);
 }
 
 static __always_inline void __intel_pmu_lbr_disable(void)
 {
 	u64 debugctl;
 
-	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
 	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
-	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
 }
 
 int intel_pmu_save_and_restart(struct perf_event *event);
@@ -1587,6 +1625,8 @@ void intel_pmu_disable_bts(void);
 
 int intel_pmu_drain_bts_buffer(void);
 
+void intel_pmu_late_setup(void);
+
 u64 grt_latency_data(struct perf_event *event, u64 status);
 
 u64 cmt_latency_data(struct perf_event *event, u64 status);
@@ -1643,20 +1683,20 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
 
-void intel_pmu_auto_reload_read(struct perf_event *event);
+void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc);
+
+void intel_pmu_drain_pebs_buffer(void);
 
 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
 
-void intel_ds_init(void);
+void intel_pebs_init(void);
 
 void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
 				struct cpu_hw_events *cpuc,
 				struct perf_event *event);
 
-void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
-				 struct perf_event_pmu_context *next_epc);
-
-void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
+			      struct task_struct *task, bool sched_in);
 
 u64 lbr_from_signext_quirk_wr(u64 val);
 
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 6c977c19f2cd..70078334e4a3 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -2,23 +2,24 @@
 /*
  * struct hw_perf_event.flags flags
  */
-PERF_ARCH(PEBS_LDLAT,		0x00001) /* ld+ldlat data address sampling */
-PERF_ARCH(PEBS_ST,		0x00002) /* st data address sampling */
-PERF_ARCH(PEBS_ST_HSW,		0x00004) /* haswell style datala, store */
-PERF_ARCH(PEBS_LD_HSW,		0x00008) /* haswell style datala, load */
-PERF_ARCH(PEBS_NA_HSW,		0x00010) /* haswell style datala, unknown */
-PERF_ARCH(EXCL,			0x00020) /* HT exclusivity on counter */
-PERF_ARCH(DYNAMIC,		0x00040) /* dynamic alloc'd constraint */
-			/*	0x00080	*/
-PERF_ARCH(EXCL_ACCT,		0x00100) /* accounted EXCL event */
-PERF_ARCH(AUTO_RELOAD,		0x00200) /* use PEBS auto-reload */
-PERF_ARCH(LARGE_PEBS,		0x00400) /* use large PEBS */
-PERF_ARCH(PEBS_VIA_PT,		0x00800) /* use PT buffer for PEBS */
-PERF_ARCH(PAIR,			0x01000) /* Large Increment per Cycle */
-PERF_ARCH(LBR_SELECT,		0x02000) /* Save/Restore MSR_LBR_SELECT */
-PERF_ARCH(TOPDOWN,		0x04000) /* Count Topdown slots/metrics events */
-PERF_ARCH(PEBS_STLAT,		0x08000) /* st+stlat data address sampling */
-PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
-PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
-PERF_ARCH(NEEDS_BRANCH_STACK,	0x40000) /* require branch stack setup */
-PERF_ARCH(BRANCH_COUNTERS,	0x80000) /* logs the counters in the extra space of each branch */
+PERF_ARCH(PEBS_LDLAT,		0x0000001) /* ld+ldlat data address sampling */
+PERF_ARCH(PEBS_ST,		0x0000002) /* st data address sampling */
+PERF_ARCH(PEBS_ST_HSW,		0x0000004) /* haswell style datala, store */
+PERF_ARCH(PEBS_LD_HSW,		0x0000008) /* haswell style datala, load */
+PERF_ARCH(PEBS_NA_HSW,		0x0000010) /* haswell style datala, unknown */
+PERF_ARCH(EXCL,			0x0000020) /* HT exclusivity on counter */
+PERF_ARCH(DYNAMIC,		0x0000040) /* dynamic alloc'd constraint */
+PERF_ARCH(PEBS_CNTR,		0x0000080) /* PEBS counters snapshot */
+PERF_ARCH(EXCL_ACCT,		0x0000100) /* accounted EXCL event */
+PERF_ARCH(AUTO_RELOAD,		0x0000200) /* use PEBS auto-reload */
+PERF_ARCH(LARGE_PEBS,		0x0000400) /* use large PEBS */
+PERF_ARCH(PEBS_VIA_PT,		0x0000800) /* use PT buffer for PEBS */
+PERF_ARCH(PAIR,			0x0001000) /* Large Increment per Cycle */
+PERF_ARCH(LBR_SELECT,		0x0002000) /* Save/Restore MSR_LBR_SELECT */
+PERF_ARCH(TOPDOWN,		0x0004000) /* Count Topdown slots/metrics events */
+PERF_ARCH(PEBS_STLAT,		0x0008000) /* st+stlat data address sampling */
+PERF_ARCH(AMD_BRS,		0x0010000) /* AMD Branch Sampling */
+PERF_ARCH(PEBS_LAT_HYBRID,	0x0020000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK,	0x0040000) /* require branch stack setup */
+PERF_ARCH(BRANCH_COUNTERS,	0x0080000) /* logs the counters in the extra space of each branch */
+PERF_ARCH(ACR,			0x0100000) /* Auto counter reload */
diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c
index 600bf8d15c0c..bb719d0d3f0b 100644
--- a/arch/x86/events/probe.c
+++ b/arch/x86/events/probe.c
@@ -2,6 +2,8 @@
 #include <linux/export.h>
 #include <linux/types.h>
 #include <linux/bits.h>
+
+#include <asm/msr.h>
 #include "probe.h"
 
 static umode_t
@@ -43,7 +45,7 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data)
 			if (msr[bit].test && !msr[bit].test(bit, data))
 				continue;
 			/* Virt sucks; you cannot tell if a R/O MSR is present :/ */
-			if (rdmsrl_safe(msr[bit].msr, &val))
+			if (rdmsrq_safe(msr[bit].msr, &val))
 				continue;
 
 			mask = msr[bit].mask;
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 6941f4811bec..defd86137f12 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -65,6 +65,7 @@
 #include <linux/nospec.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
+#include <asm/msr.h>
 #include "perf_event.h"
 #include "probe.h"
 
@@ -192,7 +193,7 @@ static inline unsigned int get_rapl_pmu_idx(int cpu, int scope)
 static inline u64 rapl_read_counter(struct perf_event *event)
 {
 	u64 raw;
-	rdmsrl(event->hw.event_base, raw);
+	rdmsrq(event->hw.event_base, raw);
 	return raw;
 }
 
@@ -221,7 +222,7 @@ static u64 rapl_event_update(struct perf_event *event)
 
 	prev_raw_count = local64_read(&hwc->prev_count);
 	do {
-		rdmsrl(event->hw.event_base, new_raw_count);
+		rdmsrq(event->hw.event_base, new_raw_count);
 	} while (!local64_try_cmpxchg(&hwc->prev_count,
 				      &prev_raw_count, new_raw_count));
 
@@ -274,8 +275,7 @@ static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu)
 {
 	struct hrtimer *hr = &rapl_pmu->hrtimer;
 
-	hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hr->function = rapl_hrtimer_handle;
+	hrtimer_setup(hr, rapl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 }
 
 static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu,
@@ -611,8 +611,8 @@ static int rapl_check_hw_unit(void)
 	u64 msr_rapl_power_unit_bits;
 	int i;
 
-	/* protect rdmsrl() to handle virtualization */
-	if (rdmsrl_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
+	/* protect rdmsrq() to handle virtualization */
+	if (rdmsrq_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits))
 		return -1;
 	for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++)
 		rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
@@ -730,6 +730,7 @@ static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_
 {
 	int nr_rapl_pmu = topology_max_packages();
 	struct rapl_pmus *rapl_pmus;
+	int ret;
 
 	/*
 	 * rapl_pmu_scope must be either PKG, DIE or CORE
@@ -761,7 +762,11 @@ static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_
 	rapl_pmus->pmu.module		= THIS_MODULE;
 	rapl_pmus->pmu.capabilities	= PERF_PMU_CAP_NO_EXCLUDE;
 
-	return init_rapl_pmu(rapl_pmus);
+	ret = init_rapl_pmu(rapl_pmus);
+	if (ret)
+		kfree(rapl_pmus);
+
+	return ret;
 }
 
 static struct rapl_model model_snb = {
diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c
index dab4ed199227..77fd00b3305e 100644
--- a/arch/x86/events/utils.c
+++ b/arch/x86/events/utils.c
@@ -2,6 +2,7 @@
 #include <asm/insn.h>
 #include <linux/mm.h>
 
+#include <asm/msr.h>
 #include "perf_event.h"
 
 static int decode_branch_type(struct insn *insn)
diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c
index 2fd9b0cf9a5e..4bdfcf091200 100644
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -15,6 +15,7 @@
 #include <asm/cpufeature.h>
 #include <asm/hardirq.h>
 #include <asm/apic.h>
+#include <asm/msr.h>
 
 #include "../perf_event.h"
 
@@ -254,26 +255,26 @@ static __initconst const u64 zxe_hw_cache_event_ids
 
 static void zhaoxin_pmu_disable_all(void)
 {
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+	wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 }
 
 static void zhaoxin_pmu_enable_all(int added)
 {
-	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+	wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 }
 
 static inline u64 zhaoxin_pmu_get_status(void)
 {
 	u64 status;
 
-	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+	rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status);
 
 	return status;
 }
 
 static inline void zhaoxin_pmu_ack_status(u64 ack)
 {
-	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+	wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 }
 
 static inline void zxc_pmu_ack_status(u64 ack)
@@ -293,9 +294,9 @@ static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc)
 
 	mask = 0xfULL << (idx * 4);
 
-	rdmsrl(hwc->config_base, ctrl_val);
+	rdmsrq(hwc->config_base, ctrl_val);
 	ctrl_val &= ~mask;
-	wrmsrl(hwc->config_base, ctrl_val);
+	wrmsrq(hwc->config_base, ctrl_val);
 }
 
 static void zhaoxin_pmu_disable_event(struct perf_event *event)
@@ -329,10 +330,10 @@ static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc)
 	bits <<= (idx * 4);
 	mask = 0xfULL << (idx * 4);
 
-	rdmsrl(hwc->config_base, ctrl_val);
+	rdmsrq(hwc->config_base, ctrl_val);
 	ctrl_val &= ~mask;
 	ctrl_val |= bits;
-	wrmsrl(hwc->config_base, ctrl_val);
+	wrmsrq(hwc->config_base, ctrl_val);
 }
 
 static void zhaoxin_pmu_enable_event(struct perf_event *event)
@@ -397,8 +398,7 @@ again:
 		if (!x86_perf_event_set_period(event))
 			continue;
 
-		if (perf_event_overflow(event, &data, regs))
-			x86_pmu_stop(event, 0);
+		perf_event_overflow(event, &data, regs);
 	}
 
 	/*