diff options
Diffstat (limited to 'arch/x86/events')
32 files changed, 4122 insertions, 1974 deletions
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c index 780acd3dff22..06f35a6b58a5 100644 --- a/arch/x86/events/amd/brs.c +++ b/arch/x86/events/amd/brs.c @@ -44,12 +44,12 @@ static inline unsigned int brs_to(int idx) static __always_inline void set_debug_extn_cfg(u64 val) { /* bits[4:3] must always be set to 11b */ - __wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32); + native_wrmsrq(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3); } static __always_inline u64 get_debug_extn_cfg(void) { - return __rdmsr(MSR_AMD_DBG_EXTN_CFG); + return native_rdmsrq(MSR_AMD_DBG_EXTN_CFG); } static bool __init amd_brs_detect(void) @@ -187,7 +187,7 @@ void amd_brs_reset(void) /* * Mark first entry as poisoned */ - wrmsrl(brs_to(0), BRS_POISON); + wrmsrq(brs_to(0), BRS_POISON); } int __init amd_brs_init(void) @@ -325,7 +325,7 @@ void amd_brs_drain(void) u32 brs_idx = tos - i; u64 from, to; - rdmsrl(brs_to(brs_idx), to); + rdmsrq(brs_to(brs_idx), to); /* Entry does not belong to us (as marked by kernel) */ if (to == BRS_POISON) @@ -341,7 +341,7 @@ void amd_brs_drain(void) if (!amd_brs_match_plm(event, to)) continue; - rdmsrl(brs_from(brs_idx), from); + rdmsrq(brs_from(brs_idx), from); perf_clear_branch_entry_bitfields(br+nr); @@ -371,7 +371,7 @@ static void amd_brs_poison_buffer(void) idx = amd_brs_get_tos(&cfg); /* Poison target of entry */ - wrmsrl(brs_to(idx), BRS_POISON); + wrmsrq(brs_to(idx), BRS_POISON); } /* @@ -381,7 +381,8 @@ static void amd_brs_poison_buffer(void) * On ctxswin, sched_in = true, called after the PMU has started * On ctxswout, sched_in = false, called before the PMU is stopped */ -void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 1fc4ce44e743..b20661b8621d 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -9,6 +9,7 @@ #include <linux/jiffies.h> #include <asm/apicdef.h> #include <asm/apic.h> +#include <asm/msr.h> #include <asm/nmi.h> #include "../perf_event.h" @@ -432,8 +433,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, * be removed on one CPU at a time AND PMU is disabled * when we come here */ - for (i = 0; i < x86_pmu.num_counters; i++) { - if (cmpxchg(nb->owners + i, event, NULL) == event) + for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { + struct perf_event *tmp = event; + + if (try_cmpxchg(nb->owners + i, &tmp, NULL)) break; } } @@ -499,7 +502,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev * because of successive calls to x86_schedule_events() from * hw_perf_group_sched_in() without hw_perf_enable() */ - for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { + for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) { if (new == -1 || hwc->idx == idx) /* assign free slot, prefer hwc->idx */ old = cmpxchg(nb->owners + idx, NULL, event); @@ -542,7 +545,7 @@ static struct amd_nb *amd_alloc_nb(int cpu) /* * initialize all possible NB constraints */ - for (i = 0; i < x86_pmu.num_counters; i++) { + for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { __set_bit(i, nb->event_constraints[i].idxmsk); nb->event_constraints[i].weight = 1; } @@ -561,13 +564,13 @@ static void amd_pmu_cpu_reset(int cpu) return; /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */ - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); + wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); /* * Clear freeze and overflow bits i.e. PerfCntrGLobalStatus.LbrFreeze * and PerfCntrGLobalStatus.PerfCntrOvfl */ - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, + wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask); } @@ -649,7 +652,7 @@ static void amd_pmu_cpu_dead(int cpu) static __always_inline void amd_pmu_set_global_ctl(u64 ctl) { - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); + wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); } static inline u64 amd_pmu_get_global_status(void) @@ -657,7 +660,7 @@ static inline u64 amd_pmu_get_global_status(void) u64 status; /* PerfCntrGlobalStatus is read-only */ - rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); + rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); return status; } @@ -670,14 +673,14 @@ static inline void amd_pmu_ack_global_status(u64 status) * clears the same bit in PerfCntrGlobalStatus */ - wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); + wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); } static bool amd_pmu_test_overflow_topbit(int idx) { u64 counter; - rdmsrl(x86_pmu_event_addr(idx), counter); + rdmsrq(x86_pmu_event_addr(idx), counter); return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1)); } @@ -735,7 +738,7 @@ static void amd_pmu_check_overflow(void) * counters are always enabled when this function is called and * ARCH_PERFMON_EVENTSEL_INT is always set. */ - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -755,7 +758,7 @@ static void amd_pmu_enable_all(int added) amd_brs_enable_all(); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { /* only activate events which are marked as active */ if (!test_bit(idx, cpuc->active_mask)) continue; @@ -941,11 +944,12 @@ static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, u static int amd_pmu_v2_handle_irq(struct pt_regs *regs) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + static atomic64_t status_warned = ATOMIC64_INIT(0); + u64 reserved, status, mask, new_bits, prev_bits; struct perf_sample_data data; struct hw_perf_event *hwc; struct perf_event *event; int handled = 0, idx; - u64 reserved, status, mask; bool pmu_enabled; /* @@ -978,7 +982,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) /* Clear any reserved bits set by buggy microcode */ status &= amd_pmu_global_cntr_mask; - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -998,11 +1002,9 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; - if (has_branch_stack(event)) - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } /* @@ -1010,7 +1012,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) * the corresponding PMCs are expected to be inactive according to the * active_mask */ - WARN_ON(status > 0); + if (status > 0) { + prev_bits = atomic64_fetch_or(status, &status_warned); + // A new bit was set for the very first time. + new_bits = status & ~prev_bits; + WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits); + } /* Clear overflow and freeze bits */ amd_pmu_ack_global_status(~status); @@ -1313,7 +1320,7 @@ static __initconst const struct x86_pmu amd_pmu = { .addr_offset = amd_pmu_addr_offset, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS, + .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0), .add = amd_pmu_add_event, .del = amd_pmu_del_event, .cntval_bits = 48, @@ -1412,7 +1419,7 @@ static int __init amd_core_pmu_init(void) */ x86_pmu.eventsel = MSR_F15H_PERF_CTL; x86_pmu.perfctr = MSR_F15H_PERF_CTR; - x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; + x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0); /* Check for Performance Monitoring v2 support */ if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) { @@ -1422,9 +1429,9 @@ static int __init amd_core_pmu_init(void) x86_pmu.version = 2; /* Find the number of available Core PMCs */ - x86_pmu.num_counters = ebx.split.num_core_pmc; + x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0); - amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1; + amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64; /* Update PMC handling functions */ x86_pmu.enable_all = amd_pmu_v2_enable_all; @@ -1452,12 +1459,12 @@ static int __init amd_core_pmu_init(void) * even numbered counter that has a consecutive adjacent odd * numbered counter following it. */ - for (i = 0; i < x86_pmu.num_counters - 1; i += 2) + for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2) even_ctr_mask |= BIT_ULL(i); pair_constraint = (struct event_constraint) __EVENT_CONSTRAINT(0, even_ctr_mask, 0, - x86_pmu.num_counters / 2, 0, + x86_pmu_max_num_counters(NULL) / 2, 0, PERF_X86_EVENT_PAIR); x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index e91970b01d62..112f43b23ebf 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -15,6 +15,7 @@ #include <linux/sched/clock.h> #include <asm/apic.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -26,11 +27,10 @@ static u32 ibs_caps; #include <linux/hardirq.h> #include <asm/nmi.h> -#include <asm/amd-ibs.h> - -#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) -#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT +#include <asm/amd/ibs.h> +/* attr.config2 */ +#define IBS_SW_FILTER_MASK 1 /* * IBS states: @@ -87,6 +87,7 @@ struct perf_ibs { u64 cnt_mask; u64 enable_mask; u64 valid_mask; + u16 min_period; u64 max_period; unsigned long offset_mask[1]; int offset_max; @@ -268,11 +269,19 @@ static int validate_group(struct perf_event *event) return 0; } +static bool perf_ibs_ldlat_event(struct perf_ibs *perf_ibs, + struct perf_event *event) +{ + return perf_ibs == &perf_ibs_op && + (ibs_caps & IBS_CAPS_OPLDLAT) && + (event->attr.config1 & 0xFFF); +} + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct perf_ibs *perf_ibs; - u64 max_cnt, config; + u64 config; int ret; perf_ibs = get_ibs_pmu(event->attr.type); @@ -290,6 +299,16 @@ static int perf_ibs_init(struct perf_event *event) if (has_branch_stack(event)) return -EOPNOTSUPP; + /* handle exclude_{user,kernel} in the IRQ handler */ + if (event->attr.exclude_host || event->attr.exclude_guest || + event->attr.exclude_idle) + return -EINVAL; + + if (!(event->attr.config2 & IBS_SW_FILTER_MASK) && + (event->attr.exclude_kernel || event->attr.exclude_user || + event->attr.exclude_hv)) + return -EINVAL; + ret = validate_group(event); if (ret) return ret; @@ -298,25 +317,47 @@ static int perf_ibs_init(struct perf_event *event) if (config & perf_ibs->cnt_mask) /* raw max_cnt may not be set */ return -EINVAL; - if (!event->attr.sample_freq && hwc->sample_period & 0x0f) - /* - * lower 4 bits can not be set in ibs max cnt, - * but allowing it in case we adjust the - * sample period to set a frequency. - */ - return -EINVAL; - hwc->sample_period &= ~0x0FULL; - if (!hwc->sample_period) - hwc->sample_period = 0x10; + + if (event->attr.freq) { + hwc->sample_period = perf_ibs->min_period; + } else { + /* Silently mask off lower nibble. IBS hw mandates it. */ + hwc->sample_period &= ~0x0FULL; + if (hwc->sample_period < perf_ibs->min_period) + return -EINVAL; + } } else { - max_cnt = config & perf_ibs->cnt_mask; + u64 period = 0; + + if (event->attr.freq) + return -EINVAL; + + if (perf_ibs == &perf_ibs_op) { + period = (config & IBS_OP_MAX_CNT) << 4; + if (ibs_caps & IBS_CAPS_OPCNTEXT) + period |= config & IBS_OP_MAX_CNT_EXT_MASK; + } else { + period = (config & IBS_FETCH_MAX_CNT) << 4; + } + config &= ~perf_ibs->cnt_mask; - event->attr.sample_period = max_cnt << 4; - hwc->sample_period = event->attr.sample_period; + event->attr.sample_period = period; + hwc->sample_period = period; + + if (hwc->sample_period < perf_ibs->min_period) + return -EINVAL; } - if (!hwc->sample_period) - return -EINVAL; + if (perf_ibs_ldlat_event(perf_ibs, event)) { + u64 ldlat = event->attr.config1 & 0xFFF; + + if (ldlat < 128 || ldlat > 2048) + return -EINVAL; + ldlat >>= 7; + + config |= (ldlat - 1) << 59; + config |= IBS_OP_L3MISSONLY | IBS_OP_LDLAT_EN; + } /* * If we modify hwc->sample_period, we also need to update @@ -337,7 +378,8 @@ static int perf_ibs_set_period(struct perf_ibs *perf_ibs, int overflow; /* ignore lower 4 bits in min count: */ - overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); + overflow = perf_event_set_period(hwc, perf_ibs->min_period, + perf_ibs->max_period, period); local64_set(&hwc->prev_count, 0); return overflow; @@ -383,7 +425,7 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, * prev count manually on overflow. */ while (!perf_event_try_update(event, count, 64)) { - rdmsrl(event->hw.config_base, *config); + rdmsrq(event->hw.config_base, *config); count = perf_ibs->get_count(*config); } } @@ -394,9 +436,9 @@ static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, u64 tmp = hwc->config | config; if (perf_ibs->fetch_count_reset_broken) - wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); + wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask); - wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); + wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask); } /* @@ -411,9 +453,9 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, { config &= ~perf_ibs->cnt_mask; if (boot_cpu_data.x86 == 0x10) - wrmsrl(hwc->config_base, config); + wrmsrq(hwc->config_base, config); config &= ~perf_ibs->enable_mask; - wrmsrl(hwc->config_base, config); + wrmsrq(hwc->config_base, config); } /* @@ -435,6 +477,9 @@ static void perf_ibs_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; + if (event->attr.freq && hwc->sample_period < perf_ibs->min_period) + hwc->sample_period = perf_ibs->min_period; + perf_ibs_set_period(perf_ibs, hwc, &period); if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) { config |= period & IBS_OP_MAX_CNT_EXT_MASK; @@ -469,7 +514,7 @@ static void perf_ibs_stop(struct perf_event *event, int flags) if (!stopping && (hwc->state & PERF_HES_UPTODATE)) return; - rdmsrl(hwc->config_base, config); + rdmsrq(hwc->config_base, config); if (stopping) { /* @@ -542,6 +587,28 @@ static void perf_ibs_del(struct perf_event *event, int flags) static void perf_ibs_read(struct perf_event *event) { } +static int perf_ibs_check_period(struct perf_event *event, u64 value) +{ + struct perf_ibs *perf_ibs; + u64 low_nibble; + + if (event->attr.freq) + return 0; + + perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + low_nibble = value & 0xFULL; + + /* + * This contradicts with perf_ibs_init() which allows sample period + * with lower nibble bits set but silently masks them off. Whereas + * this returns error. + */ + if (low_nibble || value < perf_ibs->min_period) + return -EINVAL; + + return 0; +} + /* * We need to initialize with empty group if all attributes in the * group are dynamic. @@ -550,27 +617,20 @@ static struct attribute *attrs_empty[] = { NULL, }; -static struct attribute_group empty_format_group = { - .name = "format", - .attrs = attrs_empty, -}; - static struct attribute_group empty_caps_group = { .name = "caps", .attrs = attrs_empty, }; -static const struct attribute_group *empty_attr_groups[] = { - &empty_format_group, - &empty_caps_group, - NULL, -}; - PMU_FORMAT_ATTR(rand_en, "config:57"); PMU_FORMAT_ATTR(cnt_ctl, "config:19"); +PMU_FORMAT_ATTR(swfilt, "config2:0"); PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59"); PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16"); +PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_format, "config1:0-11"); PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1"); +PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_cap, "1"); +PMU_EVENT_ATTR_STRING(dtlb_pgsize, ibs_op_dtlb_pgsize_cap, "1"); static umode_t zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i) @@ -578,8 +638,21 @@ zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0; } -static struct attribute *rand_en_attrs[] = { +static umode_t +ibs_op_ldlat_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_OPLDLAT ? attr->mode : 0; +} + +static umode_t +ibs_op_dtlb_pgsize_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return ibs_caps & IBS_CAPS_OPDTLBPGSIZE ? attr->mode : 0; +} + +static struct attribute *fetch_attrs[] = { &format_attr_rand_en.attr, + &format_attr_swfilt.attr, NULL, }; @@ -593,9 +666,19 @@ static struct attribute *zen4_ibs_extensions_attrs[] = { NULL, }; -static struct attribute_group group_rand_en = { +static struct attribute *ibs_op_ldlat_cap_attrs[] = { + &ibs_op_ldlat_cap.attr.attr, + NULL, +}; + +static struct attribute *ibs_op_dtlb_pgsize_cap_attrs[] = { + &ibs_op_dtlb_pgsize_cap.attr.attr, + NULL, +}; + +static struct attribute_group group_fetch_formats = { .name = "format", - .attrs = rand_en_attrs, + .attrs = fetch_attrs, }; static struct attribute_group group_fetch_l3missonly = { @@ -610,8 +693,20 @@ static struct attribute_group group_zen4_ibs_extensions = { .is_visible = zen4_ibs_extensions_is_visible, }; +static struct attribute_group group_ibs_op_ldlat_cap = { + .name = "caps", + .attrs = ibs_op_ldlat_cap_attrs, + .is_visible = ibs_op_ldlat_is_visible, +}; + +static struct attribute_group group_ibs_op_dtlb_pgsize_cap = { + .name = "caps", + .attrs = ibs_op_dtlb_pgsize_cap_attrs, + .is_visible = ibs_op_dtlb_pgsize_is_visible, +}; + static const struct attribute_group *fetch_attr_groups[] = { - &group_rand_en, + &group_fetch_formats, &empty_caps_group, NULL, }; @@ -628,6 +723,11 @@ cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i) return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0; } +static struct attribute *op_attrs[] = { + &format_attr_swfilt.attr, + NULL, +}; + static struct attribute *cnt_ctl_attrs[] = { &format_attr_cnt_ctl.attr, NULL, @@ -638,6 +738,16 @@ static struct attribute *op_l3missonly_attrs[] = { NULL, }; +static struct attribute_group group_op_formats = { + .name = "format", + .attrs = op_attrs, +}; + +static struct attribute *ibs_op_ldlat_format_attrs[] = { + &ibs_op_ldlat_format.attr.attr, + NULL, +}; + static struct attribute_group group_cnt_ctl = { .name = "format", .attrs = cnt_ctl_attrs, @@ -650,10 +760,25 @@ static struct attribute_group group_op_l3missonly = { .is_visible = zen4_ibs_extensions_is_visible, }; +static const struct attribute_group *op_attr_groups[] = { + &group_op_formats, + &empty_caps_group, + NULL, +}; + +static struct attribute_group group_ibs_op_ldlat_format = { + .name = "format", + .attrs = ibs_op_ldlat_format_attrs, + .is_visible = ibs_op_ldlat_is_visible, +}; + static const struct attribute_group *op_attr_update[] = { &group_cnt_ctl, &group_op_l3missonly, &group_zen4_ibs_extensions, + &group_ibs_op_ldlat_cap, + &group_ibs_op_ldlat_format, + &group_ibs_op_dtlb_pgsize_cap, NULL, }; @@ -667,13 +792,14 @@ static struct perf_ibs perf_ibs_fetch = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .check_period = perf_ibs_check_period, }, .msr = MSR_AMD64_IBSFETCHCTL, - .config_mask = IBS_FETCH_CONFIG_MASK, + .config_mask = IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN, .cnt_mask = IBS_FETCH_MAX_CNT, .enable_mask = IBS_FETCH_ENABLE, .valid_mask = IBS_FETCH_VAL, + .min_period = 0x10, .max_period = IBS_FETCH_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, @@ -691,14 +817,15 @@ static struct perf_ibs perf_ibs_op = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .check_period = perf_ibs_check_period, }, .msr = MSR_AMD64_IBSOPCTL, - .config_mask = IBS_OP_CONFIG_MASK, + .config_mask = IBS_OP_MAX_CNT, .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | IBS_OP_CUR_CNT_RAND, .enable_mask = IBS_OP_ENABLE, .valid_mask = IBS_OP_VAL, + .min_period = 0x90, .max_period = IBS_OP_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, .offset_max = MSR_AMD64_IBSOP_REG_COUNT, @@ -900,6 +1027,10 @@ static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3, if (!op_data3->dc_lin_addr_valid) return; + if ((ibs_caps & IBS_CAPS_OPDTLBPGSIZE) && + !op_data3->dc_phy_addr_valid) + return; + if (!op_data3->dc_l1tlb_miss) { data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; return; @@ -924,6 +1055,8 @@ static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3, data_src->mem_lock = PERF_MEM_LOCK_LOCKED; } +/* Be careful. Works only for contiguous MSRs. */ +#define ibs_fetch_msr_idx(msr) (msr - MSR_AMD64_IBSFETCHCTL) #define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL) static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, @@ -1004,21 +1137,92 @@ static void perf_ibs_parse_ld_st_data(__u64 sample_type, } } -static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, +static bool perf_ibs_is_mem_sample_type(struct perf_ibs *perf_ibs, + struct perf_event *event) +{ + u64 sample_type = event->attr.sample_type; + + return perf_ibs == &perf_ibs_op && + sample_type & (PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_WEIGHT_TYPE | + PERF_SAMPLE_ADDR | + PERF_SAMPLE_PHYS_ADDR); +} + +static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, + struct perf_event *event, int check_rip) { - if (sample_type & PERF_SAMPLE_RAW || - (perf_ibs == &perf_ibs_op && - (sample_type & PERF_SAMPLE_DATA_SRC || - sample_type & PERF_SAMPLE_WEIGHT_TYPE || - sample_type & PERF_SAMPLE_ADDR || - sample_type & PERF_SAMPLE_PHYS_ADDR))) + if (event->attr.sample_type & PERF_SAMPLE_RAW || + perf_ibs_is_mem_sample_type(perf_ibs, event) || + perf_ibs_ldlat_event(perf_ibs, event)) return perf_ibs->offset_max; else if (check_rip) return 3; return 1; } +static bool perf_ibs_is_kernel_data_addr(struct perf_event *event, + struct perf_ibs_data *ibs_data) +{ + u64 sample_type_mask = PERF_SAMPLE_ADDR | PERF_SAMPLE_RAW; + union ibs_op_data3 op_data3; + u64 dc_lin_addr; + + op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + dc_lin_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; + + return unlikely((event->attr.sample_type & sample_type_mask) && + op_data3.dc_lin_addr_valid && kernel_ip(dc_lin_addr)); +} + +static bool perf_ibs_is_kernel_br_target(struct perf_event *event, + struct perf_ibs_data *ibs_data, + int br_target_idx) +{ + union ibs_op_data op_data; + u64 br_target; + + op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; + br_target = ibs_data->regs[br_target_idx]; + + return unlikely((event->attr.sample_type & PERF_SAMPLE_RAW) && + op_data.op_brn_ret && kernel_ip(br_target)); +} + +static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event, + struct pt_regs *regs, struct perf_ibs_data *ibs_data, + int br_target_idx) +{ + if (perf_exclude_event(event, regs)) + return true; + + if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel) + return false; + + if (perf_ibs_is_kernel_data_addr(event, ibs_data)) + return true; + + if (br_target_idx != -1 && + perf_ibs_is_kernel_br_target(event, ibs_data, br_target_idx)) + return true; + + return false; +} + +static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs, + struct perf_ibs_data *ibs_data) +{ + if (perf_ibs == &perf_ibs_op) { + ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18); + ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0; + return; + } + + ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52); + ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0; +} + static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) { struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); @@ -1031,6 +1235,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) int offset, size, check_rip, offset_max, throttle = 0; unsigned int msr; u64 *buf, *config, period, new_config = 0; + int br_target_idx = -1; if (!test_bit(IBS_STARTED, pcpu->state)) { fail: @@ -1052,7 +1257,7 @@ fail: hwc = &event->hw; msr = hwc->config_base; buf = ibs_data.regs; - rdmsrl(msr, *buf); + rdmsrq(msr, *buf); if (!(*buf++ & perf_ibs->valid_mask)) goto fail; @@ -1067,15 +1272,31 @@ fail: offset = 1; check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); - offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip); + offset_max = perf_ibs_get_offset_max(perf_ibs, event, check_rip); do { - rdmsrl(msr + offset, *buf++); + rdmsrq(msr + offset, *buf++); size++; offset = find_next_bit(perf_ibs->offset_mask, perf_ibs->offset_max, offset + 1); } while (offset < offset_max); + + if (perf_ibs_ldlat_event(perf_ibs, event)) { + union ibs_op_data3 op_data3; + + op_data3.val = ibs_data.regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + /* + * Opening event is errored out if load latency threshold is + * outside of [128, 2048] range. Since the event has reached + * interrupt handler, we can safely assume the threshold is + * within [128, 2048] range. + */ + if (!op_data3.ld_op || !op_data3.dc_miss || + op_data3.dc_miss_lat <= (event->attr.config1 & 0xFFF)) + goto out; + } + /* * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately * depending on their availability. @@ -1084,16 +1305,17 @@ fail: if (event->attr.sample_type & PERF_SAMPLE_RAW) { if (perf_ibs == &perf_ibs_op) { if (ibs_caps & IBS_CAPS_BRNTRGT) { - rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); + rdmsrq(MSR_AMD64_IBSBRTARGET, *buf++); + br_target_idx = size; size++; } if (ibs_caps & IBS_CAPS_OPDATA4) { - rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + rdmsrq(MSR_AMD64_IBSOPDATA4, *buf++); size++; } } if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) { - rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++); + rdmsrq(MSR_AMD64_ICIBSEXTDCTL, *buf++); size++; } } @@ -1111,6 +1333,21 @@ fail: regs.flags |= PERF_EFLAGS_EXACT; } + if ((event->attr.config2 & IBS_SW_FILTER_MASK) && + perf_ibs_swfilt_discard(perf_ibs, event, ®s, &ibs_data, br_target_idx)) { + throttle = perf_event_account_interrupt(event); + goto out; + } + /* + * Prevent leaking physical addresses to unprivileged users. Skip + * PERF_SAMPLE_PHYS_ADDR check since generic code prevents it for + * unprivileged users. + */ + if ((event->attr.sample_type & PERF_SAMPLE_RAW) && + perf_allow_kernel()) { + perf_ibs_phyaddr_clear(perf_ibs, &ibs_data); + } + if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw = (struct perf_raw_record){ .frag = { @@ -1118,7 +1355,7 @@ fail: .data = ibs_data.data, }, }; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } if (perf_ibs == &perf_ibs_op) @@ -1129,14 +1366,15 @@ fail: * recorded as part of interrupt regs. Thus we need to use rip from * interrupt regs while unwinding call stack. */ - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) - perf_sample_save_callchain(&data, event, iregs); + perf_sample_save_callchain(&data, event, iregs); throttle = perf_event_overflow(event, &data, ®s); + + if (event->attr.freq && hwc->sample_period < perf_ibs->min_period) + hwc->sample_period = perf_ibs->min_period; + out: - if (throttle) { - perf_ibs_stop(event, 0); - } else { + if (!throttle) { if (perf_ibs == &perf_ibs_op) { if (ibs_caps & IBS_CAPS_OPCNTEXT) { new_config = period & IBS_OP_MAX_CNT_EXT_MASK; @@ -1222,13 +1460,14 @@ static __init int perf_ibs_op_init(void) if (ibs_caps & IBS_CAPS_OPCNTEXT) { perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK; perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK; - perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; + perf_ibs_op.cnt_mask |= (IBS_OP_MAX_CNT_EXT_MASK | + IBS_OP_CUR_CNT_EXT_MASK); } if (ibs_caps & IBS_CAPS_ZEN4) perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY; - perf_ibs_op.pmu.attr_groups = empty_attr_groups; + perf_ibs_op.pmu.attr_groups = op_attr_groups; perf_ibs_op.pmu.attr_update = op_attr_update; return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); @@ -1325,7 +1564,7 @@ static inline int ibs_eilvt_valid(void) preempt_disable(); - rdmsrl(MSR_AMD64_IBSCTL, val); + rdmsrq(MSR_AMD64_IBSCTL, val); offset = val & IBSCTL_LVT_OFFSET_MASK; if (!(val & IBSCTL_LVT_OFFSET_VALID)) { @@ -1440,7 +1679,7 @@ static inline int get_ibs_lvt_offset(void) { u64 val; - rdmsrl(MSR_AMD64_IBSCTL, val); + rdmsrq(MSR_AMD64_IBSCTL, val); if (!(val & IBSCTL_LVT_OFFSET_VALID)) return -EINVAL; diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index b15f7b950d2e..a721da9987dd 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -16,6 +16,8 @@ #include <linux/slab.h> #include <linux/amd-iommu.h> +#include <asm/msr.h> + #include "../perf_event.h" #include "iommu.h" @@ -30,7 +32,7 @@ #define GET_DOMID_MASK(x) (((x)->conf1 >> 16) & 0xFFFFULL) #define GET_PASID_MASK(x) (((x)->conf1 >> 32) & 0xFFFFFULL) -#define IOMMU_NAME_SIZE 16 +#define IOMMU_NAME_SIZE 24 struct perf_amd_iommu { struct list_head list; diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 19c7b76e21bc..d24da377df77 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/perf_event.h> +#include <asm/msr.h> #include <asm/perf_event.h> #include "../perf_event.h" @@ -61,19 +62,19 @@ struct branch_entry { static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val) { - wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); + wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val); } static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val) { - wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); + wrmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); } static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx) { u64 val; - rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val); + rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2, val); return val; } @@ -82,7 +83,7 @@ static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx) { u64 val; - rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); + rdmsrq(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val); return val; } @@ -333,7 +334,7 @@ void amd_pmu_lbr_reset(void) cpuc->last_task_ctx = NULL; cpuc->last_log_id = 0; - wrmsrl(MSR_AMD64_LBR_SELECT, 0); + wrmsrq(MSR_AMD64_LBR_SELECT, 0); } void amd_pmu_lbr_add(struct perf_event *event) @@ -371,7 +372,8 @@ void amd_pmu_lbr_del(struct perf_event *event) perf_sched_cb_dec(event->pmu); } -void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -395,16 +397,16 @@ void amd_pmu_lbr_enable_all(void) /* Set hardware branch filter */ if (cpuc->lbr_select) { lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK; - wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); + wrmsrq(MSR_AMD64_LBR_SELECT, lbr_select); } if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); } - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); + rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } void amd_pmu_lbr_disable_all(void) diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index 37d5b380516e..dad42790cf7d 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "../perf_event.h" /* Event code: LSB 8 bits, passed in attr->config any other bit is reserved. */ @@ -48,8 +49,8 @@ static void event_update(struct perf_event *event) prev_pwr_acc = hwc->pwr_acc; prev_ptsc = hwc->ptsc; - rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc); - rdmsrl(MSR_F15H_PTSC, new_ptsc); + rdmsrq(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc); + rdmsrq(MSR_F15H_PTSC, new_ptsc); /* * Calculate the CU power consumption over a time period, the unit of @@ -75,8 +76,8 @@ static void __pmu_event_start(struct perf_event *event) event->hw.state = 0; - rdmsrl(MSR_F15H_PTSC, event->hw.ptsc); - rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc); + rdmsrq(MSR_F15H_PTSC, event->hw.ptsc); + rdmsrq(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc); } static void pmu_event_start(struct perf_event *event, int mode) @@ -272,7 +273,7 @@ static int __init amd_power_pmu_init(void) cpu_pwr_sample_ratio = cpuid_ecx(0x80000007); - if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) { + if (rdmsrq_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) { pr_err("Failed to read max compute unit power accumulator MSR\n"); return -ENODEV; } diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 4ccb8fa483e6..e8b6af199c73 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -21,6 +21,7 @@ #define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_L2 4 #define NUM_COUNTERS_L3 6 +#define NUM_COUNTERS_MAX 64 #define RDPMC_BASE_NB 6 #define RDPMC_BASE_LLC 10 @@ -38,7 +39,10 @@ struct amd_uncore_ctx { int refcnt; int cpu; struct perf_event **events; - struct hlist_node node; + unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)]; + int nr_active; + struct hrtimer hrtimer; + u64 hrtimer_duration; }; struct amd_uncore_pmu { @@ -83,11 +87,51 @@ struct amd_uncore { static struct amd_uncore uncores[UNCORE_TYPE_MAX]; +/* Interval for hrtimer, defaults to 60000 milliseconds */ +static unsigned int update_interval = 60 * MSEC_PER_SEC; +module_param(update_interval, uint, 0444); + static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) { return container_of(event->pmu, struct amd_uncore_pmu, pmu); } +static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer) +{ + struct amd_uncore_ctx *ctx; + struct perf_event *event; + int bit; + + ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer); + + if (!ctx->nr_active || ctx->cpu != smp_processor_id()) + return HRTIMER_NORESTART; + + for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) { + event = ctx->events[bit]; + event->pmu->read(event); + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration)); + return HRTIMER_RESTART; +} + +static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx) +{ + hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration), + HRTIMER_MODE_REL_PINNED_HARD); +} + +static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx) +{ + hrtimer_cancel(&ctx->hrtimer); +} + +static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx) +{ + hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); +} + static void amd_uncore_read(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -106,9 +150,9 @@ static void amd_uncore_read(struct perf_event *event) * read counts directly from the corresponding PERF_CTR. */ if (hwc->event_base_rdpmc < 0) - rdmsrl(hwc->event_base, new); + rdmsrq(hwc->event_base, new); else - rdpmcl(hwc->event_base_rdpmc, new); + new = rdpmc(hwc->event_base_rdpmc); local64_set(&hwc->prev_count, new); delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); @@ -118,27 +162,40 @@ static void amd_uncore_read(struct perf_event *event) static void amd_uncore_start(struct perf_event *event, int flags) { + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; + if (!ctx->nr_active++) + amd_uncore_start_hrtimer(ctx); + if (flags & PERF_EF_RELOAD) - wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); + wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count)); hwc->state = 0; - wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE)); + __set_bit(hwc->idx, ctx->active_mask); + wrmsrq(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE)); perf_event_update_userpage(event); } static void amd_uncore_stop(struct perf_event *event, int flags) { + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrq(hwc->config_base, hwc->config); hwc->state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { event->pmu->read(event); hwc->state |= PERF_HES_UPTODATE; } + + if (!--ctx->nr_active) + amd_uncore_cancel_hrtimer(ctx); + + __clear_bit(hwc->idx, ctx->active_mask); } static int amd_uncore_add(struct perf_event *event, int flags) @@ -162,7 +219,9 @@ static int amd_uncore_add(struct perf_event *event, int flags) /* if not, take the first available counter */ hwc->idx = -1; for (i = 0; i < pmu->num_counters; i++) { - if (cmpxchg(&ctx->events[i], NULL, event) == NULL) { + struct perf_event *tmp = NULL; + + if (try_cmpxchg(&ctx->events[i], &tmp, event)) { hwc->idx = i; break; } @@ -196,7 +255,9 @@ static void amd_uncore_del(struct perf_event *event, int flags) event->pmu->stop(event, PERF_EF_UPDATE); for (i = 0; i < pmu->num_counters; i++) { - if (cmpxchg(&ctx->events[i], event, NULL) == event) + struct perf_event *tmp = event; + + if (try_cmpxchg(&ctx->events[i], &tmp, NULL)) break; } @@ -487,6 +548,9 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) goto fail; } + amd_uncore_init_hrtimer(curr); + curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC; + cpumask_set_cpu(cpu, &pmu->active_mask); } @@ -639,7 +703,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_NB; info.split.gid = 0; - info.split.cid = topology_die_id(cpu); + info.split.cid = topology_logical_package_id(cpu); if (pmu_version >= 2) { ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); @@ -654,17 +718,20 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { struct attribute **df_attr = amd_uncore_df_format_attr; struct amd_uncore_pmu *pmu; + int num_counters; /* Run just once */ if (uncore->init_done) return amd_uncore_ctx_init(uncore, cpu); + num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + if (!num_counters) + goto done; + /* No grouping, single instance for a system */ uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); - if (!uncore->pmus) { - uncore->num_pmus = 0; + if (!uncore->pmus) goto done; - } /* * For Family 17h and above, the Northbridge counters are repurposed @@ -674,7 +741,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb", sizeof(pmu->name)); - pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + pmu->num_counters = num_counters; pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_NB; pmu->group = amd_uncore_ctx_gid(uncore, cpu); @@ -785,17 +852,20 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { struct attribute **l3_attr = amd_uncore_l3_format_attr; struct amd_uncore_pmu *pmu; + int num_counters; /* Run just once */ if (uncore->init_done) return amd_uncore_ctx_init(uncore, cpu); + num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + if (!num_counters) + goto done; + /* No grouping, single instance for a system */ uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); - if (!uncore->pmus) { - uncore->num_pmus = 0; + if (!uncore->pmus) goto done; - } /* * For Family 17h and above, L3 cache counters are available instead @@ -805,7 +875,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", sizeof(pmu->name)); - pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); + pmu->num_counters = num_counters; pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_LLC; pmu->group = amd_uncore_ctx_gid(uncore, cpu); @@ -870,16 +940,55 @@ static int amd_uncore_umc_event_init(struct perf_event *event) static void amd_uncore_umc_start(struct perf_event *event, int flags) { + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; + if (!ctx->nr_active++) + amd_uncore_start_hrtimer(ctx); + if (flags & PERF_EF_RELOAD) - wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); + wrmsrq(hwc->event_base, (u64)local64_read(&hwc->prev_count)); hwc->state = 0; - wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC)); + __set_bit(hwc->idx, ctx->active_mask); + wrmsrq(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC)); perf_event_update_userpage(event); } +static void amd_uncore_umc_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new, shift; + s64 delta; + + shift = COUNTER_SHIFT + 1; + prev = local64_read(&hwc->prev_count); + + /* + * UMC counters do not have RDPMC assignments. Read counts directly + * from the corresponding PERF_CTR. + */ + rdmsrl(hwc->event_base, new); + + /* + * Unlike the other uncore counters, UMC counters saturate and set the + * Overflow bit (bit 48) on overflow. Since they do not roll over, + * proactively reset the corresponding PERF_CTR when bit 47 is set so + * that the counter never gets a chance to saturate. + */ + if (new & BIT_ULL(63 - COUNTER_SHIFT)) { + wrmsrl(hwc->event_base, 0); + local64_set(&hwc->prev_count, 0); + } else { + local64_set(&hwc->prev_count, new); + } + + delta = (new << shift) - (prev << shift); + delta >>= shift; + local64_add(delta, &event->count); +} + static void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { @@ -893,8 +1002,8 @@ void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx); info.split.aux_data = ecx; /* stash active mask */ info.split.num_pmcs = ebx.split.num_umc_pmc; - info.split.gid = topology_die_id(cpu); - info.split.cid = topology_die_id(cpu); + info.split.gid = topology_logical_package_id(cpu); + info.split.cid = topology_logical_package_id(cpu); *per_cpu_ptr(uncore->info, cpu) = info; } @@ -906,7 +1015,8 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 }; union amd_uncore_info info; struct amd_uncore_pmu *pmu; - int index = 0, gid, i; + int gid, i; + u16 index = 0; if (pmu_version < 2) return 0; @@ -938,7 +1048,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) { for (i = 0; i < group_num_pmus[gid]; i++) { pmu = &uncore->pmus[index]; - snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index); + snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index); pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid]; pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2; pmu->rdpmc_base = -1; @@ -957,7 +1067,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) .del = amd_uncore_del, .start = amd_uncore_umc_start, .stop = amd_uncore_stop, - .read = amd_uncore_read, + .read = amd_uncore_umc_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, .module = THIS_MODULE, }; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 5b0dd07b1ef1..7610f26dfbd9 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -32,6 +32,7 @@ #include <asm/apic.h> #include <asm/stacktrace.h> +#include <asm/msr.h> #include <asm/nmi.h> #include <asm/smp.h> #include <asm/alternative.h> @@ -41,6 +42,8 @@ #include <asm/desc.h> #include <asm/ldt.h> #include <asm/unwind.h> +#include <asm/uprobes.h> +#include <asm/ibt.h> #include "perf_event.h" @@ -85,13 +88,19 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling); DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling); DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task); -DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx); DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases); DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter); +DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup); + +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all); +DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all); + /* * This one is magic, it will get called even when PMU init fails (because * there is no PMU), in which case it should simply return NULL. @@ -131,7 +140,7 @@ u64 x86_perf_event_update(struct perf_event *event) */ prev_raw_count = local64_read(&hwc->prev_count); do { - rdpmcl(hwc->event_base_rdpmc, new_raw_count); + new_raw_count = rdpmc(hwc->event_base_rdpmc); } while (!local64_try_cmpxchg(&hwc->prev_count, &prev_raw_count, new_raw_count)); @@ -189,29 +198,31 @@ static DEFINE_MUTEX(pmc_reserve_mutex); #ifdef CONFIG_X86_LOCAL_APIC -static inline int get_possible_num_counters(void) +static inline u64 get_possible_counter_mask(void) { - int i, num_counters = x86_pmu.num_counters; + u64 cntr_mask = x86_pmu.cntr_mask64; + int i; if (!is_hybrid()) - return num_counters; + return cntr_mask; for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) - num_counters = max_t(int, num_counters, x86_pmu.hybrid_pmu[i].num_counters); + cntr_mask |= x86_pmu.hybrid_pmu[i].cntr_mask64; - return num_counters; + return cntr_mask; } static bool reserve_pmc_hardware(void) { - int i, num_counters = get_possible_num_counters(); + u64 cntr_mask = get_possible_counter_mask(); + int i, end; - for (i = 0; i < num_counters; i++) { + for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) { if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) goto perfctr_fail; } - for (i = 0; i < num_counters; i++) { + for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) { if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) goto eventsel_fail; } @@ -219,13 +230,14 @@ static bool reserve_pmc_hardware(void) return true; eventsel_fail: - for (i--; i >= 0; i--) + end = i; + for_each_set_bit(i, (unsigned long *)&cntr_mask, end) release_evntsel_nmi(x86_pmu_config_addr(i)); - - i = num_counters; + i = X86_PMC_IDX_MAX; perfctr_fail: - for (i--; i >= 0; i--) + end = i; + for_each_set_bit(i, (unsigned long *)&cntr_mask, end) release_perfctr_nmi(x86_pmu_event_addr(i)); return false; @@ -233,9 +245,10 @@ perfctr_fail: static void release_pmc_hardware(void) { - int i, num_counters = get_possible_num_counters(); + u64 cntr_mask = get_possible_counter_mask(); + int i; - for (i = 0; i < num_counters; i++) { + for_each_set_bit(i, (unsigned long *)&cntr_mask, X86_PMC_IDX_MAX) { release_perfctr_nmi(x86_pmu_event_addr(i)); release_evntsel_nmi(x86_pmu_config_addr(i)); } @@ -248,7 +261,8 @@ static void release_pmc_hardware(void) {} #endif -bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed) +bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask, + unsigned long *fixed_cntr_mask) { u64 val, val_fail = -1, val_new= ~0; int i, reg, reg_fail = -1, ret = 0; @@ -259,9 +273,9 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed) * Check to see if the BIOS enabled any of the counters, if so * complain and bail. */ - for (i = 0; i < num_counters; i++) { + for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) { reg = x86_pmu_config_addr(i); - ret = rdmsrl_safe(reg, &val); + ret = rdmsrq_safe(reg, &val); if (ret) goto msr_fail; if (val & ARCH_PERFMON_EVENTSEL_ENABLE) { @@ -273,12 +287,12 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed) } } - if (num_counters_fixed) { + if (*(u64 *)fixed_cntr_mask) { reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - ret = rdmsrl_safe(reg, &val); + ret = rdmsrq_safe(reg, &val); if (ret) goto msr_fail; - for (i = 0; i < num_counters_fixed; i++) { + for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) { if (fixed_counter_disabled(i, pmu)) continue; if (val & (0x03ULL << i*4)) { @@ -306,11 +320,11 @@ bool check_hw_exists(struct pmu *pmu, int num_counters, int num_counters_fixed) * (qemu/kvm) that don't trap on the MSR access and always return 0s. */ reg = x86_pmu_event_addr(reg_safe); - if (rdmsrl_safe(reg, &val)) + if (rdmsrq_safe(reg, &val)) goto msr_fail; val ^= 0xffffUL; - ret = wrmsrl_safe(reg, val); - ret |= rdmsrl_safe(reg, &val_new); + ret = wrmsrq_safe(reg, val); + ret |= rdmsrq_safe(reg, &val_new); if (ret || val != val_new) goto msr_fail; @@ -619,9 +633,9 @@ int x86_pmu_hw_config(struct perf_event *event) event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; if (event->attr.type == event->pmu->type) - event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; + event->hw.config |= x86_pmu_get_event_config(event); - if (event->attr.sample_period && x86_pmu.limit_period) { + if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) @@ -666,6 +680,7 @@ static int __x86_pmu_event_init(struct perf_event *event) event->hw.idx = -1; event->hw.last_cpu = -1; event->hw.last_tag = ~0ULL; + event->hw.dyn_constraint = ~0ULL; /* mark unused */ event->hw.extra_reg.idx = EXTRA_REG_NONE; @@ -679,19 +694,19 @@ void x86_pmu_disable_all(void) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { struct hw_perf_event *hwc = &cpuc->events[idx]->hw; u64 val; if (!test_bit(idx, cpuc->active_mask)) continue; - rdmsrl(x86_pmu_config_addr(idx), val); + rdmsrq(x86_pmu_config_addr(idx), val); if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) continue; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(x86_pmu_config_addr(idx), val); + wrmsrq(x86_pmu_config_addr(idx), val); if (is_counter_pair(hwc)) - wrmsrl(x86_pmu_config_addr(idx + 1), 0); + wrmsrq(x86_pmu_config_addr(idx + 1), 0); } } @@ -736,7 +751,7 @@ void x86_pmu_enable_all(int added) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { struct hw_perf_event *hwc = &cpuc->events[idx]->hw; if (!test_bit(idx, cpuc->active_mask)) @@ -746,17 +761,18 @@ void x86_pmu_enable_all(int added) } } -static inline int is_x86_event(struct perf_event *event) +int is_x86_event(struct perf_event *event) { - int i; - - if (!is_hybrid()) - return event->pmu == &pmu; - - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (event->pmu == &x86_pmu.hybrid_pmu[i].pmu) - return true; - } + /* + * For a non-hybrid platforms, the type of X86 pmu is + * always PERF_TYPE_RAW. + * For a hybrid platform, the PERF_PMU_CAP_EXTENDED_HW_TYPE + * is a unique capability for the X86 PMU. + * Use them to detect a X86 event. + */ + if (event->pmu->type == PERF_TYPE_RAW || + event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE) + return true; return false; } @@ -975,7 +991,6 @@ EXPORT_SYMBOL_GPL(perf_assign_events); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { - int num_counters = hybrid(cpuc->pmu, num_counters); struct event_constraint *c; struct perf_event *e; int n0, i, wmin, wmax, unsched = 0; @@ -1051,7 +1066,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* slow path */ if (i != n) { - int gpmax = num_counters; + int gpmax = x86_pmu_max_num_counters(cpuc->pmu); /* * Do not allow scheduling of more than half the available @@ -1072,7 +1087,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * the extra Merge events needed by large increment events. */ if (x86_pmu.flags & PMU_FL_PAIR) { - gpmax = num_counters - cpuc->n_pair; + gpmax -= cpuc->n_pair; WARN_ON(gpmax <= 0); } @@ -1157,12 +1172,10 @@ static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event, */ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) { - int num_counters = hybrid(cpuc->pmu, num_counters); - int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); struct perf_event *event; int n, max_count; - max_count = num_counters + num_counters_fixed; + max_count = x86_pmu_num_counters(cpuc->pmu) + x86_pmu_num_counters_fixed(cpuc->pmu); /* current number of events already accepted */ n = cpuc->n_events; @@ -1234,8 +1247,7 @@ static inline void x86_assign_hw_event(struct perf_event *event, fallthrough; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1: hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + - (idx - INTEL_PMC_IDX_FIXED); + hwc->event_base = x86_pmu_fixed_ctr_addr(idx - INTEL_PMC_IDX_FIXED); hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | INTEL_PMC_FIXED_RDPMC_BASE; break; @@ -1295,6 +1307,15 @@ static void x86_pmu_enable(struct pmu *pmu) if (cpuc->n_added) { int n_running = cpuc->n_events - cpuc->n_added; + + /* + * The late setup (after counters are scheduled) + * is required for some cases, e.g., PEBS counters + * snapshotting. Because an accurate counter index + * is needed. + */ + static_call_cond(x86_pmu_late_setup)(); + /* * apply assignment obtained either from * hw_perf_group_sched_in() or x86_pmu_enable() @@ -1407,14 +1428,14 @@ int x86_perf_event_set_period(struct perf_event *event) */ local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + wrmsrq(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* * Sign extend the Merge event counter's upper 16 bits since * we currently declare a 48-bit counter width */ if (is_counter_pair(hwc)) - wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff); + wrmsrq(x86_pmu_event_addr(idx + 1), 0xffff); perf_event_update_userpage(event); @@ -1519,25 +1540,28 @@ static void x86_pmu_start(struct perf_event *event, int flags) void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + unsigned long *cntr_mask, *fixed_cntr_mask; + struct event_constraint *pebs_constraints; + struct cpu_hw_events *cpuc; u64 pebs, debugctl; - int cpu = smp_processor_id(); - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - int num_counters = hybrid(cpuc->pmu, num_counters); - int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); - struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); - unsigned long flags; - int idx; + int cpu, idx; - if (!num_counters) - return; + guard(irqsave)(); - local_irq_save(flags); + cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_events, cpu); + cntr_mask = hybrid(cpuc->pmu, cntr_mask); + fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); + pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); + + if (!*(u64 *)cntr_mask) + return; if (x86_pmu.version >= 2) { - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + rdmsrq(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); + rdmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); pr_info("\n"); pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); @@ -1545,19 +1569,19 @@ void perf_event_print_debug(void) pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); if (pebs_constraints) { - rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); + rdmsrq(MSR_IA32_PEBS_ENABLE, pebs); pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); } if (x86_pmu.lbr_nr) { - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl); } } pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); - for (idx = 0; idx < num_counters; idx++) { - rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); - rdmsrl(x86_pmu_event_addr(idx), pmc_count); + for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) { + rdmsrq(x86_pmu_config_addr(idx), pmc_ctrl); + rdmsrq(x86_pmu_event_addr(idx), pmc_count); prev_left = per_cpu(pmc_prev_left[idx], cpu); @@ -1568,15 +1592,14 @@ void perf_event_print_debug(void) pr_info("CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left); } - for (idx = 0; idx < num_counters_fixed; idx++) { + for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) { if (fixed_counter_disabled(idx, cpuc->pmu)) continue; - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); + rdmsrq(x86_pmu_fixed_ctr_addr(idx), pmc_count); pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", cpu, idx, pmc_count); } - local_irq_restore(flags); } void x86_pmu_stop(struct perf_event *event, int flags) @@ -1668,6 +1691,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_events *cpuc; struct perf_event *event; int idx, handled = 0; + u64 last_period; u64 val; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1682,11 +1706,12 @@ int x86_pmu_handle_irq(struct pt_regs *regs) */ apic_write(APIC_LVTPC, APIC_DM_NMI); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { if (!test_bit(idx, cpuc->active_mask)) continue; event = cpuc->events[idx]; + last_period = event->hw.last_period; val = static_call(x86_pmu_update)(event); if (val & (1ULL << (x86_pmu.cntval_bits - 1))) @@ -1700,13 +1725,11 @@ int x86_pmu_handle_irq(struct pt_regs *regs) if (!static_call(x86_pmu_set_period)(event)) continue; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); - if (has_branch_stack(event)) - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } if (handled) @@ -2024,13 +2047,19 @@ static void x86_pmu_static_call_update(void) static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling); static_call_update(x86_pmu_sched_task, x86_pmu.sched_task); - static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx); static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs); static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases); static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs); static_call_update(x86_pmu_filter, x86_pmu.filter); + + static_call_update(x86_pmu_late_setup, x86_pmu.late_setup); + + static_call_update(x86_pmu_pebs_enable, x86_pmu.pebs_enable); + static_call_update(x86_pmu_pebs_disable, x86_pmu.pebs_disable); + static_call_update(x86_pmu_pebs_enable_all, x86_pmu.pebs_enable_all); + static_call_update(x86_pmu_pebs_disable_all, x86_pmu.pebs_disable_all); } static void _x86_pmu_read(struct perf_event *event) @@ -2038,18 +2067,15 @@ static void _x86_pmu_read(struct perf_event *event) static_call(x86_pmu_update)(event); } -void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, - u64 intel_ctrl) +void x86_pmu_show_pmu_cap(struct pmu *pmu) { pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); - pr_info("... generic registers: %d\n", num_counters); + pr_info("... generic registers: %d\n", x86_pmu_num_counters(pmu)); pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose events: %lu\n", - hweight64((((1ULL << num_counters_fixed) - 1) - << INTEL_PMC_IDX_FIXED) & intel_ctrl)); - pr_info("... event mask: %016Lx\n", intel_ctrl); + pr_info("... fixed-purpose events: %d\n", x86_pmu_num_counters_fixed(pmu)); + pr_info("... event mask: %016Lx\n", hybrid(pmu, intel_ctrl)); } static int __init init_hw_perf_events(void) @@ -2086,7 +2112,7 @@ static int __init init_hw_perf_events(void) pmu_check_apic(); /* sanity check that the hardware exists or is emulated */ - if (!check_hw_exists(&pmu, x86_pmu.num_counters, x86_pmu.num_counters_fixed)) + if (!check_hw_exists(&pmu, x86_pmu.cntr_mask, x86_pmu.fixed_cntr_mask)) goto out_bad_pmu; pr_cont("%s PMU driver.\n", x86_pmu.name); @@ -2097,14 +2123,17 @@ static int __init init_hw_perf_events(void) quirk->func(); if (!x86_pmu.intel_ctrl) - x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; + x86_pmu.intel_ctrl = x86_pmu.cntr_mask64; + + if (!x86_pmu.config_mask) + x86_pmu.config_mask = X86_RAW_EVENT_MASK; perf_events_lapic_init(); register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, - 0, x86_pmu.num_counters, 0, 0); + __EVENT_CONSTRAINT(0, x86_pmu.cntr_mask64, + 0, x86_pmu_num_counters(NULL), 0, 0); x86_pmu_format_group.attrs = x86_pmu.format_attrs; @@ -2113,11 +2142,8 @@ static int __init init_hw_perf_events(void) pmu.attr_update = x86_pmu.attr_update; - if (!is_hybrid()) { - x86_pmu_show_pmu_cap(x86_pmu.num_counters, - x86_pmu.num_counters_fixed, - x86_pmu.intel_ctrl); - } + if (!is_hybrid()) + x86_pmu_show_pmu_cap(NULL); if (!x86_pmu.read) x86_pmu.read = _x86_pmu_read; @@ -2481,12 +2507,12 @@ void perf_clear_dirty_counters(void) for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { if (i >= INTEL_PMC_IDX_FIXED) { /* Metrics and fake events don't have corresponding HW counters. */ - if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed)) + if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask))) continue; - wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); + wrmsrq(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0); } else { - wrmsrl(x86_pmu_event_addr(i), 0); + wrmsrq(x86_pmu_event_addr(i), 0); } } @@ -2547,6 +2573,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev, struct device_attribute *attr, const char *buf, size_t count) { + static DEFINE_MUTEX(rdpmc_mutex); unsigned long val; ssize_t ret; @@ -2560,6 +2587,8 @@ static ssize_t set_attr_rdpmc(struct device *cdev, if (x86_pmu.attr_rdpmc_broken) return -ENOTSUPP; + guard(mutex)(&rdpmc_mutex); + if (val != x86_pmu.attr_rdpmc) { /* * Changing into or out of never available or always available, @@ -2621,15 +2650,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { NULL, }; -static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) -{ - static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in); -} - -static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc) +static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { - static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc); + static_call_cond(x86_pmu_sched_task)(pmu_ctx, task, sched_in); } void perf_check_microcode(void) @@ -2696,7 +2720,6 @@ static struct pmu pmu = { .event_idx = x86_pmu_event_idx, .sched_task = x86_pmu_sched_task, - .swap_task_ctx = x86_pmu_swap_task_ctx, .check_period = x86_pmu_check_period, .aux_output_match = x86_pmu_aux_output_match, @@ -2794,8 +2817,15 @@ static unsigned long get_segment_base(unsigned int segment) #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; + /* + * If we're not in a valid context with a real (not just lazy) + * user mm, then don't even try. + */ + if (!nmi_uaccess_okay()) + return 0; + /* IRQs are off, so this synchronizes with smp_store_release */ - ldt = READ_ONCE(current->active_mm->context.ldt); + ldt = smp_load_acquire(¤t->mm->context.ldt); if (!ldt || idx >= ldt->nr_entries) return 0; @@ -2813,6 +2843,46 @@ static unsigned long get_segment_base(unsigned int segment) return get_desc_base(desc); } +#ifdef CONFIG_UPROBES +/* + * Heuristic-based check if uprobe is installed at the function entry. + * + * Under assumption of user code being compiled with frame pointers, + * `push %rbp/%ebp` is a good indicator that we indeed are. + * + * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern. + * If we get this wrong, captured stack trace might have one extra bogus + * entry, but the rest of stack trace will still be meaningful. + */ +static bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + struct arch_uprobe *auprobe; + + if (!current->utask) + return false; + + auprobe = current->utask->auprobe; + if (!auprobe) + return false; + + /* push %rbp/%ebp */ + if (auprobe->insn[0] == 0x55) + return true; + + /* endbr64 (64-bit only) */ + if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn)) + return true; + + return false; +} + +#else +static bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ + #ifdef CONFIG_IA32_EMULATION #include <linux/compat.h> @@ -2824,6 +2894,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent unsigned long ss_base, cs_base; struct stack_frame_ia32 frame; const struct stack_frame_ia32 __user *fp; + u32 ret_addr; if (user_64bit_mode(regs)) return 0; @@ -2833,6 +2904,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent fp = compat_ptr(ss_base + regs->bp); pagefault_disable(); + + /* see perf_callchain_user() below for why we do this */ + if (is_uprobe_at_func_entry(regs) && + !get_user(ret_addr, (const u32 __user *)regs->sp)) + perf_callchain_store(entry, ret_addr); + while (entry->nr < entry->max_stack) { if (!valid_user_frame(fp, sizeof(frame))) break; @@ -2861,6 +2938,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs { struct stack_frame frame; const struct stack_frame __user *fp; + unsigned long ret_addr; if (perf_guest_state()) { /* TODO: We don't support guest os callchain now */ @@ -2884,6 +2962,19 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs return; pagefault_disable(); + + /* + * If we are called from uprobe handler, and we are indeed at the very + * entry to user function (which is normally a `push %rbp` instruction, + * under assumption of application being compiled with frame pointers), + * we should read return address from *regs->sp before proceeding + * to follow frame pointers, otherwise we'll skip immediate caller + * as %rbp is not yet setup. + */ + if (is_uprobe_at_func_entry(regs) && + !get_user(ret_addr, (const unsigned long __user *)regs->sp)) + perf_callchain_store(entry, ret_addr); + while (entry->nr < entry->max_stack) { if (!valid_user_frame(fp, sizeof(frame))) break; @@ -2937,35 +3028,57 @@ static unsigned long code_segment_base(struct pt_regs *regs) return 0; } -unsigned long perf_instruction_pointer(struct pt_regs *regs) +unsigned long perf_arch_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_state()) - return perf_guest_get_ip(); - return regs->ip + code_segment_base(regs); } -unsigned long perf_misc_flags(struct pt_regs *regs) +static unsigned long common_misc_flags(struct pt_regs *regs) { - unsigned int guest_state = perf_guest_state(); - int misc = 0; + if (regs->flags & PERF_EFLAGS_EXACT) + return PERF_RECORD_MISC_EXACT_IP; - if (guest_state) { - if (guest_state & PERF_GUEST_USER) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; - } else { - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - } + return 0; +} - if (regs->flags & PERF_EFLAGS_EXACT) - misc |= PERF_RECORD_MISC_EXACT_IP; +static unsigned long guest_misc_flags(struct pt_regs *regs) +{ + unsigned long guest_state = perf_guest_state(); + + if (!(guest_state & PERF_GUEST_ACTIVE)) + return 0; + + if (guest_state & PERF_GUEST_USER) + return PERF_RECORD_MISC_GUEST_USER; + else + return PERF_RECORD_MISC_GUEST_KERNEL; + +} + +static unsigned long host_misc_flags(struct pt_regs *regs) +{ + if (user_mode(regs)) + return PERF_RECORD_MISC_USER; + else + return PERF_RECORD_MISC_KERNEL; +} + +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) +{ + unsigned long flags = common_misc_flags(regs); + + flags |= guest_misc_flags(regs); + + return flags; +} + +unsigned long perf_arch_misc_flags(struct pt_regs *regs) +{ + unsigned long flags = common_misc_flags(regs); + + flags |= host_misc_flags(regs); - return misc; + return flags; } void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) @@ -2983,8 +3096,8 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) * base PMU holds the correct number of counters for P-cores. */ cap->version = x86_pmu.version; - cap->num_counters_gp = x86_pmu.num_counters; - cap->num_counters_fixed = x86_pmu.num_counters_fixed; + cap->num_counters_gp = x86_pmu_num_counters(NULL); + cap->num_counters_fixed = x86_pmu_num_counters_fixed(NULL); cap->bit_width_gp = x86_pmu.cntval_bits; cap->bit_width_fixed = x86_pmu.cntval_bits; cap->events_mask = (unsigned int)x86_pmu.events_maskl; diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 974e917e65b2..61da6b8a3d51 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -17,6 +17,7 @@ #include <linux/sizes.h> #include <asm/perf_event.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -36,7 +37,7 @@ enum { BTS_STATE_ACTIVE, }; -static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); +static struct bts_ctx __percpu *bts_ctx; #define BTS_RECORD_SIZE 24 #define BTS_SAFETY_MARGIN 4080 @@ -58,7 +59,7 @@ struct bts_buffer { local_t head; unsigned long end; void **data_pages; - struct bts_phys buf[]; + struct bts_phys buf[] __counted_by(nr_bufs); }; static struct pmu bts_pmu; @@ -80,54 +81,54 @@ static void * bts_buffer_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { - struct bts_buffer *buf; + struct bts_buffer *bb; struct page *page; int cpu = event->cpu; int node = (cpu == -1) ? cpu : cpu_to_node(cpu); unsigned long offset; size_t size = nr_pages << PAGE_SHIFT; - int pg, nbuf, pad; + int pg, nr_buf, pad; /* count all the high order buffers */ - for (pg = 0, nbuf = 0; pg < nr_pages;) { + for (pg = 0, nr_buf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); pg += buf_nr_pages(page); - nbuf++; + nr_buf++; } /* * to avoid interrupts in overwrite mode, only allow one physical */ - if (overwrite && nbuf > 1) + if (overwrite && nr_buf > 1) return NULL; - buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node); - if (!buf) + bb = kzalloc_node(struct_size(bb, buf, nr_buf), GFP_KERNEL, node); + if (!bb) return NULL; - buf->nr_pages = nr_pages; - buf->nr_bufs = nbuf; - buf->snapshot = overwrite; - buf->data_pages = pages; - buf->real_size = size - size % BTS_RECORD_SIZE; + bb->nr_pages = nr_pages; + bb->nr_bufs = nr_buf; + bb->snapshot = overwrite; + bb->data_pages = pages; + bb->real_size = size - size % BTS_RECORD_SIZE; - for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) { + for (pg = 0, nr_buf = 0, offset = 0, pad = 0; nr_buf < bb->nr_bufs; nr_buf++) { unsigned int __nr_pages; page = virt_to_page(pages[pg]); __nr_pages = buf_nr_pages(page); - buf->buf[nbuf].page = page; - buf->buf[nbuf].offset = offset; - buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); - buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement; - pad = buf->buf[nbuf].size % BTS_RECORD_SIZE; - buf->buf[nbuf].size -= pad; + bb->buf[nr_buf].page = page; + bb->buf[nr_buf].offset = offset; + bb->buf[nr_buf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); + bb->buf[nr_buf].size = buf_size(page) - bb->buf[nr_buf].displacement; + pad = bb->buf[nr_buf].size % BTS_RECORD_SIZE; + bb->buf[nr_buf].size -= pad; pg += __nr_pages; offset += __nr_pages << PAGE_SHIFT; } - return buf; + return bb; } static void bts_buffer_free_aux(void *data) @@ -135,25 +136,25 @@ static void bts_buffer_free_aux(void *data) kfree(data); } -static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx) +static unsigned long bts_buffer_offset(struct bts_buffer *bb, unsigned int idx) { - return buf->buf[idx].offset + buf->buf[idx].displacement; + return bb->buf[idx].offset + bb->buf[idx].displacement; } static void -bts_config_buffer(struct bts_buffer *buf) +bts_config_buffer(struct bts_buffer *bb) { int cpu = raw_smp_processor_id(); struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - struct bts_phys *phys = &buf->buf[buf->cur_buf]; + struct bts_phys *phys = &bb->buf[bb->cur_buf]; unsigned long index, thresh = 0, end = phys->size; struct page *page = phys->page; - index = local_read(&buf->head); + index = local_read(&bb->head); - if (!buf->snapshot) { - if (buf->end < phys->offset + buf_size(page)) - end = buf->end - phys->offset - phys->displacement; + if (!bb->snapshot) { + if (bb->end < phys->offset + buf_size(page)) + end = bb->end - phys->offset - phys->displacement; index -= phys->offset + phys->displacement; @@ -168,7 +169,7 @@ bts_config_buffer(struct bts_buffer *buf) ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement; ds->bts_index = ds->bts_buffer_base + index; ds->bts_absolute_maximum = ds->bts_buffer_base + end; - ds->bts_interrupt_threshold = !buf->snapshot + ds->bts_interrupt_threshold = !bb->snapshot ? ds->bts_buffer_base + thresh : ds->bts_absolute_maximum + BTS_RECORD_SIZE; } @@ -184,16 +185,16 @@ static void bts_update(struct bts_ctx *bts) { int cpu = raw_smp_processor_id(); struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_buffer *bb = perf_get_aux(&bts->handle); unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head; - if (!buf) + if (!bb) return; - head = index + bts_buffer_offset(buf, buf->cur_buf); - old = local_xchg(&buf->head, head); + head = index + bts_buffer_offset(bb, bb->cur_buf); + old = local_xchg(&bb->head, head); - if (!buf->snapshot) { + if (!bb->snapshot) { if (old == head) return; @@ -205,9 +206,9 @@ static void bts_update(struct bts_ctx *bts) * old and head are always in the same physical buffer, so we * can subtract them to get the data size. */ - local_add(head - old, &buf->data_size); + local_add(head - old, &bb->data_size); } else { - local_set(&buf->data_size, head); + local_set(&bb->data_size, head); } /* @@ -218,7 +219,7 @@ static void bts_update(struct bts_ctx *bts) } static int -bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); +bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle); /* * Ordering PMU callbacks wrt themselves and the PMI is done by means @@ -231,18 +232,18 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); static void __bts_event_start(struct perf_event *event) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); + struct bts_buffer *bb = perf_get_aux(&bts->handle); u64 config = 0; - if (!buf->snapshot) + if (!bb->snapshot) config |= ARCH_PERFMON_EVENTSEL_INT; if (!event->attr.exclude_kernel) config |= ARCH_PERFMON_EVENTSEL_OS; if (!event->attr.exclude_user) config |= ARCH_PERFMON_EVENTSEL_USR; - bts_config_buffer(buf); + bts_config_buffer(bb); /* * local barrier to make sure that ds configuration made it @@ -260,14 +261,14 @@ static void __bts_event_start(struct perf_event *event) static void bts_event_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf; + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); + struct bts_buffer *bb; - buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) + bb = perf_aux_output_begin(&bts->handle, event); + if (!bb) goto fail_stop; - if (bts_buffer_reset(buf, &bts->handle)) + if (bts_buffer_reset(bb, &bts->handle)) goto fail_end_stop; bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; @@ -290,7 +291,7 @@ fail_stop: static void __bts_event_stop(struct perf_event *event, int state) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */ WRITE_ONCE(bts->state, state); @@ -305,28 +306,28 @@ static void __bts_event_stop(struct perf_event *event, int state) static void bts_event_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = NULL; + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); + struct bts_buffer *bb = NULL; int state = READ_ONCE(bts->state); if (state == BTS_STATE_ACTIVE) __bts_event_stop(event, BTS_STATE_STOPPED); if (state != BTS_STATE_STOPPED) - buf = perf_get_aux(&bts->handle); + bb = perf_get_aux(&bts->handle); event->hw.state |= PERF_HES_STOPPED; if (flags & PERF_EF_UPDATE) { bts_update(bts); - if (buf) { - if (buf->snapshot) + if (bb) { + if (bb->snapshot) bts->handle.head = - local_xchg(&buf->data_size, - buf->nr_pages << PAGE_SHIFT); + local_xchg(&bb->data_size, + bb->nr_pages << PAGE_SHIFT); perf_aux_output_end(&bts->handle, - local_xchg(&buf->data_size, 0)); + local_xchg(&bb->data_size, 0)); } cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; @@ -338,9 +339,14 @@ static void bts_event_stop(struct perf_event *event, int flags) void intel_bts_enable_local(void) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - int state = READ_ONCE(bts->state); + struct bts_ctx *bts; + int state; + + if (!bts_ctx) + return; + bts = this_cpu_ptr(bts_ctx); + state = READ_ONCE(bts->state); /* * Here we transition from INACTIVE to ACTIVE; * if we instead are STOPPED from the interrupt handler, @@ -358,7 +364,12 @@ void intel_bts_enable_local(void) void intel_bts_disable_local(void) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts; + + if (!bts_ctx) + return; + + bts = this_cpu_ptr(bts_ctx); /* * Here we transition from ACTIVE to INACTIVE; @@ -372,19 +383,19 @@ void intel_bts_disable_local(void) } static int -bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) +bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle) { unsigned long head, space, next_space, pad, gap, skip, wakeup; unsigned int next_buf; struct bts_phys *phys, *next_phys; int ret; - if (buf->snapshot) + if (bb->snapshot) return 0; - head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); + head = handle->head & ((bb->nr_pages << PAGE_SHIFT) - 1); - phys = &buf->buf[buf->cur_buf]; + phys = &bb->buf[bb->cur_buf]; space = phys->offset + phys->displacement + phys->size - head; pad = space; if (space > handle->size) { @@ -393,10 +404,10 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) } if (space <= BTS_SAFETY_MARGIN) { /* See if next phys buffer has more space */ - next_buf = buf->cur_buf + 1; - if (next_buf >= buf->nr_bufs) + next_buf = bb->cur_buf + 1; + if (next_buf >= bb->nr_bufs) next_buf = 0; - next_phys = &buf->buf[next_buf]; + next_phys = &bb->buf[next_buf]; gap = buf_size(phys->page) - phys->displacement - phys->size + next_phys->displacement; skip = pad + gap; @@ -421,8 +432,8 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) * anymore, so we must not be racing with * bts_update(). */ - buf->cur_buf = next_buf; - local_set(&buf->head, head); + bb->cur_buf = next_buf; + local_set(&bb->head, head); } } } @@ -435,7 +446,7 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) space -= space % BTS_RECORD_SIZE; } - buf->end = head + space; + bb->end = head + space; /* * If we have no space, the lost notification would have been sent when @@ -450,12 +461,17 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) int intel_bts_interrupt(void) { struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct perf_event *event = bts->handle.event; - struct bts_buffer *buf; + struct bts_ctx *bts; + struct perf_event *event; + struct bts_buffer *bb; s64 old_head; int err = -ENOSPC, handled = 0; + if (!bts_ctx) + return 0; + + bts = this_cpu_ptr(bts_ctx); + event = bts->handle.event; /* * The only surefire way of knowing if this NMI is ours is by checking * the write ptr against the PMI threshold. @@ -470,8 +486,8 @@ int intel_bts_interrupt(void) if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) return handled; - buf = perf_get_aux(&bts->handle); - if (!buf) + bb = perf_get_aux(&bts->handle); + if (!bb) return handled; /* @@ -479,26 +495,26 @@ int intel_bts_interrupt(void) * there's no other way of telling, because the pointer will * keep moving */ - if (buf->snapshot) + if (bb->snapshot) return 0; - old_head = local_read(&buf->head); + old_head = local_read(&bb->head); bts_update(bts); /* no new data */ - if (old_head == local_read(&buf->head)) + if (old_head == local_read(&bb->head)) return handled; - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0)); + perf_aux_output_end(&bts->handle, local_xchg(&bb->data_size, 0)); - buf = perf_aux_output_begin(&bts->handle, event); - if (buf) - err = bts_buffer_reset(buf, &bts->handle); + bb = perf_aux_output_begin(&bts->handle, event); + if (bb) + err = bts_buffer_reset(bb, &bts->handle); if (err) { WRITE_ONCE(bts->state, BTS_STATE_STOPPED); - if (buf) { + if (bb) { /* * BTS_STATE_STOPPED should be visible before * cleared handle::event @@ -518,7 +534,7 @@ static void bts_event_del(struct perf_event *event, int mode) static int bts_event_add(struct perf_event *event, int mode) { - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_ctx *bts = this_cpu_ptr(bts_ctx); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; @@ -557,12 +573,9 @@ static int bts_event_init(struct perf_event *event) * disabled, so disallow intel_bts driver for unprivileged * users on paranoid systems since it provides trace data * to the user in a zero-copy fashion. - * - * Note that the default paranoia setting permits unprivileged - * users to profile the kernel. */ if (event->attr.exclude_kernel) { - ret = perf_allow_kernel(&event->attr); + ret = perf_allow_kernel(); if (ret) return ret; } @@ -587,7 +600,11 @@ static void bts_event_read(struct perf_event *event) static __init int bts_init(void) { - if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) + if (!boot_cpu_has(X86_FEATURE_DTES64)) + return -ENODEV; + + x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); + if (!x86_pmu.bts) return -ENODEV; if (boot_cpu_has(X86_FEATURE_PTI)) { @@ -608,6 +625,10 @@ static __init int bts_init(void) return -ENODEV; } + bts_ctx = alloc_percpu(struct bts_ctx); + if (!bts_ctx) + return -ENOMEM; + bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE | PERF_PMU_CAP_EXCLUSIVE; bts_pmu.task_ctx_nr = perf_sw_context; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 768d1414897f..466283326630 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -23,6 +23,7 @@ #include <asm/intel_pt.h> #include <asm/apic.h> #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -220,6 +221,17 @@ static struct event_constraint intel_grt_event_constraints[] __read_mostly = { EVENT_CONSTRAINT_END }; +static struct event_constraint intel_skt_event_constraints[] __read_mostly = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ + FIXED_EVENT_CONSTRAINT(0x0073, 4), /* TOPDOWN_BAD_SPECULATION.ALL */ + FIXED_EVENT_CONSTRAINT(0x019c, 5), /* TOPDOWN_FE_BOUND.ALL */ + FIXED_EVENT_CONSTRAINT(0x02c2, 6), /* TOPDOWN_RETIRING.ALL */ + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -370,6 +382,59 @@ static struct extra_reg intel_rwc_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +static struct event_constraint intel_lnc_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_HEAVY_OPS, 4), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BR_MISPREDICT, 5), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), + METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), + + INTEL_EVENT_CONSTRAINT(0x20, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x012a, 0xf), + INTEL_UEVENT_CONSTRAINT(0x012b, 0xf), + INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), + INTEL_UEVENT_CONSTRAINT(0x0175, 0x4), + + INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff), + INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff), + + INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), + INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), + INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1), + INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8), + INTEL_UEVENT_CONSTRAINT(0x01cd, 0x3fc), + INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3), + + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x00e0, 0xf), + + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_lnc_extra_regs[] __read_mostly = { + INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0xfffffffffffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0xfffffffffffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x02c6, MSR_PEBS_FRONTEND, 0x9, FE), + INTEL_UEVENT_EXTRA_REG(0x03c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0xf, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), + EVENT_EXTRA_END +}; + EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); @@ -2160,6 +2225,18 @@ static struct extra_reg intel_cmt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_skt, "event=0x9c,umask=0x01"); +EVENT_ATTR_STR(topdown-retiring, td_retiring_skt, "event=0xc2,umask=0x02"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound_skt, "event=0xa4,umask=0x02"); + +static struct attribute *skt_events_attrs[] = { + EVENT_PTR(td_fe_bound_skt), + EVENT_PTR(td_retiring_skt), + EVENT_PTR(td_bad_spec_cmt), + EVENT_PTR(td_be_bound_skt), + NULL, +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -2221,7 +2298,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0); if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); @@ -2230,7 +2307,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts) static __always_inline void intel_pmu_disable_all(void) { __intel_pmu_disable_all(true); - intel_pmu_pebs_disable_all(); + static_call_cond(x86_pmu_pebs_disable_all)(); intel_pmu_lbr_disable_all(); } @@ -2242,11 +2319,11 @@ static void __intel_pmu_enable_all(int added, bool pmi) intel_pmu_lbr_enable_all(pmi); if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) { - wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val); + wrmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val); cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val; } - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, intel_ctrl & ~cpuc->intel_ctrl_guest_mask); if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { @@ -2262,7 +2339,7 @@ static void __intel_pmu_enable_all(int added, bool pmi) static void intel_pmu_enable_all(int added) { - intel_pmu_pebs_enable_all(); + static_call_cond(x86_pmu_pebs_enable_all)(); __intel_pmu_enable_all(added, false); } @@ -2362,12 +2439,12 @@ static void intel_pmu_nhm_workaround(void) } for (i = 0; i < 4; i++) { - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); - wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); + wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); + wrmsrq(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); } - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); for (i = 0; i < 4; i++) { event = cpuc->events[i]; @@ -2377,7 +2454,7 @@ static void intel_pmu_nhm_workaround(void) __x86_pmu_enable_event(&event->hw, ARCH_PERFMON_EVENTSEL_ENABLE); } else - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); + wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); } } @@ -2394,7 +2471,7 @@ static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on) if (cpuc->tfa_shadow != val) { cpuc->tfa_shadow = val; - wrmsrl(MSR_TSX_FORCE_ABORT, val); + wrmsrq(MSR_TSX_FORCE_ABORT, val); } } @@ -2425,14 +2502,14 @@ static inline u64 intel_pmu_get_status(void) { u64 status; - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status); return status; } static inline void intel_pmu_ack_status(u64 ack) { - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); + wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } static inline bool event_is_checkpointed(struct perf_event *event) @@ -2519,7 +2596,7 @@ static void intel_pmu_disable_event(struct perf_event *event) * so we don't trigger the event without PEBS bit set. */ if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_disable(event); + static_call(x86_pmu_pebs_disable)(event); } static void intel_pmu_assign_event(struct perf_event *event, int idx) @@ -2539,6 +2616,9 @@ static void intel_pmu_del_event(struct perf_event *event) intel_pmu_lbr_del(event); if (event->attr.precise_ip) intel_pmu_pebs_del(event); + if (is_pebs_counter_event_group(event) || + is_acr_event_group(event)) + this_cpu_ptr(&cpu_hw_events)->n_late_setup--; } static int icl_set_topdown_event_period(struct perf_event *event) @@ -2555,15 +2635,15 @@ static int icl_set_topdown_event_period(struct perf_event *event) * Don't need to clear them again. */ if (left == x86_pmu.max_period) { - wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0); - wrmsrl(MSR_PERF_METRICS, 0); + wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0); + wrmsrq(MSR_PERF_METRICS, 0); hwc->saved_slots = 0; hwc->saved_metric = 0; } if ((hwc->saved_slots) && is_slots_event(event)) { - wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots); - wrmsrl(MSR_PERF_METRICS, hwc->saved_metric); + wrmsrq(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots); + wrmsrq(MSR_PERF_METRICS, hwc->saved_metric); } perf_event_update_userpage(event); @@ -2650,7 +2730,7 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots, * modify by a NMI. PMU has to be disabled before calling this function. */ -static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) +static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct perf_event *other; @@ -2658,13 +2738,24 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) bool reset = true; int idx; - /* read Fixed counter 3 */ - rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots); - if (!slots) - return 0; + if (!val) { + /* read Fixed counter 3 */ + slots = rdpmc(3 | INTEL_PMC_FIXED_RDPMC_BASE); + if (!slots) + return 0; - /* read PERF_METRICS */ - rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics); + /* read PERF_METRICS */ + metrics = rdpmc(INTEL_PMC_FIXED_RDPMC_METRICS); + } else { + slots = val[0]; + metrics = val[1]; + /* + * Don't reset the PERF_METRICS and Fixed counter 3 + * for each PEBS record read. Utilize the RDPMC metrics + * clear mode. + */ + reset = false; + } for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) { if (!is_topdown_idx(idx)) @@ -2698,8 +2789,8 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) if (reset) { /* The fixed counter 3 has to be written before the PERF_METRICS. */ - wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0); - wrmsrl(MSR_PERF_METRICS, 0); + wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0); + wrmsrq(MSR_PERF_METRICS, 0); if (event) update_saved_topdown_regs(event, 0, 0, metric_end); } @@ -2707,36 +2798,47 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end) return slots; } -static u64 icl_update_topdown_event(struct perf_event *event) +static u64 icl_update_topdown_event(struct perf_event *event, u64 *val) { return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE + - x86_pmu.num_topdown_events - 1); + x86_pmu.num_topdown_events - 1, + val); } -DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); +DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update); -static void intel_pmu_read_topdown_event(struct perf_event *event) +static void intel_pmu_read_event(struct perf_event *event) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN) || + is_pebs_counter_event_group(event)) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + bool pmu_enabled = cpuc->enabled; - /* Only need to call update_topdown_event() once for group read. */ - if ((cpuc->txn_flags & PERF_PMU_TXN_READ) && - !is_slots_event(event)) - return; + /* Only need to call update_topdown_event() once for group read. */ + if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ)) + return; - perf_pmu_disable(event->pmu); - static_call(intel_pmu_update_topdown_event)(event); - perf_pmu_enable(event->pmu); -} + cpuc->enabled = 0; + if (pmu_enabled) + intel_pmu_disable_all(); -static void intel_pmu_read_event(struct perf_event *event) -{ - if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) - intel_pmu_auto_reload_read(event); - else if (is_topdown_count(event)) - intel_pmu_read_topdown_event(event); - else - x86_perf_event_update(event); + /* + * If the PEBS counters snapshotting is enabled, + * the topdown event is available in PEBS records. + */ + if (is_topdown_event(event) && !is_pebs_counter_event_group(event)) + static_call(intel_pmu_update_topdown_event)(event, NULL); + else + intel_pmu_drain_pebs_buffer(); + + cpuc->enabled = pmu_enabled; + if (pmu_enabled) + intel_pmu_enable_all(0); + + return; + } + + x86_perf_event_update(event); } static void intel_pmu_enable_fixed(struct perf_event *event) @@ -2756,6 +2858,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event) return; idx = INTEL_PMC_IDX_FIXED_SLOTS; + + if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR) + bits |= INTEL_FIXED_3_METRICS_CLEAR; } intel_set_masks(event, idx); @@ -2791,6 +2896,52 @@ static void intel_pmu_enable_fixed(struct perf_event *event) cpuc->fixed_ctrl_val |= bits; } +static void intel_pmu_config_acr(int idx, u64 mask, u32 reload) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int msr_b, msr_c; + + if (!mask && !cpuc->acr_cfg_b[idx]) + return; + + if (idx < INTEL_PMC_IDX_FIXED) { + msr_b = MSR_IA32_PMC_V6_GP0_CFG_B; + msr_c = MSR_IA32_PMC_V6_GP0_CFG_C; + } else { + msr_b = MSR_IA32_PMC_V6_FX0_CFG_B; + msr_c = MSR_IA32_PMC_V6_FX0_CFG_C; + idx -= INTEL_PMC_IDX_FIXED; + } + + if (cpuc->acr_cfg_b[idx] != mask) { + wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask); + cpuc->acr_cfg_b[idx] = mask; + } + /* Only need to update the reload value when there is a valid config value. */ + if (mask && cpuc->acr_cfg_c[idx] != reload) { + wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload); + cpuc->acr_cfg_c[idx] = reload; + } +} + +static void intel_pmu_enable_acr(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!is_acr_event_group(event) || !event->attr.config2) { + /* + * The disable doesn't clear the ACR CFG register. + * Check and clear the ACR CFG register. + */ + intel_pmu_config_acr(hwc->idx, 0, 0); + return; + } + + intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period); +} + +DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr); + static void intel_pmu_enable_event(struct perf_event *event) { u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE; @@ -2798,16 +2949,19 @@ static void intel_pmu_enable_event(struct perf_event *event) int idx = hwc->idx; if (unlikely(event->attr.precise_ip)) - intel_pmu_pebs_enable(event); + static_call(x86_pmu_pebs_enable)(event); switch (idx) { case 0 ... INTEL_PMC_IDX_FIXED - 1: if (branch_sample_counters(event)) enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR; intel_set_masks(event, idx); + static_call_cond(intel_pmu_enable_acr_event)(event); __x86_pmu_enable_event(hwc, enable_mask); break; case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: + static_call_cond(intel_pmu_enable_acr_event)(event); + fallthrough; case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END: intel_pmu_enable_fixed(event); break; @@ -2825,12 +2979,51 @@ static void intel_pmu_enable_event(struct perf_event *event) } } +static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc) +{ + struct perf_event *event, *leader; + int i, j, idx; + + for (i = 0; i < cpuc->n_events; i++) { + leader = cpuc->event_list[i]; + if (!is_acr_event_group(leader)) + continue; + + /* The ACR events must be contiguous. */ + for (j = i; j < cpuc->n_events; j++) { + event = cpuc->event_list[j]; + if (event->group_leader != leader->group_leader) + break; + for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) { + if (WARN_ON_ONCE(i + idx > cpuc->n_events)) + return; + __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1); + } + } + i = j - 1; + } +} + +void intel_pmu_late_setup(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + + if (!cpuc->n_late_setup) + return; + + intel_pmu_pebs_late_setup(cpuc); + intel_pmu_acr_late_setup(cpuc); +} + static void intel_pmu_add_event(struct perf_event *event) { if (event->attr.precise_ip) intel_pmu_pebs_add(event); if (intel_pmu_needs_branch_stack(event)) intel_pmu_lbr_add(event); + if (is_pebs_counter_event_group(event) || + is_acr_event_group(event)) + this_cpu_ptr(&cpu_hw_events)->n_late_setup++; } /* @@ -2848,7 +3041,7 @@ int intel_pmu_save_and_restart(struct perf_event *event) */ if (unlikely(event_is_checkpointed(event))) { /* No race with NMIs because the counter should not be armed */ - wrmsrl(event->hw.event_base, 0); + wrmsrq(event->hw.event_base, 0); local64_set(&event->hw.prev_count, 0); } return static_call(x86_pmu_set_period)(event); @@ -2865,7 +3058,7 @@ static int intel_pmu_set_period(struct perf_event *event) static u64 intel_pmu_update(struct perf_event *event) { if (unlikely(is_topdown_count(event))) - return static_call(intel_pmu_update_topdown_event)(event); + return static_call(intel_pmu_update_topdown_event)(event, NULL); return x86_perf_event_update(event); } @@ -2874,26 +3067,26 @@ static void intel_pmu_reset(void) { struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); - int num_counters = hybrid(cpuc->pmu, num_counters); + unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask); + unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); unsigned long flags; int idx; - if (!num_counters) + if (!*(u64 *)cntr_mask) return; local_irq_save(flags); pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); - for (idx = 0; idx < num_counters; idx++) { - wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); - wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); + for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) { + wrmsrq_safe(x86_pmu_config_addr(idx), 0ull); + wrmsrq_safe(x86_pmu_event_addr(idx), 0ull); } - for (idx = 0; idx < num_counters_fixed; idx++) { + for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) { if (fixed_counter_disabled(idx, cpuc->pmu)) continue; - wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + wrmsrq_safe(x86_pmu_fixed_ctr_addr(idx), 0ull); } if (ds) @@ -2902,7 +3095,7 @@ static void intel_pmu_reset(void) /* Ack all overflows and disable fixed counters */ if (x86_pmu.version >= 2) { intel_pmu_ack_status(intel_pmu_get_status()); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0); } /* Reset LBRs and LBR freezing */ @@ -2940,15 +3133,13 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs, !guest_pebs_idxs) return; - for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, - INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed) { + for_each_set_bit(bit, (unsigned long *)&guest_pebs_idxs, X86_PMC_IDX_MAX) { event = cpuc->events[bit]; if (!event->attr.precise_ip) continue; perf_sample_data_init(data, 0, event->hw.last_period); - if (perf_event_overflow(event, data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, data, regs); /* Inject one fake event is enough. */ break; @@ -2961,7 +3152,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int bit; int handled = 0; - u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); inc_irq_stat(apic_perf_irqs); @@ -3004,8 +3194,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) handled++; x86_pmu_handle_guest_pebs(regs, &data); - x86_pmu.drain_pebs(regs, &data); - status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; + static_call(x86_pmu_drain_pebs)(regs, &data); /* * PMI throttle may be triggered, which stops the PEBS event. @@ -3015,7 +3204,16 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) * Update the MSR if pebs_enabled is changed. */ if (pebs_enabled != cpuc->pebs_enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + + /* + * Above PEBS handler (PEBS counters snapshotting) has updated fixed + * counter 3 and perf metrics counts if they are in counter group, + * unnecessary to update again. + */ + if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] && + is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS])) + status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT; } /* @@ -3032,9 +3230,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) { handled++; - static_call(intel_pmu_update_topdown_event)(NULL); + static_call(intel_pmu_update_topdown_event)(NULL, NULL); } + status &= hybrid(cpuc->pmu, intel_ctrl); + /* * Checkpointed counters can lead to 'spurious' PMIs because the * rollback caused by the PMI will have cleared the overflow status @@ -3044,22 +3244,45 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; + u64 last_period; handled++; if (!test_bit(bit, cpuc->active_mask)) continue; + /* + * There may be unprocessed PEBS records in the PEBS buffer, + * which still stores the previous values. + * Process those records first before handling the latest value. + * For example, + * A is a regular counter + * B is a PEBS event which reads A + * C is a PEBS event + * + * The following can happen: + * B-assist A=1 + * C A=2 + * B-assist A=3 + * A-overflow-PMI A=4 + * C-assist-PMI (PEBS buffer) A=5 + * + * The PEBS buffer has to be drained before handling the A-PMI + */ + if (is_pebs_counter_event_group(event)) + x86_pmu.drain_pebs(regs, &data); + + last_period = event->hw.last_period; + if (!intel_pmu_save_and_restart(event)) continue; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); if (has_branch_stack(event)) intel_pmu_lbr_save_brstack(&data, cpuc, event); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } return handled; @@ -3621,10 +3844,9 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, if (cpuc->excl_cntrs) return intel_get_excl_constraints(cpuc, event, idx, c2); - /* Not all counters support the branch counter feature. */ - if (branch_sample_counters(event)) { + if (event->hw.dyn_constraint != ~0ULL) { c2 = dyn_constraint(cpuc, c2, idx); - c2->idxmsk64 &= x86_pmu.lbr_counters; + c2->idxmsk64 &= event->hw.dyn_constraint; c2->weight = hweight64(c2->idxmsk64); } @@ -3886,6 +4108,118 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx) return test_bit(idx, (unsigned long *)&intel_cap->capabilities); } +static u64 intel_pmu_freq_start_period(struct perf_event *event) +{ + int type = event->attr.type; + u64 config, factor; + s64 start; + + /* + * The 127 is the lowest possible recommended SAV (sample after value) + * for a 4000 freq (default freq), according to the event list JSON file. + * Also, assume the workload is idle 50% time. + */ + factor = 64 * 4000; + if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) + goto end; + + /* + * The estimation of the start period in the freq mode is + * based on the below assumption. + * + * For a cycles or an instructions event, 1GHZ of the + * underlying platform, 1 IPC. The workload is idle 50% time. + * The start period = 1,000,000,000 * 1 / freq / 2. + * = 500,000,000 / freq + * + * Usually, the branch-related events occur less than the + * instructions event. According to the Intel event list JSON + * file, the SAV (sample after value) of a branch-related event + * is usually 1/4 of an instruction event. + * The start period of branch-related events = 125,000,000 / freq. + * + * The cache-related events occurs even less. The SAV is usually + * 1/20 of an instruction event. + * The start period of cache-related events = 25,000,000 / freq. + */ + config = event->attr.config & PERF_HW_EVENT_MASK; + if (type == PERF_TYPE_HARDWARE) { + switch (config) { + case PERF_COUNT_HW_CPU_CYCLES: + case PERF_COUNT_HW_INSTRUCTIONS: + case PERF_COUNT_HW_BUS_CYCLES: + case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: + case PERF_COUNT_HW_STALLED_CYCLES_BACKEND: + case PERF_COUNT_HW_REF_CPU_CYCLES: + factor = 500000000; + break; + case PERF_COUNT_HW_BRANCH_INSTRUCTIONS: + case PERF_COUNT_HW_BRANCH_MISSES: + factor = 125000000; + break; + case PERF_COUNT_HW_CACHE_REFERENCES: + case PERF_COUNT_HW_CACHE_MISSES: + factor = 25000000; + break; + default: + goto end; + } + } + + if (type == PERF_TYPE_HW_CACHE) + factor = 25000000; +end: + /* + * Usually, a prime or a number with less factors (close to prime) + * is chosen as an SAV, which makes it less likely that the sampling + * period synchronizes with some periodic event in the workload. + * Minus 1 to make it at least avoiding values near power of twos + * for the default freq. + */ + start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1; + + if (start > x86_pmu.max_period) + start = x86_pmu.max_period; + + if (x86_pmu.limit_period) + x86_pmu.limit_period(event, &start); + + return start; +} + +static inline bool intel_pmu_has_acr(struct pmu *pmu) +{ + return !!hybrid(pmu, acr_cause_mask64); +} + +static bool intel_pmu_is_acr_group(struct perf_event *event) +{ + /* The group leader has the ACR flag set */ + if (is_acr_event_group(event)) + return true; + + /* The acr_mask is set */ + if (event->attr.config2) + return true; + + return false; +} + +static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event, + u64 *cause_mask, int *num) +{ + event->hw.dyn_constraint &= hybrid(event->pmu, acr_cntr_mask64); + *cause_mask |= event->attr.config2; + *num += 1; +} + +static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event, + int idx, u64 cause_mask) +{ + if (test_bit(idx, (unsigned long *)&cause_mask)) + event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -3897,14 +4231,20 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (event->attr.freq && event->attr.sample_freq) { + event->hw.sample_period = intel_pmu_freq_start_period(event); + event->hw.last_period = event->hw.sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + } + if (event->attr.precise_ip) { if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) return -EINVAL; if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; - if (!(event->attr.sample_type & - ~intel_pmu_large_pebs_flags(event))) { + if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event)) && + !has_aux_action(event)) { event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; event->attach_state |= PERF_ATTACH_SCHED_CB; } @@ -3913,8 +4253,12 @@ static int intel_pmu_hw_config(struct perf_event *event) x86_pmu.pebs_aliases(event); } - if (needs_branch_stack(event) && is_sampling_event(event)) - event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK; + if (needs_branch_stack(event)) { + /* Avoid branch stack setup for counting events in SAMPLE READ */ + if (is_sampling_event(event) || + !(event->attr.sample_type & PERF_SAMPLE_READ)) + event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK; + } if (branch_sample_counters(event)) { struct perf_event *leader, *sibling; @@ -3937,15 +4281,19 @@ static int intel_pmu_hw_config(struct perf_event *event) leader = event->group_leader; if (branch_sample_call_stack(leader)) return -EINVAL; - if (branch_sample_counters(leader)) + if (branch_sample_counters(leader)) { num++; + leader->hw.dyn_constraint &= x86_pmu.lbr_counters; + } leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS; for_each_sibling_event(sibling, leader) { if (branch_sample_call_stack(sibling)) return -EINVAL; - if (branch_sample_counters(sibling)) + if (branch_sample_counters(sibling)) { num++; + sibling->hw.dyn_constraint &= x86_pmu.lbr_counters; + } } if (num > fls(x86_pmu.lbr_counters)) @@ -3993,6 +4341,101 @@ static int intel_pmu_hw_config(struct perf_event *event) event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT; } + if ((event->attr.sample_type & PERF_SAMPLE_READ) && + (x86_pmu.intel_cap.pebs_format >= 6) && + x86_pmu.intel_cap.pebs_baseline && + is_sampling_event(event) && + event->attr.precise_ip) + event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR; + + if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) { + struct perf_event *sibling, *leader = event->group_leader; + struct pmu *pmu = event->pmu; + bool has_sw_event = false; + int num = 0, idx = 0; + u64 cause_mask = 0; + + /* Not support perf metrics */ + if (is_metric_event(event)) + return -EINVAL; + + /* Not support freq mode */ + if (event->attr.freq) + return -EINVAL; + + /* PDist is not supported */ + if (event->attr.config2 && event->attr.precise_ip > 2) + return -EINVAL; + + /* The reload value cannot exceeds the max period */ + if (event->attr.sample_period > x86_pmu.max_period) + return -EINVAL; + /* + * The counter-constraints of each event cannot be finalized + * unless the whole group is scanned. However, it's hard + * to know whether the event is the last one of the group. + * Recalculate the counter-constraints for each event when + * adding a new event. + * + * The group is traversed twice, which may be optimized later. + * In the first round, + * - Find all events which do reload when other events + * overflow and set the corresponding counter-constraints + * - Add all events, which can cause other events reload, + * in the cause_mask + * - Error out if the number of events exceeds the HW limit + * - The ACR events must be contiguous. + * Error out if there are non-X86 events between ACR events. + * This is not a HW limit, but a SW limit. + * With the assumption, the intel_pmu_acr_late_setup() can + * easily convert the event idx to counter idx without + * traversing the whole event list. + */ + if (!is_x86_event(leader)) + return -EINVAL; + + if (leader->attr.config2) + intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num); + + if (leader->nr_siblings) { + for_each_sibling_event(sibling, leader) { + if (!is_x86_event(sibling)) { + has_sw_event = true; + continue; + } + if (!sibling->attr.config2) + continue; + if (has_sw_event) + return -EINVAL; + intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num); + } + } + if (leader != event && event->attr.config2) { + if (has_sw_event) + return -EINVAL; + intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num); + } + + if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) || + num > hweight64(hybrid(event->pmu, acr_cntr_mask64))) + return -EINVAL; + /* + * In the second round, apply the counter-constraints for + * the events which can cause other events reload. + */ + intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask); + + if (leader->nr_siblings) { + for_each_sibling_event(sibling, leader) + intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask); + } + + if (leader != event) + intel_pmu_set_acr_caused_constr(event, idx, cause_mask); + + leader->hw.flags |= PERF_X86_EVENT_ACR; + } + if ((event->attr.type == PERF_TYPE_HARDWARE) || (event->attr.type == PERF_TYPE_HW_CACHE)) return 0; @@ -4008,7 +4451,12 @@ static int intel_pmu_hw_config(struct perf_event *event) * is used in a metrics group, it too cannot support sampling. */ if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) { - if (event->attr.config1 || event->attr.config2) + /* The metrics_clear can only be set for the slots event */ + if (event->attr.config1 && + (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR))) + return -EINVAL; + + if (event->attr.config2) return -EINVAL; /* @@ -4087,7 +4535,7 @@ static int intel_pmu_hw_config(struct perf_event *event) if (x86_pmu.version < 3) return -EINVAL; - ret = perf_allow_cpu(&event->attr); + ret = perf_allow_cpu(); if (ret) return ret; @@ -4135,7 +4583,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask, }; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return arr; /* @@ -4176,7 +4624,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) arr[pebs_enable] = (struct perf_guest_switch_msr){ .msr = MSR_IA32_PEBS_ENABLE, .host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask, - .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask, + .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable, }; if (arr[pebs_enable].host) { @@ -4199,7 +4647,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data) struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; int idx; - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[idx]; arr[idx].msr = x86_pmu_config_addr(idx); @@ -4217,7 +4665,7 @@ static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr, void *data) arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; } - *nr = x86_pmu.num_counters; + *nr = x86_pmu_max_num_counters(cpuc->pmu); return arr; } @@ -4232,7 +4680,7 @@ static void core_pmu_enable_all(int added) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { struct hw_perf_event *hwc = &cpuc->events[idx]->hw; if (!test_bit(idx, cpuc->active_mask) || @@ -4525,9 +4973,50 @@ static int adl_hw_config(struct perf_event *event) return -EOPNOTSUPP; } -static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) +static enum intel_cpu_type adl_get_hybrid_cpu_type(void) { - return HYBRID_INTEL_CORE; + return INTEL_CPU_TYPE_CORE; +} + +static inline bool erratum_hsw11(struct perf_event *event) +{ + return (event->hw.config & INTEL_ARCH_EVENT_MASK) == + X86_CONFIG(.event=0xc0, .umask=0x01); +} + +static struct event_constraint * +arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->pmu_type == hybrid_tiny) + return cmt_get_event_constraints(cpuc, idx, event); + + return mtl_get_event_constraints(cpuc, idx, event); +} + +static int arl_h_hw_config(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->pmu_type == hybrid_tiny) + return intel_pmu_hw_config(event); + + return adl_hw_config(event); +} + +/* + * The HSW11 requires a period larger than 100 which is the same as the BDM11. + * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL. + * + * The message 'interrupt took too long' can be observed on any counter which + * was armed with a period < 32 and two events expired in the same NMI. + * A minimum period of 32 is enforced for the rest of the events. + */ +static void hsw_limit_period(struct perf_event *event, s64 *left) +{ + *left = max(*left, erratum_hsw11(event) ? 128 : 32); } /* @@ -4547,8 +5036,7 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) */ static void bdw_limit_period(struct perf_event *event, s64 *left) { - if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == - X86_CONFIG(.event=0xc0, .umask=0x01)) { + if (erratum_hsw11(event)) { if (*left < 128) *left = 128; *left &= ~0x3fULL; @@ -4573,8 +5061,65 @@ PMU_FORMAT_ATTR(pc, "config:19" ); PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ PMU_FORMAT_ATTR(inv, "config:23" ); PMU_FORMAT_ATTR(cmask, "config:24-31" ); -PMU_FORMAT_ATTR(in_tx, "config:32"); -PMU_FORMAT_ATTR(in_tx_cp, "config:33"); +PMU_FORMAT_ATTR(in_tx, "config:32" ); +PMU_FORMAT_ATTR(in_tx_cp, "config:33" ); +PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */ + +PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */ + +static ssize_t umask2_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + u64 mask = hybrid(dev_get_drvdata(dev), config_mask) & ARCH_PERFMON_EVENTSEL_UMASK2; + + if (mask == ARCH_PERFMON_EVENTSEL_UMASK2) + return sprintf(page, "config:8-15,40-47\n"); + + /* Roll back to the old format if umask2 is not supported. */ + return sprintf(page, "config:8-15\n"); +} + +static struct device_attribute format_attr_umask2 = + __ATTR(umask, 0444, umask2_show, NULL); + +static struct attribute *format_evtsel_ext_attrs[] = { + &format_attr_umask2.attr, + &format_attr_eq.attr, + &format_attr_metrics_clear.attr, + NULL +}; + +static umode_t +evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + u64 mask; + + /* + * The umask and umask2 have different formats but share the + * same attr name. In update mode, the previous value of the + * umask is unconditionally removed before is_visible. If + * umask2 format is not enumerated, it's impossible to roll + * back to the old format. + * Does the check in umask2_show rather than is_visible. + */ + if (i == 0) + return attr->mode; + + mask = hybrid(dev_get_drvdata(dev), config_mask); + if (i == 1) + return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0; + + /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */ + if (i == 2) { + union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap); + + return intel_cap.rdpmc_metrics_clear ? attr->mode : 0; + } + + return 0; +} static struct attribute *intel_arch_formats_attr[] = { &format_attr_event.attr, @@ -4636,7 +5181,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu) goto err; } - if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) { + if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) { size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); @@ -4684,13 +5229,33 @@ static void flip_smm_bit(void *data) } } -static void intel_pmu_check_num_counters(int *num_counters, - int *num_counters_fixed, - u64 *intel_ctrl, u64 fixed_mask); +static void intel_pmu_check_counters_mask(u64 *cntr_mask, + u64 *fixed_cntr_mask, + u64 *intel_ctrl) +{ + unsigned int bit; + + bit = fls64(*cntr_mask); + if (bit > INTEL_PMC_MAX_GENERIC) { + WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", + bit, INTEL_PMC_MAX_GENERIC); + *cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0); + } + *intel_ctrl = *cntr_mask; + + bit = fls64(*fixed_cntr_mask); + if (bit > INTEL_PMC_MAX_FIXED) { + WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", + bit, INTEL_PMC_MAX_FIXED); + *fixed_cntr_mask &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0); + } + + *intel_ctrl |= *fixed_cntr_mask << INTEL_PMC_IDX_FIXED; +} static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, - int num_counters, - int num_counters_fixed, + u64 cntr_mask, + u64 fixed_cntr_mask, u64 intel_ctrl); static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); @@ -4698,54 +5263,66 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); static inline bool intel_pmu_broken_perf_cap(void) { /* The Perf Metric (Bit 15) is always cleared */ - if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) || - (boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L)) + if (boot_cpu_data.x86_vfm == INTEL_METEORLAKE || + boot_cpu_data.x86_vfm == INTEL_METEORLAKE_L) return true; return false; } -static void update_pmu_cap(struct x86_hybrid_pmu *pmu) +static void update_pmu_cap(struct pmu *pmu) { - unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF); - unsigned int eax, ebx, ecx, edx; + unsigned int cntr, fixed_cntr, ecx, edx; + union cpuid35_eax eax; + union cpuid35_ebx ebx; + + cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx); + + if (ebx.split.umask2) + hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2; + if (ebx.split.eq) + hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ; - if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) { + if (eax.split.cntr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, - &eax, &ebx, &ecx, &edx); - pmu->num_counters = fls(eax); - pmu->num_counters_fixed = fls(ebx); + &cntr, &fixed_cntr, &ecx, &edx); + hybrid(pmu, cntr_mask64) = cntr; + hybrid(pmu, fixed_cntr_mask64) = fixed_cntr; } + if (eax.split.acr_subleaf) { + cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF, + &cntr, &fixed_cntr, &ecx, &edx); + /* The mask of the counters which can be reloaded */ + hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED); + + /* The mask of the counters which can cause a reload of reloadable counters */ + hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED); + } if (!intel_pmu_broken_perf_cap()) { /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ - rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); + rdmsrq(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities); } } static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) { - intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, - &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1); - pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + intel_pmu_check_counters_mask(&pmu->cntr_mask64, &pmu->fixed_cntr_mask64, + &pmu->intel_ctrl); + pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64); pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); + __EVENT_CONSTRAINT(0, pmu->cntr_mask64, + 0, x86_pmu_num_counters(&pmu->pmu), 0, 0); if (pmu->intel_cap.perf_metrics) pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; else pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); - if (pmu->intel_cap.pebs_output_pt_available) - pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; - else - pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; - intel_pmu_check_event_constraints(pmu->event_constraints, - pmu->num_counters, - pmu->num_counters_fixed, + pmu->cntr_mask64, + pmu->fixed_cntr_mask64, pmu->intel_ctrl); intel_pmu_check_extra_regs(pmu->extra_regs); @@ -4753,7 +5330,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) { - u8 cpu_type = get_this_hybrid_cpu_type(); + struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + enum intel_cpu_type cpu_type = c->topo.intel_type; int i; /* @@ -4762,7 +5340,7 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) * on it. There should be a fixup function provided for these * troublesome CPUs (->get_hybrid_cpu_type). */ - if (cpu_type == HYBRID_INTEL_NONE) { + if (cpu_type == INTEL_CPU_TYPE_UNKNOWN) { if (x86_pmu.get_hybrid_cpu_type) cpu_type = x86_pmu.get_hybrid_cpu_type(); else @@ -4771,17 +5349,26 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) /* * This essentially just maps between the 'hybrid_cpu_type' - * and 'hybrid_pmu_type' enums: + * and 'hybrid_pmu_type' enums except for ARL-H processor + * which needs to compare atom uarch native id since ARL-H + * contains two different atom uarchs. */ for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; + u32 native_id; - if (cpu_type == HYBRID_INTEL_CORE && - pmu_type == hybrid_big) - return &x86_pmu.hybrid_pmu[i]; - if (cpu_type == HYBRID_INTEL_ATOM && - pmu_type == hybrid_small) + if (cpu_type == INTEL_CPU_TYPE_CORE && pmu_type == hybrid_big) return &x86_pmu.hybrid_pmu[i]; + if (cpu_type == INTEL_CPU_TYPE_ATOM) { + if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small) + return &x86_pmu.hybrid_pmu[i]; + + native_id = c->topo.intel_native_model_id; + if (native_id == INTEL_ATOM_SKT_NATIVE_ID && pmu_type == hybrid_small) + return &x86_pmu.hybrid_pmu[i]; + if (native_id == INTEL_ATOM_CMT_NATIVE_ID && pmu_type == hybrid_tiny) + return &x86_pmu.hybrid_pmu[i]; + } } return NULL; @@ -4802,22 +5389,18 @@ static bool init_hybrid_pmu(int cpu) goto end; if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) - update_pmu_cap(pmu); + update_pmu_cap(&pmu->pmu); intel_pmu_check_hybrid_pmus(pmu); - if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) + if (!check_hw_exists(&pmu->pmu, pmu->cntr_mask, pmu->fixed_cntr_mask)) return false; pr_info("%s PMU driver: ", pmu->name); - if (pmu->intel_cap.pebs_output_pt_available) - pr_cont("PEBS-via-PT "); - pr_cont("\n"); - x86_pmu_show_pmu_cap(pmu->num_counters, pmu->num_counters_fixed, - pmu->intel_ctrl); + x86_pmu_show_pmu_cap(&pmu->pmu); end: cpumask_set_cpu(cpu, &pmu->supported_cpus); @@ -4837,8 +5420,11 @@ static void intel_pmu_cpu_starting(int cpu) init_debug_store_on_cpu(cpu); /* - * Deal with CPUs that don't clear their LBRs on power-up. + * Deal with CPUs that don't clear their LBRs on power-up, and that may + * even boot with LBRs enabled. */ + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr) + msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT); intel_pmu_lbr_reset(); cpuc->lbr_sel = NULL; @@ -4864,7 +5450,7 @@ static void intel_pmu_cpu_starting(int cpu) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) { union perf_capabilities perf_cap; - rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities); + rdmsrq(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities); if (!perf_cap.perf_metrics) { x86_pmu.intel_cap.perf_metrics = 0; x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); @@ -4957,16 +5543,10 @@ static void intel_pmu_cpu_dead(int cpu) } static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, - bool sched_in) + struct task_struct *task, bool sched_in) { intel_pmu_pebs_sched_task(pmu_ctx, sched_in); - intel_pmu_lbr_sched_task(pmu_ctx, sched_in); -} - -static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc) -{ - intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc); + intel_pmu_lbr_sched_task(pmu_ctx, task, sched_in); } static int intel_pmu_check_period(struct perf_event *event, u64 value) @@ -5058,6 +5638,7 @@ static __initconst const struct x86_pmu core_pmu = { .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .fixedctr = MSR_ARCH_PERFMON_FIXED_CTR0, .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, @@ -5111,6 +5692,7 @@ static __initconst const struct x86_pmu intel_pmu = { .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .fixedctr = MSR_ARCH_PERFMON_FIXED_CTR0, .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, @@ -5135,7 +5717,6 @@ static __initconst const struct x86_pmu intel_pmu = { .guest_get_msrs = intel_guest_get_msrs, .sched_task = intel_pmu_sched_task, - .swap_task_ctx = intel_pmu_swap_task_ctx, .check_period = intel_pmu_check_period, @@ -5182,46 +5763,36 @@ static __init void intel_clovertown_quirk(void) * these chips. */ pr_warn("PEBS disabled due to CPU errata\n"); - x86_pmu.pebs = 0; + x86_pmu.ds_pebs = 0; x86_pmu.pebs_constraints = NULL; } -static const struct x86_cpu_desc isolation_ucodes[] = { - INTEL_CPU_DESC(INTEL_FAM6_HASWELL, 3, 0x0000001f), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L, 1, 0x0000001e), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G, 1, 0x00000015), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL, 4, 0x00000023), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G, 1, 0x00000014), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 2, 0x00000010), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 9, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 10, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 11, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 12, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 10, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 11, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 12, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 13, 0x0000004e), +static const struct x86_cpu_id isolation_ucodes[] = { + X86_MATCH_VFM_STEPS(INTEL_HASWELL, 3, 3, 0x0000001f), + X86_MATCH_VFM_STEPS(INTEL_HASWELL_L, 1, 1, 0x0000001e), + X86_MATCH_VFM_STEPS(INTEL_HASWELL_G, 1, 1, 0x00000015), + X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 2, 2, 0x00000037), + X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 4, 4, 0x0000000a), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL, 4, 4, 0x00000023), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_G, 1, 1, 0x00000014), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 2, 2, 0x00000010), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 3, 3, 0x07000009), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 4, 4, 0x0f000009), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 5, 5, 0x0e000002), + X86_MATCH_VFM_STEPS(INTEL_BROADWELL_X, 1, 1, 0x0b000014), + X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 3, 3, 0x00000021), + X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 4, 7, 0x00000000), + X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 11, 11, 0x00000000), + X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_L, 3, 3, 0x0000007c), + X86_MATCH_VFM_STEPS(INTEL_SKYLAKE, 3, 3, 0x0000007c), + X86_MATCH_VFM_STEPS(INTEL_KABYLAKE, 9, 13, 0x0000004e), + X86_MATCH_VFM_STEPS(INTEL_KABYLAKE_L, 9, 12, 0x0000004e), {} }; static void intel_check_pebs_isolation(void) { - x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes); + x86_pmu.pebs_no_isolation = !x86_match_min_microcode_rev(isolation_ucodes); } static __init void intel_pebs_isolation_quirk(void) @@ -5231,16 +5802,16 @@ static __init void intel_pebs_isolation_quirk(void) intel_check_pebs_isolation(); } -static const struct x86_cpu_desc pebs_ucodes[] = { - INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE, 7, 0x00000028), - INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618), - INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 7, 0x0000070c), +static const struct x86_cpu_id pebs_ucodes[] = { + X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE, 7, 7, 0x00000028), + X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X, 6, 6, 0x00000618), + X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X, 7, 7, 0x0000070c), {} }; static bool intel_snb_pebs_broken(void) { - return !x86_cpu_has_min_microcode_rev(pebs_ucodes); + return !x86_match_min_microcode_rev(pebs_ucodes); } static void intel_snb_check_microcode(void) @@ -5287,24 +5858,24 @@ static bool check_msr(unsigned long msr, u64 mask) * matches, this is needed to detect certain hardware emulators * (qemu/kvm) that don't trap on the MSR access and always return 0s. */ - if (rdmsrl_safe(msr, &val_old)) + if (rdmsrq_safe(msr, &val_old)) return false; /* - * Only change the bits which can be updated by wrmsrl. + * Only change the bits which can be updated by wrmsrq. */ val_tmp = val_old ^ mask; if (is_lbr_from(msr)) val_tmp = lbr_from_signext_quirk_wr(val_tmp); - if (wrmsrl_safe(msr, val_tmp) || - rdmsrl_safe(msr, &val_new)) + if (wrmsrq_safe(msr, val_tmp) || + rdmsrq_safe(msr, &val_new)) return false; /* - * Quirk only affects validation in wrmsr(), so wrmsrl()'s value - * should equal rdmsrl()'s even with the quirk. + * Quirk only affects validation in wrmsr(), so wrmsrq()'s value + * should equal rdmsrq()'s even with the quirk. */ if (val_new != val_tmp) return false; @@ -5315,7 +5886,7 @@ static bool check_msr(unsigned long msr, u64 mask) /* Here it's sure that the MSR can be safely accessed. * Restore the old value and return. */ - wrmsrl(msr, val_old); + wrmsrq(msr, val_old); return true; } @@ -5645,18 +6216,11 @@ lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) static char pmu_name_str[30]; -static ssize_t pmu_name_show(struct device *cdev, - struct device_attribute *attr, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str); -} - -static DEVICE_ATTR_RO(pmu_name); +static DEVICE_STRING_ATTR_RO(pmu_name, 0444, pmu_name_str); static struct attribute *intel_pmu_caps_attrs[] = { - &dev_attr_pmu_name.attr, - NULL + &dev_attr_pmu_name.attr.attr, + NULL }; static DEVICE_ATTR(allow_tsx_force_abort, 0644, @@ -5687,7 +6251,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) static umode_t pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) { - return x86_pmu.pebs ? attr->mode : 0; + return x86_pmu.ds_pebs ? attr->mode : 0; } static umode_t @@ -5705,8 +6269,37 @@ exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) return x86_pmu.version >= 2 ? attr->mode : 0; } +static umode_t +td_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + /* + * Hide the perf metrics topdown events + * if the feature is not enumerated. + */ + if (x86_pmu.num_topdown_events) + return x86_pmu.intel_cap.perf_metrics ? attr->mode : 0; + + return attr->mode; +} + +PMU_FORMAT_ATTR(acr_mask, "config2:0-63"); + +static struct attribute *format_acr_attrs[] = { + &format_attr_acr_mask.attr, + NULL +}; + +static umode_t +acr_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + + return intel_pmu_has_acr(dev_get_drvdata(dev)) ? attr->mode : 0; +} + static struct attribute_group group_events_td = { .name = "events", + .is_visible = td_is_visible, }; static struct attribute_group group_events_mem = { @@ -5740,6 +6333,18 @@ static struct attribute_group group_format_extra_skl = { .is_visible = exra_is_visible, }; +static struct attribute_group group_format_evtsel_ext = { + .name = "format", + .attrs = format_evtsel_ext_attrs, + .is_visible = evtsel_ext_is_visible, +}; + +static struct attribute_group group_format_acr = { + .name = "format", + .attrs = format_acr_attrs, + .is_visible = acr_is_visible, +}; + static struct attribute_group group_default = { .attrs = intel_pmu_attrs, .is_visible = default_is_visible, @@ -5753,6 +6358,8 @@ static const struct attribute_group *attr_update[] = { &group_caps_lbr, &group_format_extra, &group_format_extra_skl, + &group_format_evtsel_ext, + &group_format_acr, &group_default, NULL, }; @@ -5780,6 +6387,54 @@ static struct attribute *adl_hybrid_events_attrs[] = { NULL, }; +EVENT_ATTR_STR_HYBRID(topdown-retiring, td_retiring_lnl, "event=0xc2,umask=0x02;event=0x00,umask=0x80", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-fe-bound, td_fe_bound_lnl, "event=0x9c,umask=0x01;event=0x00,umask=0x82", hybrid_big_small); +EVENT_ATTR_STR_HYBRID(topdown-be-bound, td_be_bound_lnl, "event=0xa4,umask=0x02;event=0x00,umask=0x83", hybrid_big_small); + +static struct attribute *lnl_hybrid_events_attrs[] = { + EVENT_PTR(slots_adl), + EVENT_PTR(td_retiring_lnl), + EVENT_PTR(td_bad_spec_adl), + EVENT_PTR(td_fe_bound_lnl), + EVENT_PTR(td_be_bound_lnl), + EVENT_PTR(td_heavy_ops_adl), + EVENT_PTR(td_br_mis_adl), + EVENT_PTR(td_fetch_lat_adl), + EVENT_PTR(td_mem_bound_adl), + NULL +}; + +/* The event string must be in PMU IDX order. */ +EVENT_ATTR_STR_HYBRID(topdown-retiring, + td_retiring_arl_h, + "event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0", + hybrid_big_small_tiny); +EVENT_ATTR_STR_HYBRID(topdown-bad-spec, + td_bad_spec_arl_h, + "event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0", + hybrid_big_small_tiny); +EVENT_ATTR_STR_HYBRID(topdown-fe-bound, + td_fe_bound_arl_h, + "event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0", + hybrid_big_small_tiny); +EVENT_ATTR_STR_HYBRID(topdown-be-bound, + td_be_bound_arl_h, + "event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0", + hybrid_big_small_tiny); + +static struct attribute *arl_h_hybrid_events_attrs[] = { + EVENT_PTR(slots_adl), + EVENT_PTR(td_retiring_arl_h), + EVENT_PTR(td_bad_spec_arl_h), + EVENT_PTR(td_fe_bound_arl_h), + EVENT_PTR(td_be_bound_arl_h), + EVENT_PTR(td_heavy_ops_adl), + EVENT_PTR(td_br_mis_adl), + EVENT_PTR(td_fetch_lat_adl), + EVENT_PTR(td_mem_bound_adl), + NULL, +}; + /* Must be in IDX order */ EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small); EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small); @@ -5798,6 +6453,21 @@ static struct attribute *mtl_hybrid_mem_attrs[] = { NULL }; +EVENT_ATTR_STR_HYBRID(mem-loads, + mem_ld_arl_h, + "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3", + hybrid_big_small_tiny); +EVENT_ATTR_STR_HYBRID(mem-stores, + mem_st_arl_h, + "event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6", + hybrid_big_small_tiny); + +static struct attribute *arl_h_hybrid_mem_attrs[] = { + EVENT_PTR(mem_ld_arl_h), + EVENT_PTR(mem_st_arl_h), + NULL, +}; + EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big); @@ -5821,8 +6491,8 @@ static struct attribute *adl_hybrid_tsx_attrs[] = { FORMAT_ATTR_HYBRID(in_tx, hybrid_big); FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big); -FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small); -FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small); +FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny); +FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny); FORMAT_ATTR_HYBRID(frontend, hybrid_big); #define ADL_HYBRID_RTM_FORMAT_ATTR \ @@ -5845,7 +6515,7 @@ static struct attribute *adl_hybrid_extra_attr[] = { NULL }; -FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small); +FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny); static struct attribute *mtl_hybrid_extra_attr_rtm[] = { ADL_HYBRID_RTM_FORMAT_ATTR, @@ -5908,9 +6578,27 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj, return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? attr->mode : 0; } +static umode_t hybrid_td_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct x86_hybrid_pmu *pmu = + container_of(dev_get_drvdata(dev), struct x86_hybrid_pmu, pmu); + + if (!is_attr_for_this_pmu(kobj, attr)) + return 0; + + + /* Only the big core supports perf metrics */ + if (pmu->pmu_type == hybrid_big) + return pmu->intel_cap.perf_metrics ? attr->mode : 0; + + return attr->mode; +} + static struct attribute_group hybrid_group_events_td = { .name = "events", - .is_visible = hybrid_events_is_visible, + .is_visible = hybrid_td_is_visible, }; static struct attribute_group hybrid_group_events_mem = { @@ -5955,6 +6643,8 @@ static const struct attribute_group *hybrid_attr_update[] = { &group_caps_gen, &group_caps_lbr, &hybrid_group_format_extra, + &group_format_evtsel_ext, + &group_format_acr, &group_default, &hybrid_group_cpus, NULL, @@ -5962,29 +6652,9 @@ static const struct attribute_group *hybrid_attr_update[] = { static struct attribute *empty_attrs; -static void intel_pmu_check_num_counters(int *num_counters, - int *num_counters_fixed, - u64 *intel_ctrl, u64 fixed_mask) -{ - if (*num_counters > INTEL_PMC_MAX_GENERIC) { - WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", - *num_counters, INTEL_PMC_MAX_GENERIC); - *num_counters = INTEL_PMC_MAX_GENERIC; - } - *intel_ctrl = (1ULL << *num_counters) - 1; - - if (*num_counters_fixed > INTEL_PMC_MAX_FIXED) { - WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", - *num_counters_fixed, INTEL_PMC_MAX_FIXED); - *num_counters_fixed = INTEL_PMC_MAX_FIXED; - } - - *intel_ctrl |= fixed_mask << INTEL_PMC_IDX_FIXED; -} - static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, - int num_counters, - int num_counters_fixed, + u64 cntr_mask, + u64 fixed_cntr_mask, u64 intel_ctrl) { struct event_constraint *c; @@ -6021,10 +6691,9 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con * generic counters */ if (!use_fixed_pseudo_encoding(c->code)) - c->idxmsk64 |= (1ULL << num_counters) - 1; + c->idxmsk64 |= cntr_mask; } - c->idxmsk64 &= - ~(~0ULL << (INTEL_PMC_IDX_FIXED + num_counters_fixed)); + c->idxmsk64 &= cntr_mask | (fixed_cntr_mask << INTEL_PMC_IDX_FIXED); c->weight = hweight64(c->idxmsk64); } } @@ -6049,9 +6718,15 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs) } } +static inline int intel_pmu_v6_addr_offset(int index, bool eventsel) +{ + return MSR_IA32_PMC_V6_STEP * index; +} + static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { - { hybrid_small, "cpu_atom" }, - { hybrid_big, "cpu_core" }, + { hybrid_small, "cpu_atom" }, + { hybrid_big, "cpu_core" }, + { hybrid_tiny, "cpu_lowpower" }, }; static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) @@ -6075,21 +6750,20 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id; pmu->name = intel_hybrid_pmu_type_map[bit].name; - pmu->num_counters = x86_pmu.num_counters; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed; - pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->cntr_mask64 = x86_pmu.cntr_mask64; + pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64; + pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64); + pmu->config_mask = X86_RAW_EVENT_MASK; pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); + __EVENT_CONSTRAINT(0, pmu->cntr_mask64, + 0, x86_pmu_num_counters(&pmu->pmu), 0, 0); pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - if (pmu->pmu_type & hybrid_small) { + if (pmu->pmu_type & hybrid_small_tiny) { pmu->intel_cap.perf_metrics = 0; - pmu->intel_cap.pebs_output_pt_available = 1; pmu->mid_ack = true; } else if (pmu->pmu_type & hybrid_big) { pmu->intel_cap.perf_metrics = 1; - pmu->intel_cap.pebs_output_pt_available = 0; pmu->late_ack = true; } } @@ -6150,6 +6824,22 @@ static __always_inline void intel_pmu_init_grt(struct pmu *pmu) intel_pmu_ref_cycles_ext(); } +static __always_inline void intel_pmu_init_lnc(struct pmu *pmu) +{ + intel_pmu_init_glc(pmu); + hybrid(pmu, event_constraints) = intel_lnc_event_constraints; + hybrid(pmu, pebs_constraints) = intel_lnc_pebs_event_constraints; + hybrid(pmu, extra_regs) = intel_lnc_extra_regs; +} + +static __always_inline void intel_pmu_init_skt(struct pmu *pmu) +{ + intel_pmu_init_grt(pmu); + hybrid(pmu, event_constraints) = intel_skt_event_constraints; + hybrid(pmu, extra_regs) = intel_cmt_extra_regs; + static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr); +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6166,15 +6856,21 @@ __init int intel_pmu_init(void) char *name; struct x86_hybrid_pmu *pmu; + /* Architectural Perfmon was introduced starting with Core "Yonah" */ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { - case 0x6: - return p6_pmu_init(); - case 0xb: + case 6: + if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH) + return p6_pmu_init(); + break; + case 11: return knc_pmu_init(); - case 0xf: + case 15: return p4_pmu_init(); } + + pr_cont("unsupported CPU family %d model %d ", + boot_cpu_data.x86, boot_cpu_data.x86_model); return -ENODEV; } @@ -6193,15 +6889,16 @@ __init int intel_pmu_init(void) x86_pmu = intel_pmu; x86_pmu.version = version; - x86_pmu.num_counters = eax.split.num_counters; + x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0); x86_pmu.cntval_bits = eax.split.bit_width; x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; x86_pmu.events_maskl = ebx.full; x86_pmu.events_mask_len = eax.split.mask_length; - x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); + x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64); x86_pmu.pebs_capable = PEBS_COUNTER_MASK; + x86_pmu.config_mask = X86_RAW_EVENT_MASK; /* * Quirk: v2 perfmon does not report fixed-purpose events, so @@ -6210,17 +6907,15 @@ __init int intel_pmu_init(void) if (version > 1 && version < 5) { int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); - x86_pmu.num_counters_fixed = - max((int)edx.split.num_counters_fixed, assume); - - fixed_mask = (1L << x86_pmu.num_counters_fixed) - 1; + x86_pmu.fixed_cntr_mask64 = + GENMASK_ULL(max((int)edx.split.num_counters_fixed, assume) - 1, 0); } else if (version >= 5) - x86_pmu.num_counters_fixed = fls(fixed_mask); + x86_pmu.fixed_cntr_mask64 = fixed_mask; if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; - rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); + rdmsrq(MSR_IA32_PERF_CAPABILITIES, capabilities); x86_pmu.intel_cap.capabilities = capabilities; } @@ -6232,7 +6927,7 @@ __init int intel_pmu_init(void) if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) intel_pmu_arch_lbr_init(); - intel_ds_init(); + intel_pebs_init(); x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ @@ -6243,21 +6938,27 @@ __init int intel_pmu_init(void) } /* + * Many features on and after V6 require dynamic constraint, + * e.g., Arch PEBS, ACR. + */ + if (version >= 6) + x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT; + /* * Install the hw-cache-events table: */ - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_CORE_YONAH: + switch (boot_cpu_data.x86_vfm) { + case INTEL_CORE_YONAH: pr_cont("Core events, "); name = "core"; break; - case INTEL_FAM6_CORE2_MEROM: + case INTEL_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); fallthrough; - case INTEL_FAM6_CORE2_MEROM_L: - case INTEL_FAM6_CORE2_PENRYN: - case INTEL_FAM6_CORE2_DUNNINGTON: + case INTEL_CORE2_MEROM_L: + case INTEL_CORE2_PENRYN: + case INTEL_CORE2_DUNNINGTON: memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6269,9 +6970,9 @@ __init int intel_pmu_init(void) name = "core2"; break; - case INTEL_FAM6_NEHALEM: - case INTEL_FAM6_NEHALEM_EP: - case INTEL_FAM6_NEHALEM_EX: + case INTEL_NEHALEM: + case INTEL_NEHALEM_EP: + case INTEL_NEHALEM_EX: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -6303,11 +7004,11 @@ __init int intel_pmu_init(void) name = "nehalem"; break; - case INTEL_FAM6_ATOM_BONNELL: - case INTEL_FAM6_ATOM_BONNELL_MID: - case INTEL_FAM6_ATOM_SALTWELL: - case INTEL_FAM6_ATOM_SALTWELL_MID: - case INTEL_FAM6_ATOM_SALTWELL_TABLET: + case INTEL_ATOM_BONNELL: + case INTEL_ATOM_BONNELL_MID: + case INTEL_ATOM_SALTWELL: + case INTEL_ATOM_SALTWELL_MID: + case INTEL_ATOM_SALTWELL_TABLET: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6320,11 +7021,11 @@ __init int intel_pmu_init(void) name = "bonnell"; break; - case INTEL_FAM6_ATOM_SILVERMONT: - case INTEL_FAM6_ATOM_SILVERMONT_D: - case INTEL_FAM6_ATOM_SILVERMONT_MID: - case INTEL_FAM6_ATOM_AIRMONT: - case INTEL_FAM6_ATOM_AIRMONT_MID: + case INTEL_ATOM_SILVERMONT: + case INTEL_ATOM_SILVERMONT_D: + case INTEL_ATOM_SILVERMONT_MID: + case INTEL_ATOM_AIRMONT: + case INTEL_ATOM_SILVERMONT_MID2: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -6342,8 +7043,8 @@ __init int intel_pmu_init(void) name = "silvermont"; break; - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_GOLDMONT_D: + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_D: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -6369,7 +7070,7 @@ __init int intel_pmu_init(void) name = "goldmont"; break; - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_ATOM_GOLDMONT_PLUS: memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, @@ -6398,9 +7099,9 @@ __init int intel_pmu_init(void) name = "goldmont_plus"; break; - case INTEL_FAM6_ATOM_TREMONT_D: - case INTEL_FAM6_ATOM_TREMONT: - case INTEL_FAM6_ATOM_TREMONT_L: + case INTEL_ATOM_TREMONT_D: + case INTEL_ATOM_TREMONT: + case INTEL_ATOM_TREMONT_L: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6427,10 +7128,10 @@ __init int intel_pmu_init(void) name = "Tremont"; break; - case INTEL_FAM6_ATOM_GRACEMONT: + case INTEL_ATOM_GRACEMONT: intel_pmu_init_grt(NULL); intel_pmu_pebs_data_source_grt(); - x86_pmu.pebs_latency_data = adl_latency_data_small; + x86_pmu.pebs_latency_data = grt_latency_data; x86_pmu.get_event_constraints = tnt_get_event_constraints; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; @@ -6439,12 +7140,12 @@ __init int intel_pmu_init(void) name = "gracemont"; break; - case INTEL_FAM6_ATOM_CRESTMONT: - case INTEL_FAM6_ATOM_CRESTMONT_X: + case INTEL_ATOM_CRESTMONT: + case INTEL_ATOM_CRESTMONT_X: intel_pmu_init_grt(NULL); x86_pmu.extra_regs = intel_cmt_extra_regs; intel_pmu_pebs_data_source_cmt(); - x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.pebs_latency_data = cmt_latency_data; x86_pmu.get_event_constraints = cmt_get_event_constraints; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; @@ -6453,9 +7154,21 @@ __init int intel_pmu_init(void) name = "crestmont"; break; - case INTEL_FAM6_WESTMERE: - case INTEL_FAM6_WESTMERE_EP: - case INTEL_FAM6_WESTMERE_EX: + case INTEL_ATOM_DARKMONT_X: + intel_pmu_init_skt(NULL); + intel_pmu_pebs_data_source_cmt(); + x86_pmu.pebs_latency_data = cmt_latency_data; + x86_pmu.get_event_constraints = cmt_get_event_constraints; + td_attr = skt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = cmt_format_attr; + pr_cont("Darkmont events, "); + name = "darkmont"; + break; + + case INTEL_WESTMERE: + case INTEL_WESTMERE_EP: + case INTEL_WESTMERE_EX: memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -6484,8 +7197,8 @@ __init int intel_pmu_init(void) name = "westmere"; break; - case INTEL_FAM6_SANDYBRIDGE: - case INTEL_FAM6_SANDYBRIDGE_X: + case INTEL_SANDYBRIDGE: + case INTEL_SANDYBRIDGE_X: x86_add_quirk(intel_sandybridge_quirk); x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, @@ -6498,7 +7211,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) + if (boot_cpu_data.x86_vfm == INTEL_SANDYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -6524,8 +7237,8 @@ __init int intel_pmu_init(void) name = "sandybridge"; break; - case INTEL_FAM6_IVYBRIDGE: - case INTEL_FAM6_IVYBRIDGE_X: + case INTEL_IVYBRIDGE: + case INTEL_IVYBRIDGE_X: x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6541,7 +7254,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; - if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) + if (boot_cpu_data.x86_vfm == INTEL_IVYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -6563,10 +7276,10 @@ __init int intel_pmu_init(void) break; - case INTEL_FAM6_HASWELL: - case INTEL_FAM6_HASWELL_X: - case INTEL_FAM6_HASWELL_L: - case INTEL_FAM6_HASWELL_G: + case INTEL_HASWELL: + case INTEL_HASWELL_X: + case INTEL_HASWELL_L: + case INTEL_HASWELL_G: x86_add_quirk(intel_ht_bug); x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; @@ -6586,6 +7299,7 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.limit_period = hsw_limit_period; x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; @@ -6596,10 +7310,10 @@ __init int intel_pmu_init(void) name = "haswell"; break; - case INTEL_FAM6_BROADWELL: - case INTEL_FAM6_BROADWELL_D: - case INTEL_FAM6_BROADWELL_G: - case INTEL_FAM6_BROADWELL_X: + case INTEL_BROADWELL: + case INTEL_BROADWELL_D: + case INTEL_BROADWELL_G: + case INTEL_BROADWELL_X: x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6638,8 +7352,8 @@ __init int intel_pmu_init(void) name = "broadwell"; break; - case INTEL_FAM6_XEON_PHI_KNL: - case INTEL_FAM6_XEON_PHI_KNM: + case INTEL_XEON_PHI_KNL: + case INTEL_XEON_PHI_KNM: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -6658,15 +7372,15 @@ __init int intel_pmu_init(void) name = "knights-landing"; break; - case INTEL_FAM6_SKYLAKE_X: + case INTEL_SKYLAKE_X: pmem = true; fallthrough; - case INTEL_FAM6_SKYLAKE_L: - case INTEL_FAM6_SKYLAKE: - case INTEL_FAM6_KABYLAKE_L: - case INTEL_FAM6_KABYLAKE: - case INTEL_FAM6_COMETLAKE_L: - case INTEL_FAM6_COMETLAKE: + case INTEL_SKYLAKE_L: + case INTEL_SKYLAKE: + case INTEL_KABYLAKE_L: + case INTEL_KABYLAKE: + case INTEL_COMETLAKE_L: + case INTEL_COMETLAKE: x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6715,16 +7429,16 @@ __init int intel_pmu_init(void) name = "skylake"; break; - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_ICELAKE_D: + case INTEL_ICELAKE_X: + case INTEL_ICELAKE_D: x86_pmu.pebs_ept = 1; pmem = true; fallthrough; - case INTEL_FAM6_ICELAKE_L: - case INTEL_FAM6_ICELAKE: - case INTEL_FAM6_TIGERLAKE_L: - case INTEL_FAM6_TIGERLAKE: - case INTEL_FAM6_ROCKETLAKE: + case INTEL_ICELAKE_L: + case INTEL_ICELAKE: + case INTEL_TIGERLAKE_L: + case INTEL_TIGERLAKE: + case INTEL_ROCKETLAKE: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -6759,16 +7473,22 @@ __init int intel_pmu_init(void) name = "icelake"; break; - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_EMERALDRAPIDS_X: + case INTEL_SAPPHIRERAPIDS_X: + case INTEL_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.extra_regs = intel_glc_extra_regs; - fallthrough; - case INTEL_FAM6_GRANITERAPIDS_X: - case INTEL_FAM6_GRANITERAPIDS_D: + pr_cont("Sapphire Rapids events, "); + name = "sapphire_rapids"; + goto glc_common; + + case INTEL_GRANITERAPIDS_X: + case INTEL_GRANITERAPIDS_D: + x86_pmu.extra_regs = intel_rwc_extra_regs; + pr_cont("Granite Rapids events, "); + name = "granite_rapids"; + + glc_common: intel_pmu_init_glc(NULL); - if (!x86_pmu.extra_regs) - x86_pmu.extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_ept = 1; x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = glc_get_event_constraints; @@ -6779,15 +7499,13 @@ __init int intel_pmu_init(void) td_attr = glc_td_events_attrs; tsx_attr = glc_tsx_events_attrs; intel_pmu_pebs_data_source_skl(true); - pr_cont("Sapphire Rapids events, "); - name = "sapphire_rapids"; break; - case INTEL_FAM6_ALDERLAKE: - case INTEL_FAM6_ALDERLAKE_L: - case INTEL_FAM6_RAPTORLAKE: - case INTEL_FAM6_RAPTORLAKE_P: - case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_ALDERLAKE: + case INTEL_ALDERLAKE_L: + case INTEL_RAPTORLAKE: + case INTEL_RAPTORLAKE_P: + case INTEL_RAPTORLAKE_S: /* * Alder Lake has 2 types of CPU, core and atom. * @@ -6795,7 +7513,7 @@ __init int intel_pmu_init(void) */ intel_pmu_init_hybrid(hybrid_big_small); - x86_pmu.pebs_latency_data = adl_latency_data_small; + x86_pmu.pebs_latency_data = grt_latency_data; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; @@ -6810,11 +7528,13 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; intel_pmu_init_glc(&pmu->pmu); if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { - pmu->num_counters = x86_pmu.num_counters + 2; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; + pmu->cntr_mask64 <<= 2; + pmu->cntr_mask64 |= 0x3; + pmu->fixed_cntr_mask64 <<= 1; + pmu->fixed_cntr_mask64 |= 0x1; } else { - pmu->num_counters = x86_pmu.num_counters; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + pmu->cntr_mask64 = x86_pmu.cntr_mask64; + pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64; } /* @@ -6824,15 +7544,16 @@ __init int intel_pmu_init(void) * mistakenly add extra counters for P-cores. Correct the number of * counters here. */ - if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) { - pmu->num_counters = x86_pmu.num_counters; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + if ((x86_pmu_num_counters(&pmu->pmu) > 8) || (x86_pmu_num_counters_fixed(&pmu->pmu) > 4)) { + pmu->cntr_mask64 = x86_pmu.cntr_mask64; + pmu->fixed_cntr_mask64 = x86_pmu.fixed_cntr_mask64; } - pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->pebs_events_mask = intel_pmu_pebs_mask(pmu->cntr_mask64); pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); + __EVENT_CONSTRAINT(0, pmu->cntr_mask64, + 0, x86_pmu_num_counters(&pmu->pmu), 0, 0); + pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ @@ -6845,11 +7566,12 @@ __init int intel_pmu_init(void) name = "alderlake_hybrid"; break; - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: + case INTEL_METEORLAKE: + case INTEL_METEORLAKE_L: + case INTEL_ARROWLAKE_U: intel_pmu_init_hybrid(hybrid_big_small); - x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.pebs_latency_data = cmt_latency_data; x86_pmu.get_event_constraints = mtl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; @@ -6874,6 +7596,71 @@ __init int intel_pmu_init(void) name = "meteorlake_hybrid"; break; + case INTEL_PANTHERLAKE_L: + pr_cont("Pantherlake Hybrid events, "); + name = "pantherlake_hybrid"; + goto lnl_common; + + case INTEL_LUNARLAKE_M: + case INTEL_ARROWLAKE: + pr_cont("Lunarlake Hybrid events, "); + name = "lunarlake_hybrid"; + + lnl_common: + intel_pmu_init_hybrid(hybrid_big_small); + + x86_pmu.pebs_latency_data = lnl_latency_data; + x86_pmu.get_event_constraints = mtl_get_event_constraints; + x86_pmu.hw_config = adl_hw_config; + + td_attr = lnl_hybrid_events_attrs; + mem_attr = mtl_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_lnc(&pmu->pmu); + + /* Initialize Atom core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + intel_pmu_init_skt(&pmu->pmu); + + intel_pmu_pebs_data_source_lnl(); + break; + + case INTEL_ARROWLAKE_H: + intel_pmu_init_hybrid(hybrid_big_small_tiny); + + x86_pmu.pebs_latency_data = arl_h_latency_data; + x86_pmu.get_event_constraints = arl_h_get_event_constraints; + x86_pmu.hw_config = arl_h_hw_config; + + td_attr = arl_h_hybrid_events_attrs; + mem_attr = arl_h_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities. */ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_lnc(&pmu->pmu); + + /* Initialize Atom core specific PerfMon capabilities. */ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + intel_pmu_init_skt(&pmu->pmu); + + /* Initialize Lower Power Atom specific PerfMon capabilities. */ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX]; + intel_pmu_init_grt(&pmu->pmu); + pmu->extra_regs = intel_cmt_extra_regs; + + intel_pmu_pebs_data_source_arl_h(); + pr_cont("ArrowLake-H Hybrid events, "); + name = "arrowlake_h_hybrid"; + break; + default: switch (x86_pmu.version) { case 1: @@ -6899,9 +7686,9 @@ __init int intel_pmu_init(void) * The constraints may be cut according to the CPUID enumeration * by inserting the EVENT_CONSTRAINT_END. */ - if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) - x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; - intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1; + if (fls64(x86_pmu.fixed_cntr_mask64) > INTEL_PMC_MAX_FIXED) + x86_pmu.fixed_cntr_mask64 &= GENMASK_ULL(INTEL_PMC_MAX_FIXED - 1, 0); + intel_v5_gen_event_constraints[fls64(x86_pmu.fixed_cntr_mask64)].weight = -1; x86_pmu.event_constraints = intel_v5_gen_event_constraints; pr_cont("generic architected perfmon, "); name = "generic_arch_v5+"; @@ -6928,18 +7715,29 @@ __init int intel_pmu_init(void) x86_pmu.attr_update = hybrid_attr_update; } - intel_pmu_check_num_counters(&x86_pmu.num_counters, - &x86_pmu.num_counters_fixed, - &x86_pmu.intel_ctrl, - (u64)fixed_mask); + /* + * The archPerfmonExt (0x23) includes an enhanced enumeration of + * PMU architectural features with a per-core view. For non-hybrid, + * each core has the same PMU capabilities. It's good enough to + * update the x86_pmu from the booting CPU. For hybrid, the x86_pmu + * is used to keep the common capabilities. Still keep the values + * from the leaf 0xa. The core specific update will be done later + * when a new type is online. + */ + if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) + update_pmu_cap(NULL); + + intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64, + &x86_pmu.fixed_cntr_mask64, + &x86_pmu.intel_ctrl); /* AnyThread may be deprecated on arch perfmon v5 or later */ if (x86_pmu.intel_cap.anythread_deprecated) x86_pmu.format_attrs = intel_arch_formats_attr; intel_pmu_check_event_constraints(x86_pmu.event_constraints, - x86_pmu.num_counters, - x86_pmu.num_counters_fixed, + x86_pmu.cntr_mask64, + x86_pmu.fixed_cntr_mask64, x86_pmu.intel_ctrl); /* * Access LBR MSR may cause #GP under certain circumstances. @@ -6980,6 +7778,14 @@ __init int intel_pmu_init(void) pr_cont("full-width counters, "); } + /* Support V6+ MSR Aliasing */ + if (x86_pmu.version >= 6) { + x86_pmu.perfctr = MSR_IA32_PMC_V6_GP0_CTR; + x86_pmu.eventsel = MSR_IA32_PMC_V6_GP0_CFG_A; + x86_pmu.fixedctr = MSR_IA32_PMC_V6_FX0_CTR; + x86_pmu.addr_offset = intel_pmu_v6_addr_offset; + } + if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 54eb142810fb..ec753e39b007 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -41,7 +41,7 @@ * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL - * MTL,SRF,GRR + * MTL,SRF,GRR,ARL,LNL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -53,50 +53,50 @@ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF, - * GRR + * GRR,ARL,LNL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL,RPL,MTL + * ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, - * RPL,SPR,MTL + * RPL,SPR,MTL,ARL,LNL,SRF * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL,RPL,MTL + * ADL,RPL,MTL,ARL,LNL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF + * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF, + * ARL,LNL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL,RKL,ADL,RPL,MTL + * KBL,CML,ICL,TGL,RKL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL,MTL + * ADL,RPL,MTL,ARL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL,RPL,MTL + * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL,RPL,MTL + * TNT,RKL,ADL,RPL,MTL,ARL,LNL * Scope: Package (physical package) * MSR_MODULE_C6_RES_MS: Module C6 Residency Counter. * perf code: 0x00 @@ -111,9 +111,11 @@ #include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> +#include <asm/msr.h> #include "../perf_event.h" #include "../probe.h" +MODULE_DESCRIPTION("Support for Intel cstate performance events"); MODULE_LICENSE("GPL"); #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ @@ -127,10 +129,6 @@ static ssize_t __cstate_##_var##_show(struct device *dev, \ static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) -static ssize_t cstate_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, - char *buf); - /* Model -> events mapping */ struct cstate_model { unsigned long core_events; @@ -143,12 +141,6 @@ struct cstate_model { #define SLM_PKG_C6_USE_C7_MSR (1UL << 0) #define KNL_CORE_C6_MSR (1UL << 1) -struct perf_cstate_msr { - u64 msr; - struct perf_pmu_events_attr *attr; -}; - - /* cstate_core PMU */ static struct pmu cstate_core_pmu; static bool has_cstate_core; @@ -211,22 +203,9 @@ static struct attribute_group cstate_format_attr_group = { .attrs = cstate_format_attrs, }; -static cpumask_t cstate_core_cpu_mask; -static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL); - -static struct attribute *cstate_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static struct attribute_group cpumask_attr_group = { - .attrs = cstate_cpumask_attrs, -}; - static const struct attribute_group *cstate_attr_groups[] = { &cstate_events_attr_group, &cstate_format_attr_group, - &cpumask_attr_group, NULL, }; @@ -274,8 +253,6 @@ static struct perf_msr pkg_msr[] = { [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr }, }; -static cpumask_t cstate_pkg_cpu_mask; - /* cstate_module PMU */ static struct pmu cstate_module_pmu; static bool has_cstate_module; @@ -296,28 +273,9 @@ static struct perf_msr module_msr[] = { [PERF_CSTATE_MODULE_C6_RES] = { MSR_MODULE_C6_RES_MS, &group_cstate_module_c6, test_msr }, }; -static cpumask_t cstate_module_cpu_mask; - -static ssize_t cstate_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct pmu *pmu = dev_get_drvdata(dev); - - if (pmu == &cstate_core_pmu) - return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask); - else if (pmu == &cstate_pkg_pmu) - return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask); - else if (pmu == &cstate_module_pmu) - return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask); - else - return 0; -} - static int cstate_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config; - int cpu; if (event->attr.type != event->pmu->type) return -ENOENT; @@ -336,20 +294,13 @@ static int cstate_pmu_event_init(struct perf_event *event) if (!(core_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = core_msr[cfg].msr; - cpu = cpumask_any_and(&cstate_core_cpu_mask, - topology_sibling_cpumask(event->cpu)); } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!(pkg_msr_mask & (1 << cfg))) return -EINVAL; - - event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - event->hw.event_base = pkg_msr[cfg].msr; - cpu = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_die_cpumask(event->cpu)); } else if (event->pmu == &cstate_module_pmu) { if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX) return -EINVAL; @@ -357,16 +308,10 @@ static int cstate_pmu_event_init(struct perf_event *event) if (!(module_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = module_msr[cfg].msr; - cpu = cpumask_any_and(&cstate_module_cpu_mask, - topology_cluster_cpumask(event->cpu)); } else { return -ENOENT; } - if (cpu >= nr_cpu_ids) - return -ENODEV; - - event->cpu = cpu; event->hw.config = cfg; event->hw.idx = -1; return 0; @@ -376,7 +321,7 @@ static inline u64 cstate_pmu_read_counter(struct perf_event *event) { u64 val; - rdmsrl(event->hw.event_base, val); + rdmsrq(event->hw.event_base, val); return val; } @@ -417,84 +362,6 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode) return 0; } -/* - * Check if exiting cpu is the designated reader. If so migrate the - * events when there is a valid target available - */ -static int cstate_cpu_exit(unsigned int cpu) -{ - unsigned int target; - - if (has_cstate_core && - cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) { - - target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &cstate_core_cpu_mask); - perf_pmu_migrate_context(&cstate_core_pmu, cpu, target); - } - } - - if (has_cstate_pkg && - cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) { - - target = cpumask_any_but(topology_die_cpumask(cpu), cpu); - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &cstate_pkg_cpu_mask); - perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); - } - } - - if (has_cstate_module && - cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) { - - target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu); - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &cstate_module_cpu_mask); - perf_pmu_migrate_context(&cstate_module_pmu, cpu, target); - } - } - return 0; -} - -static int cstate_cpu_init(unsigned int cpu) -{ - unsigned int target; - - /* - * If this is the first online thread of that core, set it in - * the core cpu mask as the designated reader. - */ - target = cpumask_any_and(&cstate_core_cpu_mask, - topology_sibling_cpumask(cpu)); - - if (has_cstate_core && target >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_core_cpu_mask); - - /* - * If this is the first online thread of that package, set it - * in the package cpu mask as the designated reader. - */ - target = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_die_cpumask(cpu)); - if (has_cstate_pkg && target >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); - - /* - * If this is the first online thread of that cluster, set it - * in the cluster cpu mask as the designated reader. - */ - target = cpumask_any_and(&cstate_module_cpu_mask, - topology_cluster_cpumask(cpu)); - if (has_cstate_module && target >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_module_cpu_mask); - - return 0; -} - static const struct attribute_group *core_attr_update[] = { &group_cstate_core_c1, &group_cstate_core_c3, @@ -531,6 +398,7 @@ static struct pmu cstate_core_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .scope = PERF_PMU_SCOPE_CORE, .module = THIS_MODULE, }; @@ -546,6 +414,7 @@ static struct pmu cstate_pkg_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .scope = PERF_PMU_SCOPE_PKG, .module = THIS_MODULE, }; @@ -561,6 +430,7 @@ static struct pmu cstate_module_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .scope = PERF_PMU_SCOPE_CLUSTER, .module = THIS_MODULE, }; @@ -642,9 +512,18 @@ static const struct cstate_model adl_cstates __initconst = { .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | BIT(PERF_CSTATE_PKG_C3_RES) | BIT(PERF_CSTATE_PKG_C6_RES) | - BIT(PERF_CSTATE_PKG_C7_RES) | BIT(PERF_CSTATE_PKG_C8_RES) | - BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + +static const struct cstate_model lnl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | BIT(PERF_CSTATE_PKG_C10_RES), }; @@ -689,7 +568,8 @@ static const struct cstate_model srf_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | BIT(PERF_CSTATE_CORE_C6_RES), - .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES), + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), .module_events = BIT(PERF_CSTATE_MODULE_C6_RES), }; @@ -768,6 +648,10 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_cstates), X86_MATCH_VFM(INTEL_METEORLAKE, &adl_cstates), X86_MATCH_VFM(INTEL_METEORLAKE_L, &adl_cstates), + X86_MATCH_VFM(INTEL_ARROWLAKE, &adl_cstates), + X86_MATCH_VFM(INTEL_ARROWLAKE_H, &adl_cstates), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &adl_cstates), + X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); @@ -801,9 +685,6 @@ static int __init cstate_probe(const struct cstate_model *cm) static inline void cstate_cleanup(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); - if (has_cstate_core) perf_pmu_unregister(&cstate_core_pmu); @@ -818,11 +699,6 @@ static int __init cstate_init(void) { int err; - cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING, - "perf/x86/cstate:starting", cstate_cpu_init, NULL); - cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE, - "perf/x86/cstate:online", NULL, cstate_cpu_exit); - if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); if (err) { @@ -835,6 +711,8 @@ static int __init cstate_init(void) if (has_cstate_pkg) { if (topology_max_dies_per_package() > 1) { + /* CLX-AP is multi-die and the cstate is die-scope */ + cstate_pkg_pmu.scope = PERF_PMU_SCOPE_DIE; err = perf_pmu_register(&cstate_pkg_pmu, "cstate_die", -1); } else { diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index e010bfed8417..c0b7ac1c7594 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -10,6 +10,7 @@ #include <asm/tlbflush.h> #include <asm/insn.h> #include <asm/io.h> +#include <asm/msr.h> #include <asm/timer.h> #include "../perf_event.h" @@ -63,6 +64,15 @@ union intel_x86_pebs_dse { unsigned int mtl_fwd_blk:1; unsigned int ld_reserved4:24; }; + struct { + unsigned int lnc_dse:8; + unsigned int ld_reserved5:2; + unsigned int lnc_stlb_miss:1; + unsigned int lnc_locked:1; + unsigned int lnc_data_blk:1; + unsigned int lnc_addr_blk:1; + unsigned int ld_reserved6:18; + }; }; @@ -77,7 +87,7 @@ union intel_x86_pebs_dse { #define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) /* Version for Sandy Bridge and later */ -static u64 pebs_data_source[] = { +static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = { P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */ @@ -168,11 +178,56 @@ void __init intel_pmu_pebs_data_source_mtl(void) __intel_pmu_pebs_data_source_cmt(data_source); } +void __init intel_pmu_pebs_data_source_arl_h(void) +{ + u64 *data_source; + + intel_pmu_pebs_data_source_lnl(); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_cmt(data_source); +} + void __init intel_pmu_pebs_data_source_cmt(void) { __intel_pmu_pebs_data_source_cmt(pebs_data_source); } +/* Version for Lion Cove and later */ +static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: LFB/L1 Miss Handling Buffer hit */ + 0, /* 0x04: Reserved */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x05: L2 Hit */ + OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE), /* 0x06: L2 Miss Handling Buffer Hit */ + 0, /* 0x07: Reserved */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x08: L3 Hit */ + 0, /* 0x09: Reserved */ + 0, /* 0x0a: Reserved */ + 0, /* 0x0b: Reserved */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* 0x0c: L3 Hit Snoop Fwd */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0d: L3 Hit Snoop HitM */ + 0, /* 0x0e: Reserved */ + P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0f: L3 Miss Snoop HitM */ + OP_LH | LEVEL(MSC) | P(SNOOP, NONE), /* 0x10: Memory-side Cache Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */ +}; + +void __init intel_pmu_pebs_data_source_lnl(void) +{ + u64 *data_source; + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; + memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source)); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_cmt(data_source); +} + static u64 precise_store_data(u64 status) { union intel_x86_pebs_dse dse; @@ -257,14 +312,14 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) } /* Retrieve the latency data for e-core of ADL */ -static u64 __adl_latency_data_small(struct perf_event *event, u64 status, - u8 dse, bool tlb, bool lock, bool blk) +static u64 __grt_latency_data(struct perf_event *event, u64 status, + u8 dse, bool tlb, bool lock, bool blk) { u64 val; WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); - dse &= PERF_PEBS_DATA_SOURCE_MASK; + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; pebs_set_tlb_lock(&val, tlb, lock); @@ -277,27 +332,82 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status, return val; } -u64 adl_latency_data_small(struct perf_event *event, u64 status) +u64 grt_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; dse.val = status; - return __adl_latency_data_small(event, status, dse.ld_dse, - dse.ld_locked, dse.ld_stlb_miss, - dse.ld_data_blk); + return __grt_latency_data(event, status, dse.ld_dse, + dse.ld_locked, dse.ld_stlb_miss, + dse.ld_data_blk); } /* Retrieve the latency data for e-core of MTL */ -u64 mtl_latency_data_small(struct perf_event *event, u64 status) +u64 cmt_latency_data(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + + dse.val = status; + + return __grt_latency_data(event, status, dse.mtl_dse, + dse.mtl_stlb_miss, dse.mtl_locked, + dse.mtl_fwd_blk); +} + +static u64 lnc_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; + union perf_mem_data_src src; + u64 val; dse.val = status; - return __adl_latency_data_small(event, status, dse.mtl_dse, - dse.mtl_stlb_miss, dse.mtl_locked, - dse.mtl_fwd_blk); + /* LNC core latency data */ + val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK]; + if (!val) + val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA); + + if (dse.lnc_stlb_miss) + val |= P(TLB, MISS) | P(TLB, L2); + else + val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + if (dse.lnc_locked) + val |= P(LOCK, LOCKED); + + if (dse.lnc_data_blk) + val |= P(BLK, DATA); + if (dse.lnc_addr_blk) + val |= P(BLK, ADDR); + if (!dse.lnc_data_blk && !dse.lnc_addr_blk) + val |= P(BLK, NA); + + src.val = val; + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + src.mem_op = P(OP, STORE); + + return src.val; +} + +u64 lnl_latency_data(struct perf_event *event, u64 status) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->pmu_type == hybrid_small) + return cmt_latency_data(event, status); + + return lnc_latency_data(event, status); +} + +u64 arl_h_latency_data(struct perf_event *event, u64 status) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->pmu_type == hybrid_tiny) + return cmt_latency_data(event, status); + + return lnl_latency_data(event, status); } static u64 load_latency_data(struct perf_event *event, u64 status) @@ -515,7 +625,7 @@ static int alloc_pebs_buffer(int cpu) int max, node = cpu_to_node(cpu); void *buffer, *insn_buff, *cea; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return 0; buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); @@ -550,7 +660,7 @@ static void release_pebs_buffer(int cpu) struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); void *cea; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return; kfree(per_cpu(insn_buffer, cpu)); @@ -625,7 +735,7 @@ void release_ds_buffers(void) { int cpu; - if (!x86_pmu.bts && !x86_pmu.pebs) + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; for_each_possible_cpu(cpu) @@ -641,7 +751,8 @@ void release_ds_buffers(void) } for_each_possible_cpu(cpu) { - release_pebs_buffer(cpu); + if (x86_pmu.ds_pebs) + release_pebs_buffer(cpu); release_bts_buffer(cpu); } } @@ -652,15 +763,17 @@ void reserve_ds_buffers(void) int cpu; x86_pmu.bts_active = 0; - x86_pmu.pebs_active = 0; - if (!x86_pmu.bts && !x86_pmu.pebs) + if (x86_pmu.ds_pebs) + x86_pmu.pebs_active = 0; + + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; if (!x86_pmu.bts) bts_err = 1; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) pebs_err = 1; for_each_possible_cpu(cpu) { @@ -672,7 +785,8 @@ void reserve_ds_buffers(void) if (!bts_err && alloc_bts_buffer(cpu)) bts_err = 1; - if (!pebs_err && alloc_pebs_buffer(cpu)) + if (x86_pmu.ds_pebs && !pebs_err && + alloc_pebs_buffer(cpu)) pebs_err = 1; if (bts_err && pebs_err) @@ -684,7 +798,7 @@ void reserve_ds_buffers(void) release_bts_buffer(cpu); } - if (pebs_err) { + if (x86_pmu.ds_pebs && pebs_err) { for_each_possible_cpu(cpu) release_pebs_buffer(cpu); } @@ -696,7 +810,7 @@ void reserve_ds_buffers(void) if (x86_pmu.bts && !bts_err) x86_pmu.bts_active = 1; - if (x86_pmu.pebs && !pebs_err) + if (x86_pmu.ds_pebs && !pebs_err) x86_pmu.pebs_active = 1; for_each_possible_cpu(cpu) { @@ -844,11 +958,11 @@ unlock: return 1; } -static inline void intel_pmu_drain_pebs_buffer(void) +void intel_pmu_drain_pebs_buffer(void) { struct perf_sample_data data; - x86_pmu.drain_pebs(NULL, &data); + static_call(x86_pmu_drain_pebs)(NULL, &data); } /* @@ -1086,6 +1200,32 @@ struct event_constraint intel_glc_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_lnc_pebs_event_constraints[] = { + INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), + + INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc), + INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3), + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf), /* MEM_INST_RETIRED.SPLIT_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf), /* MEM_INST_RETIRED.SPLIT_STORES */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf), /* MEM_INST_RETIRED.ALL_LOADS */ + INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf), /* MEM_INST_RETIRED.ALL_STORES */ + + INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), + + INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), + + /* + * Everything else is handled by PMU_FL_PEBS_ALL, because we + * need the full constraints from the main table. + */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *pebs_constraints = hybrid(event->pmu, pebs_constraints); @@ -1137,8 +1277,7 @@ void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sche static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) { struct debug_store *ds = cpuc->ds; - int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events); - int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); + int max_pebs_events = intel_pmu_max_num_pebs(cpuc->pmu); u64 threshold; int reserved; @@ -1146,7 +1285,7 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) return; if (x86_pmu.flags & PMU_FL_PEBS_ALL) - reserved = max_pebs_events + num_counters_fixed; + reserved = max_pebs_events + x86_pmu_max_num_counters_fixed(cpuc->pmu); else reserved = max_pebs_events; @@ -1160,6 +1299,19 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc) ds->pebs_interrupt_threshold = threshold; } +#define PEBS_DATACFG_CNTRS(x) \ + ((x >> PEBS_DATACFG_CNTR_SHIFT) & PEBS_DATACFG_CNTR_MASK) + +#define PEBS_DATACFG_CNTR_BIT(x) \ + (((1ULL << x) & PEBS_DATACFG_CNTR_MASK) << PEBS_DATACFG_CNTR_SHIFT) + +#define PEBS_DATACFG_FIX(x) \ + ((x >> PEBS_DATACFG_FIX_SHIFT) & PEBS_DATACFG_FIX_MASK) + +#define PEBS_DATACFG_FIX_BIT(x) \ + (((1ULL << (x)) & PEBS_DATACFG_FIX_MASK) \ + << PEBS_DATACFG_FIX_SHIFT) + static void adaptive_pebs_record_size_update(void) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1174,10 +1326,57 @@ static void adaptive_pebs_record_size_update(void) sz += sizeof(struct pebs_xmm); if (pebs_data_cfg & PEBS_DATACFG_LBRS) sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry); + if (pebs_data_cfg & (PEBS_DATACFG_METRICS | PEBS_DATACFG_CNTR)) { + sz += sizeof(struct pebs_cntr_header); + + /* Metrics base and Metrics Data */ + if (pebs_data_cfg & PEBS_DATACFG_METRICS) + sz += 2 * sizeof(u64); + + if (pebs_data_cfg & PEBS_DATACFG_CNTR) { + sz += (hweight64(PEBS_DATACFG_CNTRS(pebs_data_cfg)) + + hweight64(PEBS_DATACFG_FIX(pebs_data_cfg))) * + sizeof(u64); + } + } cpuc->pebs_record_size = sz; } +static void __intel_pmu_pebs_update_cfg(struct perf_event *event, + int idx, u64 *pebs_data_cfg) +{ + if (is_metric_event(event)) { + *pebs_data_cfg |= PEBS_DATACFG_METRICS; + return; + } + + *pebs_data_cfg |= PEBS_DATACFG_CNTR; + + if (idx >= INTEL_PMC_IDX_FIXED) + *pebs_data_cfg |= PEBS_DATACFG_FIX_BIT(idx - INTEL_PMC_IDX_FIXED); + else + *pebs_data_cfg |= PEBS_DATACFG_CNTR_BIT(idx); +} + + +void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc) +{ + struct perf_event *event; + u64 pebs_data_cfg = 0; + int i; + + for (i = 0; i < cpuc->n_events; i++) { + event = cpuc->event_list[i]; + if (!is_pebs_counter_event_group(event)) + continue; + __intel_pmu_pebs_update_cfg(event, cpuc->assign[i], &pebs_data_cfg); + } + + if (pebs_data_cfg & ~cpuc->pebs_data_cfg) + cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW; +} + #define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \ PERF_SAMPLE_PHYS_ADDR | \ PERF_SAMPLE_WEIGHT_TYPE | \ @@ -1204,8 +1403,10 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) * + precise_ip < 2 for the non event IP * + For RTM TSX weight we need GPRs for the abort code. */ - gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_GP_REGS); + gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS)) || + ((sample_type & PERF_SAMPLE_REGS_USER) && + (attr->sample_regs_user & PEBS_GP_REGS)); tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && ((attr->config & INTEL_ARCH_EVENT_MASK) == @@ -1320,7 +1521,7 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) else value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; } - wrmsrl(base + idx, value); + wrmsrq(base + idx, value); } static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc) @@ -1355,9 +1556,9 @@ void intel_pmu_pebs_enable(struct perf_event *event) * hence we need to drain when changing said * size. */ - intel_pmu_drain_large_pebs(cpuc); + intel_pmu_drain_pebs_buffer(); adaptive_pebs_record_size_update(); - wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg); + wrmsrq(MSR_PEBS_DATA_CFG, pebs_data_cfg); cpuc->active_pebs_data_cfg = pebs_data_cfg; } } @@ -1420,7 +1621,7 @@ void intel_pmu_pebs_disable(struct perf_event *event) intel_pmu_pebs_via_pt_disable(event); if (cpuc->enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; } @@ -1430,7 +1631,7 @@ void intel_pmu_pebs_enable_all(void) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (cpuc->pebs_enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); } void intel_pmu_pebs_disable_all(void) @@ -1631,8 +1832,6 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, perf_sample_data_init(data, 0, event->hw.last_period); - data->period = event->hw.last_period; - /* * Use latency for weight (only avail with PEBS-LL) */ @@ -1655,8 +1854,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) - perf_sample_save_callchain(data, event, iregs); + perf_sample_save_callchain(data, event, iregs); /* * We use the interrupt regs as a base because the PEBS record does not @@ -1755,8 +1953,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, if (x86_pmu.intel_cap.pebs_format >= 3) setup_pebs_time(event, data, pebs->tsc); - if (has_branch_stack(event)) - perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL); + perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL); } static void adaptive_pebs_save_regs(struct pt_regs *regs, @@ -1782,14 +1979,89 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs, #endif } +static void intel_perf_event_update_pmc(struct perf_event *event, u64 pmc) +{ + int shift = 64 - x86_pmu.cntval_bits; + struct hw_perf_event *hwc; + u64 delta, prev_pmc; + + /* + * A recorded counter may not have an assigned event in the + * following cases. The value should be dropped. + * - An event is deleted. There is still an active PEBS event. + * The PEBS record doesn't shrink on pmu::del(). + * If the counter of the deleted event once occurred in a PEBS + * record, PEBS still records the counter until the counter is + * reassigned. + * - An event is stopped for some reason, e.g., throttled. + * During this period, another event is added and takes the + * counter of the stopped event. The stopped event is assigned + * to another new and uninitialized counter, since the + * x86_pmu_start(RELOAD) is not invoked for a stopped event. + * The PEBS__DATA_CFG is updated regardless of the event state. + * The uninitialized counter can be recorded in a PEBS record. + * But the cpuc->events[uninitialized_counter] is always NULL, + * because the event is stopped. The uninitialized value is + * safely dropped. + */ + if (!event) + return; + + hwc = &event->hw; + prev_pmc = local64_read(&hwc->prev_count); + + /* Only update the count when the PMU is disabled */ + WARN_ON(this_cpu_read(cpu_hw_events.enabled)); + local64_set(&hwc->prev_count, pmc); + + delta = (pmc << shift) - (prev_pmc << shift); + delta >>= shift; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); +} + +static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc, + struct perf_event *event, + struct pebs_cntr_header *cntr, + void *next_record) +{ + int bit; + + for_each_set_bit(bit, (unsigned long *)&cntr->cntr, INTEL_PMC_MAX_GENERIC) { + intel_perf_event_update_pmc(cpuc->events[bit], *(u64 *)next_record); + next_record += sizeof(u64); + } + + for_each_set_bit(bit, (unsigned long *)&cntr->fixed, INTEL_PMC_MAX_FIXED) { + /* The slots event will be handled with perf_metric later */ + if ((cntr->metrics == INTEL_CNTR_METRICS) && + (bit + INTEL_PMC_IDX_FIXED == INTEL_PMC_IDX_FIXED_SLOTS)) { + next_record += sizeof(u64); + continue; + } + intel_perf_event_update_pmc(cpuc->events[bit + INTEL_PMC_IDX_FIXED], + *(u64 *)next_record); + next_record += sizeof(u64); + } + + /* HW will reload the value right after the overflow. */ + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + local64_set(&event->hw.prev_count, (u64)-event->hw.sample_period); + + if (cntr->metrics == INTEL_CNTR_METRICS) { + static_call(intel_pmu_update_topdown_event) + (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS], + (u64 *)next_record); + next_record += 2 * sizeof(u64); + } +} + #define PEBS_LATENCY_MASK 0xffff -#define PEBS_CACHE_LATENCY_OFFSET 32 -#define PEBS_RETIRE_LATENCY_OFFSET 32 /* * With adaptive PEBS the layout depends on what fields are configured. */ - static void setup_pebs_adaptive_sample_data(struct perf_event *event, struct pt_regs *iregs, void *__pebs, struct perf_sample_data *data, @@ -1798,8 +2070,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct pebs_basic *basic = __pebs; void *next_record = basic + 1; - u64 sample_type; - u64 format_size; + u64 sample_type, format_group; struct pebs_meminfo *meminfo = NULL; struct pebs_gprs *gprs = NULL; struct x86_perf_regs *perf_regs; @@ -1811,9 +2082,8 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, perf_regs->xmm_regs = NULL; sample_type = event->attr.sample_type; - format_size = basic->format_size; + format_group = basic->format_group; perf_sample_data_init(data, 0, event->hw.last_period); - data->period = event->hw.last_period; setup_pebs_time(event, data, basic->tsc); @@ -1823,28 +2093,31 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) - perf_sample_save_callchain(data, event, iregs); + perf_sample_save_callchain(data, event, iregs); *regs = *iregs; /* The ip in basic is EventingIP */ set_linear_ip(regs, basic->ip); regs->flags = PERF_EFLAGS_EXACT; - if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)) - data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK; + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { + if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY) + data->weight.var3_w = basic->retire_latency; + else + data->weight.var3_w = 0; + } /* * The record for MEMINFO is in front of GP * But PERF_SAMPLE_TRANSACTION needs gprs->ax. * Save the pointer here but process later. */ - if (format_size & PEBS_DATACFG_MEMINFO) { + if (format_group & PEBS_DATACFG_MEMINFO) { meminfo = next_record; next_record = meminfo + 1; } - if (format_size & PEBS_DATACFG_GP) { + if (format_group & PEBS_DATACFG_GP) { gprs = next_record; next_record = gprs + 1; @@ -1853,18 +2126,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, regs->flags &= ~PERF_EFLAGS_EXACT; } - if (sample_type & PERF_SAMPLE_REGS_INTR) + if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) adaptive_pebs_save_regs(regs, gprs); } - if (format_size & PEBS_DATACFG_MEMINFO) { + if (format_group & PEBS_DATACFG_MEMINFO) { if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { - u64 weight = meminfo->latency; + u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ? + meminfo->cache_latency : meminfo->mem_latency; - if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) { - data->weight.var2_w = weight & PEBS_LATENCY_MASK; - weight >>= PEBS_CACHE_LATENCY_OFFSET; - } + if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) + data->weight.var2_w = meminfo->instr_latency; /* * Although meminfo::latency is defined as a u64, @@ -1872,12 +2144,13 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, * in practice on Ice Lake and earlier platforms. */ if (sample_type & PERF_SAMPLE_WEIGHT) { - data->weight.full = weight ?: + data->weight.full = latency ?: intel_get_tsx_weight(meminfo->tsx_tuning); } else { - data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?: + data->weight.var1_dw = (u32)latency ?: intel_get_tsx_weight(meminfo->tsx_tuning); } + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; } @@ -1898,16 +2171,16 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, } } - if (format_size & PEBS_DATACFG_XMMS) { + if (format_group & PEBS_DATACFG_XMMS) { struct pebs_xmm *xmm = next_record; next_record = xmm + 1; perf_regs->xmm_regs = xmm->xmm; } - if (format_size & PEBS_DATACFG_LBRS) { + if (format_group & PEBS_DATACFG_LBRS) { struct lbr_entry *lbr = next_record; - int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT) + int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT) & 0xff) + 1; next_record = next_record + num_lbr * sizeof(struct lbr_entry); @@ -1917,11 +2190,33 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, } } - WARN_ONCE(next_record != __pebs + (format_size >> 48), - "PEBS record size %llu, expected %llu, config %llx\n", - format_size >> 48, + if (format_group & (PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS)) { + struct pebs_cntr_header *cntr = next_record; + unsigned int nr; + + next_record += sizeof(struct pebs_cntr_header); + /* + * The PEBS_DATA_CFG is a global register, which is the + * superset configuration for all PEBS events. + * For the PEBS record of non-sample-read group, ignore + * the counter snapshot fields. + */ + if (is_pebs_counter_event_group(event)) { + __setup_pebs_counter_group(cpuc, event, cntr, next_record); + data->sample_flags |= PERF_SAMPLE_READ; + } + + nr = hweight32(cntr->cntr) + hweight32(cntr->fixed); + if (cntr->metrics == INTEL_CNTR_METRICS) + nr += 2; + next_record += nr * sizeof(u64); + } + + WARN_ONCE(next_record != __pebs + basic->format_size, + "PEBS record size %u, expected %llu, config %llx\n", + basic->format_size, (u64)(next_record - __pebs), - basic->format_size); + format_group); } static inline void * @@ -1962,15 +2257,6 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) return NULL; } -void intel_pmu_auto_reload_read(struct perf_event *event) -{ - WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)); - - perf_pmu_disable(event->pmu); - intel_pmu_drain_pebs_buffer(); - perf_pmu_enable(event->pmu); -} - /* * Special variant of intel_pmu_save_and_restart() for auto-reload. */ @@ -1991,7 +2277,7 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) WARN_ON(this_cpu_read(cpu_hw_events.enabled)); prev_raw_count = local64_read(&hwc->prev_count); - rdpmcl(hwc->event_base_rdpmc, new_raw_count); + new_raw_count = rdpmc(hwc->event_base_rdpmc); local64_set(&hwc->prev_count, new_raw_count); /* @@ -2032,46 +2318,33 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) return 0; } +typedef void (*setup_fn)(struct perf_event *, struct pt_regs *, void *, + struct perf_sample_data *, struct pt_regs *); + +static struct pt_regs dummy_iregs; + static __always_inline void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, + struct pt_regs *regs, struct perf_sample_data *data, - void *base, void *top, - int bit, int count, - void (*setup_sample)(struct perf_event *, - struct pt_regs *, - void *, - struct perf_sample_data *, - struct pt_regs *)) + void *at, + setup_fn setup_sample) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - struct x86_perf_regs perf_regs; - struct pt_regs *regs = &perf_regs.regs; - void *at = get_next_pebs_record_by_bit(base, top, bit); - static struct pt_regs dummy_iregs; - - if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { - /* - * Now, auto-reload is only enabled in fixed period mode. - * The reload value is always hwc->sample_period. - * May need to change it, if auto-reload is enabled in - * freq mode later. - */ - intel_pmu_save_and_restart_reload(event, count); - } else if (!intel_pmu_save_and_restart(event)) - return; - - if (!iregs) - iregs = &dummy_iregs; + setup_sample(event, iregs, at, data, regs); + perf_event_output(event, data, regs); +} - while (count > 1) { - setup_sample(event, iregs, at, data, regs); - perf_event_output(event, data, regs); - at += cpuc->pebs_record_size; - at = get_next_pebs_record_by_bit(at, top, bit); - count--; - } +static __always_inline void +__intel_pmu_pebs_last_event(struct perf_event *event, + struct pt_regs *iregs, + struct pt_regs *regs, + struct perf_sample_data *data, + void *at, + int count, + setup_fn setup_sample) +{ + struct hw_perf_event *hwc = &event->hw; setup_sample(event, iregs, at, data, regs); if (iregs == &dummy_iregs) { @@ -2087,11 +2360,73 @@ __intel_pmu_pebs_event(struct perf_event *event, * All but the last records are processed. * The last one is left to be able to call the overflow handler. */ - if (perf_event_overflow(event, data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, data, regs); + } + + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { + if ((is_pebs_counter_event_group(event))) { + /* + * The value of each sample has been updated when setup + * the corresponding sample data. + */ + perf_event_update_userpage(event); + } else { + /* + * Now, auto-reload is only enabled in fixed period mode. + * The reload value is always hwc->sample_period. + * May need to change it, if auto-reload is enabled in + * freq mode later. + */ + intel_pmu_save_and_restart_reload(event, count); + } + } else { + /* + * For a non-precise event, it's possible the + * counters-snapshotting records a positive value for the + * overflowed event. Then the HW auto-reload mechanism + * reset the counter to 0 immediately, because the + * pebs_event_reset is cleared if the PERF_X86_EVENT_AUTO_RELOAD + * is not set. The counter backwards may be observed in a + * PMI handler. + * + * Since the event value has been updated when processing the + * counters-snapshotting record, only needs to set the new + * period for the counter. + */ + if (is_pebs_counter_event_group(event)) + static_call(x86_pmu_set_period)(event); + else + intel_pmu_save_and_restart(event); } } +static __always_inline void +__intel_pmu_pebs_events(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data, + void *base, void *top, + int bit, int count, + setup_fn setup_sample) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct x86_perf_regs perf_regs; + struct pt_regs *regs = &perf_regs.regs; + void *at = get_next_pebs_record_by_bit(base, top, bit); + int cnt = count; + + if (!iregs) + iregs = &dummy_iregs; + + while (cnt > 1) { + __intel_pmu_pebs_event(event, iregs, regs, data, at, setup_sample); + at += cpuc->pebs_record_size; + at = get_next_pebs_record_by_bit(at, top, bit); + cnt--; + } + + __intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample); +} + static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -2126,12 +2461,13 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_ return; } - __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, - setup_pebs_fixed_sample_data); + __intel_pmu_pebs_events(event, iregs, data, at, top, 0, n, + setup_pebs_fixed_sample_data); } -static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size) +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, u64 mask) { + u64 pebs_enabled = cpuc->pebs_enabled & mask; struct perf_event *event; int bit; @@ -2142,7 +2478,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int * It needs to call intel_pmu_save_and_restart_reload() to * update the event->count for this case. */ - for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) { + for_each_set_bit(bit, (unsigned long *)&pebs_enabled, X86_PMC_IDX_MAX) { event = cpuc->events[bit]; if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) intel_pmu_save_and_restart_reload(event, 0); @@ -2157,6 +2493,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d void *base, *at, *top; short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + int max_pebs_events = intel_pmu_max_num_pebs(NULL); int bit, i, size; u64 mask; @@ -2168,15 +2505,15 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d ds->pebs_index = ds->pebs_buffer_base; - mask = (1ULL << x86_pmu.max_pebs_events) - 1; - size = x86_pmu.max_pebs_events; + mask = x86_pmu.pebs_events_mask; + size = max_pebs_events; if (x86_pmu.flags & PMU_FL_PEBS_ALL) { - mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED; - size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed; + mask |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED; + size = INTEL_PMC_IDX_FIXED + x86_pmu_max_num_counters_fixed(NULL); } if (unlikely(base >= top)) { - intel_pmu_pebs_event_update_no_drain(cpuc, size); + intel_pmu_pebs_event_update_no_drain(cpuc, mask); return; } @@ -2208,8 +2545,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d pebs_status = p->status = cpuc->pebs_enabled; bit = find_first_bit((unsigned long *)&pebs_status, - x86_pmu.max_pebs_events); - if (bit >= x86_pmu.max_pebs_events) + max_pebs_events); + + if (!(x86_pmu.pebs_events_mask & (1 << bit))) continue; /* @@ -2251,14 +2589,14 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d if (error[bit]) { perf_log_lost_samples(event, error[bit]); - if (iregs && perf_event_account_interrupt(event)) - x86_pmu_stop(event, 0); + if (iregs) + perf_event_account_interrupt(event); } if (counts[bit]) { - __intel_pmu_pebs_event(event, iregs, data, base, - top, bit, counts[bit], - setup_pebs_fixed_sample_data); + __intel_pmu_pebs_events(event, iregs, data, base, + top, bit, counts[bit], + setup_pebs_fixed_sample_data); } } } @@ -2266,13 +2604,15 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) { short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; + void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS]; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - int max_pebs_events = hybrid(cpuc->pmu, max_pebs_events); - int num_counters_fixed = hybrid(cpuc->pmu, num_counters_fixed); struct debug_store *ds = cpuc->ds; + struct x86_perf_regs perf_regs; + struct pt_regs *regs = &perf_regs.regs; + struct pebs_basic *basic; struct perf_event *event; void *base, *at, *top; - int bit, size; + int bit; u64 mask; if (!x86_pmu.pebs_active) @@ -2283,47 +2623,57 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d ds->pebs_index = ds->pebs_buffer_base; - mask = ((1ULL << max_pebs_events) - 1) | - (((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED); - size = INTEL_PMC_IDX_FIXED + num_counters_fixed; + mask = hybrid(cpuc->pmu, pebs_events_mask) | + (hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED); if (unlikely(base >= top)) { - intel_pmu_pebs_event_update_no_drain(cpuc, size); + intel_pmu_pebs_event_update_no_drain(cpuc, mask); return; } - for (at = base; at < top; at += cpuc->pebs_record_size) { + if (!iregs) + iregs = &dummy_iregs; + + /* Process all but the last event for each counter. */ + for (at = base; at < top; at += basic->format_size) { u64 pebs_status; - pebs_status = get_pebs_status(at) & cpuc->pebs_enabled; - pebs_status &= mask; + basic = at; + if (basic->format_size != cpuc->pebs_record_size) + continue; + + pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask; + for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) { + event = cpuc->events[bit]; + + if (WARN_ON_ONCE(!event) || + WARN_ON_ONCE(!event->attr.precise_ip)) + continue; - for_each_set_bit(bit, (unsigned long *)&pebs_status, size) - counts[bit]++; + if (counts[bit]++) { + __intel_pmu_pebs_event(event, iregs, regs, data, last[bit], + setup_pebs_adaptive_sample_data); + } + last[bit] = at; + } } - for_each_set_bit(bit, (unsigned long *)&mask, size) { - if (counts[bit] == 0) + for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) { + if (!counts[bit]) continue; event = cpuc->events[bit]; - if (WARN_ON_ONCE(!event)) - continue; - - if (WARN_ON_ONCE(!event->attr.precise_ip)) - continue; - __intel_pmu_pebs_event(event, iregs, data, base, - top, bit, counts[bit], - setup_pebs_adaptive_sample_data); + __intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit], + counts[bit], setup_pebs_adaptive_sample_data); } } /* - * BTS, PEBS probe and setup + * PEBS probe and setup */ -void __init intel_ds_init(void) +void __init intel_pebs_init(void) { /* * No support for 32bit formats @@ -2331,13 +2681,12 @@ void __init intel_ds_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64)) return; - x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); - x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; if (x86_pmu.version <= 4) x86_pmu.pebs_no_isolation = 1; - if (x86_pmu.pebs) { + if (x86_pmu.ds_pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; char *pebs_qual = ""; int format = x86_pmu.intel_cap.pebs_format; @@ -2345,6 +2694,11 @@ void __init intel_ds_init(void) if (format < 4) x86_pmu.intel_cap.pebs_baseline = 0; + x86_pmu.pebs_enable = intel_pmu_pebs_enable; + x86_pmu.pebs_disable = intel_pmu_pebs_disable; + x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all; + x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all; + switch (format) { case 0: pr_cont("PEBS fmt0%c, ", pebs_type); @@ -2380,6 +2734,12 @@ void __init intel_ds_init(void) x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; + case 6: + if (x86_pmu.intel_cap.pebs_baseline) { + x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ; + x86_pmu.late_setup = intel_pmu_late_setup; + } + fallthrough; case 5: x86_pmu.pebs_ept = 1; fallthrough; @@ -2404,9 +2764,17 @@ void __init intel_ds_init(void) PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); } - pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); + pr_cont("PEBS fmt%d%c%s, ", format, pebs_type, pebs_qual); - if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) { + /* + * The PEBS-via-PT is not supported on hybrid platforms, + * because not all CPUs of a hybrid machine support it. + * The global x86_pmu.intel_cap, which only contains the + * common capabilities, is used to check the availability + * of the feature. The per-PMU pebs_output_pt_available + * in a hybrid machine should be ignored. + */ + if (x86_pmu.intel_cap.pebs_output_pt_available) { pr_cont("PEBS-via-PT, "); x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; } @@ -2415,7 +2783,7 @@ void __init intel_ds_init(void) default: pr_cont("no PEBS fmt%d%c, ", format, pebs_type); - x86_pmu.pebs = 0; + x86_pmu.ds_pebs = 0; } } } @@ -2424,8 +2792,8 @@ void perf_restore_debug_store(void) { struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); - if (!x86_pmu.bts && !x86_pmu.pebs) + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; - wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds); + wrmsrq(MSR_IA32_DS_AREA, (unsigned long)ds); } diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c index 618001c208e8..e614baf42926 100644 --- a/arch/x86/events/intel/knc.c +++ b/arch/x86/events/intel/knc.c @@ -5,6 +5,7 @@ #include <linux/types.h> #include <asm/hardirq.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -159,18 +160,18 @@ static void knc_pmu_disable_all(void) { u64 val; - rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); - wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); } static void knc_pmu_enable_all(int added) { u64 val; - rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); - wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); } static inline void @@ -182,7 +183,7 @@ knc_pmu_disable_event(struct perf_event *event) val = hwc->config; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); + (void)wrmsrq_safe(hwc->config_base + hwc->idx, val); } static void knc_pmu_enable_event(struct perf_event *event) @@ -193,21 +194,21 @@ static void knc_pmu_enable_event(struct perf_event *event) val = hwc->config; val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); + (void)wrmsrq_safe(hwc->config_base + hwc->idx, val); } static inline u64 knc_pmu_get_status(void) { u64 status; - rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); + rdmsrq(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); return status; } static inline void knc_pmu_ack_status(u64 ack) { - wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); + wrmsrq(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); } static int knc_pmu_handle_irq(struct pt_regs *regs) @@ -241,19 +242,20 @@ again: for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; + u64 last_period; handled++; if (!test_bit(bit, cpuc->active_mask)) continue; + last_period = event->hw.last_period; if (!intel_pmu_save_and_restart(event)) continue; - perf_sample_data_init(&data, 0, event->hw.last_period); + perf_sample_data_init(&data, 0, last_period); - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } /* @@ -303,7 +305,7 @@ static const struct x86_pmu knc_pmu __initconst = { .apic = 1, .max_period = (1ULL << 39) - 1, .version = 0, - .num_counters = 2, + .cntr_mask64 = 0x3, .cntval_bits = 40, .cntval_mask = (1ULL << 40) - 1, .get_event_constraints = x86_get_event_constraints, diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index dc641b50814e..7aa59966e7c3 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -137,9 +137,9 @@ static void __intel_pmu_lbr_enable(bool pmi) if (cpuc->lbr_sel) lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask; if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && !pmi && cpuc->lbr_sel) - wrmsrl(MSR_LBR_SELECT, lbr_select); + wrmsrq(MSR_LBR_SELECT, lbr_select); - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); orig_debugctl = debugctl; if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) @@ -155,10 +155,10 @@ static void __intel_pmu_lbr_enable(bool pmi) debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; if (orig_debugctl != debugctl) - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); if (static_cpu_has(X86_FEATURE_ARCH_LBR)) - wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); + wrmsrq(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN); } void intel_pmu_lbr_reset_32(void) @@ -166,7 +166,7 @@ void intel_pmu_lbr_reset_32(void) int i; for (i = 0; i < x86_pmu.lbr_nr; i++) - wrmsrl(x86_pmu.lbr_from + i, 0); + wrmsrq(x86_pmu.lbr_from + i, 0); } void intel_pmu_lbr_reset_64(void) @@ -174,17 +174,17 @@ void intel_pmu_lbr_reset_64(void) int i; for (i = 0; i < x86_pmu.lbr_nr; i++) { - wrmsrl(x86_pmu.lbr_from + i, 0); - wrmsrl(x86_pmu.lbr_to + i, 0); + wrmsrq(x86_pmu.lbr_from + i, 0); + wrmsrq(x86_pmu.lbr_to + i, 0); if (x86_pmu.lbr_has_info) - wrmsrl(x86_pmu.lbr_info + i, 0); + wrmsrq(x86_pmu.lbr_info + i, 0); } } static void intel_pmu_arch_lbr_reset(void) { /* Write to ARCH_LBR_DEPTH MSR, all LBR entries are reset to 0 */ - wrmsrl(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); + wrmsrq(MSR_ARCH_LBR_DEPTH, x86_pmu.lbr_nr); } void intel_pmu_lbr_reset(void) @@ -199,7 +199,7 @@ void intel_pmu_lbr_reset(void) cpuc->last_task_ctx = NULL; cpuc->last_log_id = 0; if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select) - wrmsrl(MSR_LBR_SELECT, 0); + wrmsrq(MSR_LBR_SELECT, 0); } /* @@ -209,7 +209,7 @@ static inline u64 intel_pmu_lbr_tos(void) { u64 tos; - rdmsrl(x86_pmu.lbr_tos, tos); + rdmsrq(x86_pmu.lbr_tos, tos); return tos; } @@ -282,17 +282,17 @@ static u64 lbr_from_signext_quirk_rd(u64 val) static __always_inline void wrlbr_from(unsigned int idx, u64 val) { val = lbr_from_signext_quirk_wr(val); - wrmsrl(x86_pmu.lbr_from + idx, val); + wrmsrq(x86_pmu.lbr_from + idx, val); } static __always_inline void wrlbr_to(unsigned int idx, u64 val) { - wrmsrl(x86_pmu.lbr_to + idx, val); + wrmsrq(x86_pmu.lbr_to + idx, val); } static __always_inline void wrlbr_info(unsigned int idx, u64 val) { - wrmsrl(x86_pmu.lbr_info + idx, val); + wrmsrq(x86_pmu.lbr_info + idx, val); } static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr) @@ -302,7 +302,7 @@ static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr) if (lbr) return lbr->from; - rdmsrl(x86_pmu.lbr_from + idx, val); + rdmsrq(x86_pmu.lbr_from + idx, val); return lbr_from_signext_quirk_rd(val); } @@ -314,7 +314,7 @@ static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr) if (lbr) return lbr->to; - rdmsrl(x86_pmu.lbr_to + idx, val); + rdmsrq(x86_pmu.lbr_to + idx, val); return val; } @@ -326,7 +326,7 @@ static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr) if (lbr) return lbr->info; - rdmsrl(x86_pmu.lbr_info + idx, val); + rdmsrq(x86_pmu.lbr_info + idx, val); return val; } @@ -380,10 +380,10 @@ void intel_pmu_lbr_restore(void *ctx) wrlbr_info(lbr_idx, 0); } - wrmsrl(x86_pmu.lbr_tos, tos); + wrmsrq(x86_pmu.lbr_tos, tos); if (cpuc->lbr_select) - wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); + wrmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel); } static void intel_pmu_arch_lbr_restore(void *ctx) @@ -422,11 +422,17 @@ static __always_inline bool lbr_is_reset_in_cstate(void *ctx) return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL); } +static inline bool has_lbr_callstack_users(void *ctx) +{ + return task_context_opt(ctx)->lbr_callstack_users || + x86_pmu.lbr_callstack_users; +} + static void __intel_pmu_lbr_restore(void *ctx) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (task_context_opt(ctx)->lbr_callstack_users == 0 || + if (!has_lbr_callstack_users(ctx) || task_context_opt(ctx)->lbr_stack_state == LBR_NONE) { intel_pmu_lbr_reset(); return; @@ -469,7 +475,7 @@ void intel_pmu_lbr_save(void *ctx) task_ctx->tos = tos; if (cpuc->lbr_select) - rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel); + rdmsrq(MSR_LBR_SELECT, task_ctx->lbr_sel); } static void intel_pmu_arch_lbr_save(void *ctx) @@ -503,7 +509,7 @@ static void __intel_pmu_lbr_save(void *ctx) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - if (task_context_opt(ctx)->lbr_callstack_users == 0) { + if (!has_lbr_callstack_users(ctx)) { task_context_opt(ctx)->lbr_stack_state = LBR_NONE; return; } @@ -516,32 +522,11 @@ static void __intel_pmu_lbr_save(void *ctx) cpuc->last_log_id = ++task_context_opt(ctx)->log_id; } -void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc) -{ - void *prev_ctx_data, *next_ctx_data; - - swap(prev_epc->task_ctx_data, next_epc->task_ctx_data); - - /* - * Architecture specific synchronization makes sense in case - * both prev_epc->task_ctx_data and next_epc->task_ctx_data - * pointers are allocated. - */ - - prev_ctx_data = next_epc->task_ctx_data; - next_ctx_data = prev_epc->task_ctx_data; - - if (!prev_ctx_data || !next_ctx_data) - return; - - swap(task_context_opt(prev_ctx_data)->lbr_callstack_users, - task_context_opt(next_ctx_data)->lbr_callstack_users); -} - -void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_ctx_data *ctx_data; void *task_ctx; if (!cpuc->lbr_users) @@ -552,14 +537,18 @@ void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched * the task was scheduled out, restore the stack. Otherwise flush * the LBR stack. */ - task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL; + rcu_read_lock(); + ctx_data = rcu_dereference(task->perf_ctx_data); + task_ctx = ctx_data ? ctx_data->data : NULL; if (task_ctx) { if (sched_in) __intel_pmu_lbr_restore(task_ctx); else __intel_pmu_lbr_save(task_ctx); + rcu_read_unlock(); return; } + rcu_read_unlock(); /* * Since a context switch can flip the address space and LBR entries @@ -588,9 +577,19 @@ void intel_pmu_lbr_add(struct perf_event *event) cpuc->br_sel = event->hw.branch_reg.reg; - if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data) - task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++; + if (branch_user_callstack(cpuc->br_sel)) { + if (event->attach_state & PERF_ATTACH_TASK) { + struct task_struct *task = event->hw.target; + struct perf_ctx_data *ctx_data; + rcu_read_lock(); + ctx_data = rcu_dereference(task->perf_ctx_data); + if (ctx_data) + task_context_opt(ctx_data->data)->lbr_callstack_users++; + rcu_read_unlock(); + } else + x86_pmu.lbr_callstack_users++; + } /* * Request pmu::sched_task() callback, which will fire inside the * regular perf event scheduling, so that call will: @@ -664,9 +663,19 @@ void intel_pmu_lbr_del(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - if (branch_user_callstack(cpuc->br_sel) && - event->pmu_ctx->task_ctx_data) - task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--; + if (branch_user_callstack(cpuc->br_sel)) { + if (event->attach_state & PERF_ATTACH_TASK) { + struct task_struct *task = event->hw.target; + struct perf_ctx_data *ctx_data; + + rcu_read_lock(); + ctx_data = rcu_dereference(task->perf_ctx_data); + if (ctx_data) + task_context_opt(ctx_data->data)->lbr_callstack_users--; + rcu_read_unlock(); + } else + x86_pmu.lbr_callstack_users--; + } if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT) cpuc->lbr_select = 0; @@ -743,7 +752,7 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) u64 lbr; } msr_lastbranch; - rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); + rdmsrq(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); perf_clear_branch_entry_bitfields(br); @@ -1593,7 +1602,7 @@ void __init intel_pmu_arch_lbr_init(void) goto clear_arch_lbr; /* Apply the max depth of Arch LBR */ - if (wrmsrl_safe(MSR_ARCH_LBR_DEPTH, lbr_nr)) + if (wrmsrq_safe(MSR_ARCH_LBR_DEPTH, lbr_nr)) goto clear_arch_lbr; x86_pmu.lbr_depth_mask = eax.split.lbr_depth_mask; @@ -1609,7 +1618,7 @@ void __init intel_pmu_arch_lbr_init(void) x86_pmu.lbr_nr = lbr_nr; if (!!x86_pmu.lbr_counters) - x86_pmu.flags |= PMU_FL_BR_CNTR; + x86_pmu.flags |= PMU_FL_BR_CNTR | PMU_FL_DYN_CONSTRAINT; if (x86_pmu.lbr_mispred) static_branch_enable(&x86_lbr_mispred); diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index 35936188db01..e5fd7367e45d 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -10,8 +10,10 @@ #include <linux/perf_event.h> #include <asm/perf_event_p4.h> +#include <asm/cpu_device_id.h> #include <asm/hardirq.h> #include <asm/apic.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -732,9 +734,9 @@ static bool p4_event_match_cpu_model(unsigned int event_idx) { /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ if (event_idx == P4_EVENT_INSTR_COMPLETED) { - if (boot_cpu_data.x86_model != 3 && - boot_cpu_data.x86_model != 4 && - boot_cpu_data.x86_model != 6) + if (boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT && + boot_cpu_data.x86_vfm != INTEL_P4_PRESCOTT_2M && + boot_cpu_data.x86_vfm != INTEL_P4_CEDARMILL) return false; } @@ -776,7 +778,7 @@ static int p4_validate_raw_event(struct perf_event *event) * the user needs special permissions to be able to use it */ if (p4_ht_active() && p4_event_bind_map[v].shared) { - v = perf_allow_cpu(&event->attr); + v = perf_allow_cpu(); if (v) return v; } @@ -858,9 +860,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) u64 v; /* an official way for overflow indication */ - rdmsrl(hwc->config_base, v); + rdmsrq(hwc->config_base, v); if (v & P4_CCCR_OVF) { - wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF); + wrmsrq(hwc->config_base, v & ~P4_CCCR_OVF); return 1; } @@ -871,7 +873,7 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) * the counter has reached zero value and continued counting before * real NMI signal was received: */ - rdmsrl(hwc->event_base, v); + rdmsrq(hwc->event_base, v); if (!(v & ARCH_P4_UNFLAGGED_BIT)) return 1; @@ -896,8 +898,8 @@ static void p4_pmu_disable_pebs(void) * So at moment let leave metrics turned on forever -- it's * ok for now but need to be revisited! * - * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0); - * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0); + * (void)wrmsrq_safe(MSR_IA32_PEBS_ENABLE, 0); + * (void)wrmsrq_safe(MSR_P4_PEBS_MATRIX_VERT, 0); */ } @@ -910,7 +912,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) * state we need to clear P4_CCCR_OVF, otherwise interrupt get * asserted again and again */ - (void)wrmsrl_safe(hwc->config_base, + (void)wrmsrq_safe(hwc->config_base, p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); } @@ -919,7 +921,7 @@ static void p4_pmu_disable_all(void) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[idx]; if (!test_bit(idx, cpuc->active_mask)) continue; @@ -943,8 +945,8 @@ static void p4_pmu_enable_pebs(u64 config) bind = &p4_pebs_bind_map[idx]; - (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); - (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); + (void)wrmsrq_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); + (void)wrmsrq_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); } static void __p4_pmu_enable_event(struct perf_event *event) @@ -978,8 +980,8 @@ static void __p4_pmu_enable_event(struct perf_event *event) */ p4_pmu_enable_pebs(hwc->config); - (void)wrmsrl_safe(escr_addr, escr_conf); - (void)wrmsrl_safe(hwc->config_base, + (void)wrmsrq_safe(escr_addr, escr_conf); + (void)wrmsrq_safe(hwc->config_base, (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); } @@ -998,7 +1000,7 @@ static void p4_pmu_enable_all(int added) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[idx]; if (!test_bit(idx, cpuc->active_mask)) continue; @@ -1023,7 +1025,7 @@ static int p4_pmu_set_period(struct perf_event *event) * * the former idea is taken from OProfile code */ - wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + wrmsrq(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); } return ret; @@ -1040,7 +1042,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) cpuc = this_cpu_ptr(&cpu_hw_events); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { int overflow; if (!test_bit(idx, cpuc->active_mask)) { @@ -1071,8 +1073,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) continue; - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } if (handled) @@ -1353,7 +1354,7 @@ static __initconst const struct x86_pmu p4_pmu = { * though leave it restricted at moment assuming * HT is on */ - .num_counters = ARCH_P4_MAX_CCCR, + .cntr_mask64 = GENMASK_ULL(ARCH_P4_MAX_CCCR - 1, 0), .apic = 1, .cntval_bits = ARCH_P4_CNTRVAL_BITS, .cntval_mask = ARCH_P4_CNTRVAL_MASK, @@ -1395,9 +1396,9 @@ __init int p4_pmu_init(void) * * Solve this by zero'ing out the registers to mimic a reset. */ - for (i = 0; i < x86_pmu.num_counters; i++) { + for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { reg = x86_pmu_config_addr(i); - wrmsrl_safe(reg, 0ULL); + wrmsrq_safe(reg, 0ULL); } return 0; diff --git a/arch/x86/events/intel/p6.c b/arch/x86/events/intel/p6.c index 408879b0c0d4..6e41de355bd8 100644 --- a/arch/x86/events/intel/p6.c +++ b/arch/x86/events/intel/p6.c @@ -2,6 +2,9 @@ #include <linux/perf_event.h> #include <linux/types.h> +#include <asm/cpu_device_id.h> +#include <asm/msr.h> + #include "../perf_event.h" /* @@ -140,9 +143,9 @@ static void p6_pmu_disable_all(void) u64 val; /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); + rdmsrq(MSR_P6_EVNTSEL0, val); val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); + wrmsrq(MSR_P6_EVNTSEL0, val); } static void p6_pmu_enable_all(int added) @@ -150,9 +153,9 @@ static void p6_pmu_enable_all(int added) unsigned long val; /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); + rdmsrq(MSR_P6_EVNTSEL0, val); val |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); + wrmsrq(MSR_P6_EVNTSEL0, val); } static inline void @@ -161,7 +164,7 @@ p6_pmu_disable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 val = P6_NOP_EVENT; - (void)wrmsrl_safe(hwc->config_base, val); + (void)wrmsrq_safe(hwc->config_base, val); } static void p6_pmu_enable_event(struct perf_event *event) @@ -178,7 +181,7 @@ static void p6_pmu_enable_event(struct perf_event *event) * to actually enable the events. */ - (void)wrmsrl_safe(hwc->config_base, val); + (void)wrmsrq_safe(hwc->config_base, val); } PMU_FORMAT_ATTR(event, "config:0-7" ); @@ -214,7 +217,7 @@ static __initconst const struct x86_pmu p6_pmu = { .apic = 1, .max_period = (1ULL << 31) - 1, .version = 0, - .num_counters = 2, + .cntr_mask64 = 0x3, /* * Events have 40 bits implemented. However they are designed such * that bits [32-39] are sign extensions of bit 31. As such the @@ -248,30 +251,8 @@ __init int p6_pmu_init(void) { x86_pmu = p6_pmu; - switch (boot_cpu_data.x86_model) { - case 1: /* Pentium Pro */ + if (boot_cpu_data.x86_vfm == INTEL_PENTIUM_PRO) x86_add_quirk(p6_pmu_rdpmc_quirk); - break; - - case 3: /* Pentium II - Klamath */ - case 5: /* Pentium II - Deschutes */ - case 6: /* Pentium II - Mendocino */ - break; - - case 7: /* Pentium III - Katmai */ - case 8: /* Pentium III - Coppermine */ - case 10: /* Pentium III Xeon */ - case 11: /* Pentium III - Tualatin */ - break; - - case 9: /* Pentium M - Banias */ - case 13: /* Pentium M - Dothan */ - break; - - default: - pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); - return -ENODEV; - } memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 14db6d9d318b..e8cf29d2b10c 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -18,11 +18,13 @@ #include <linux/slab.h> #include <linux/device.h> +#include <asm/cpuid/api.h> #include <asm/perf_event.h> #include <asm/insn.h> #include <asm/io.h> #include <asm/intel_pt.h> #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "../perf_event.h" #include "pt.h" @@ -193,7 +195,7 @@ static int __init pt_pmu_hw_init(void) int ret; long i; - rdmsrl(MSR_PLATFORM_INFO, reg); + rdmsrq(MSR_PLATFORM_INFO, reg); pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8; /* @@ -201,10 +203,10 @@ static int __init pt_pmu_hw_init(void) * otherwise, zero for numerator stands for "not enumerated" * as per SDM */ - if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) { + if (boot_cpu_data.cpuid_level >= CPUID_LEAF_TSC) { u32 eax, ebx, ecx, edx; - cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx); + cpuid(CPUID_LEAF_TSC, &eax, &ebx, &ecx, &edx); pt_pmu.tsc_art_num = ebx; pt_pmu.tsc_art_den = eax; @@ -229,7 +231,7 @@ static int __init pt_pmu_hw_init(void) * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace * post-VMXON. */ - rdmsrl(MSR_IA32_VMX_MISC, reg); + rdmsrq(MSR_IA32_VMX_MISC, reg); if (reg & BIT(14)) pt_pmu.vmx = true; } @@ -416,15 +418,18 @@ static bool pt_event_valid(struct perf_event *event) static void pt_config_start(struct perf_event *event) { struct pt *pt = this_cpu_ptr(&pt_ctx); - u64 ctl = event->hw.config; + u64 ctl = event->hw.aux_config; + + if (READ_ONCE(event->hw.aux_paused)) + return; ctl |= RTIT_CTL_TRACEEN; if (READ_ONCE(pt->vmx_on)) perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL); else - wrmsrl(MSR_IA32_RTIT_CTL, ctl); + wrmsrq(MSR_IA32_RTIT_CTL, ctl); - WRITE_ONCE(event->hw.config, ctl); + WRITE_ONCE(event->hw.aux_config, ctl); } /* Address ranges and their corresponding msr configuration registers */ @@ -481,12 +486,12 @@ static u64 pt_config_filters(struct perf_event *event) /* avoid redundant msr writes */ if (pt->filters.filter[range].msr_a != filter->msr_a) { - wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a); + wrmsrq(pt_address_ranges[range].msr_a, filter->msr_a); pt->filters.filter[range].msr_a = filter->msr_a; } if (pt->filters.filter[range].msr_b != filter->msr_b) { - wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b); + wrmsrq(pt_address_ranges[range].msr_b, filter->msr_b); pt->filters.filter[range].msr_b = filter->msr_b; } @@ -503,9 +508,9 @@ static void pt_config(struct perf_event *event) u64 reg; /* First round: clear STATUS, in particular the PSB byte counter. */ - if (!event->hw.config) { + if (!event->hw.aux_config) { perf_event_itrace_started(event); - wrmsrl(MSR_IA32_RTIT_STATUS, 0); + wrmsrq(MSR_IA32_RTIT_STATUS, 0); } reg = pt_config_filters(event); @@ -533,14 +538,31 @@ static void pt_config(struct perf_event *event) reg |= (event->attr.config & PT_CONFIG_MASK); - event->hw.config = reg; + event->hw.aux_config = reg; + + /* + * Allow resume before starting so as not to overwrite a value set by a + * PMI. + */ + barrier(); + WRITE_ONCE(pt->resume_allowed, 1); + /* Configuration is complete, it is now OK to handle an NMI */ + barrier(); + WRITE_ONCE(pt->handle_nmi, 1); + barrier(); pt_config_start(event); + barrier(); + /* + * Allow pause after starting so its pt_config_stop() doesn't race with + * pt_config_start(). + */ + WRITE_ONCE(pt->pause_allowed, 1); } static void pt_config_stop(struct perf_event *event) { struct pt *pt = this_cpu_ptr(&pt_ctx); - u64 ctl = READ_ONCE(event->hw.config); + u64 ctl = READ_ONCE(event->hw.aux_config); /* may be already stopped by a PMI */ if (!(ctl & RTIT_CTL_TRACEEN)) @@ -548,9 +570,9 @@ static void pt_config_stop(struct perf_event *event) ctl &= ~RTIT_CTL_TRACEEN; if (!READ_ONCE(pt->vmx_on)) - wrmsrl(MSR_IA32_RTIT_CTL, ctl); + wrmsrq(MSR_IA32_RTIT_CTL, ctl); - WRITE_ONCE(event->hw.config, ctl); + WRITE_ONCE(event->hw.aux_config, ctl); /* * A wrmsr that disables trace generation serializes other PT @@ -637,13 +659,13 @@ static void pt_config_buffer(struct pt_buffer *buf) reg = virt_to_phys(base); if (pt->output_base != reg) { pt->output_base = reg; - wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg); + wrmsrq(MSR_IA32_RTIT_OUTPUT_BASE, reg); } reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32); if (pt->output_mask != reg) { pt->output_mask = reg; - wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg); + wrmsrq(MSR_IA32_RTIT_OUTPUT_MASK, reg); } } @@ -828,11 +850,13 @@ static void pt_buffer_advance(struct pt_buffer *buf) buf->cur_idx++; if (buf->cur_idx == buf->cur->last) { - if (buf->cur == buf->last) + if (buf->cur == buf->last) { buf->cur = buf->first; - else + buf->wrapped = true; + } else { buf->cur = list_entry(buf->cur->list.next, struct topa, list); + } buf->cur_idx = 0; } } @@ -846,8 +870,11 @@ static void pt_buffer_advance(struct pt_buffer *buf) static void pt_update_head(struct pt *pt) { struct pt_buffer *buf = perf_get_aux(&pt->handle); + bool wrapped = buf->wrapped; u64 topa_idx, base, old; + buf->wrapped = false; + if (buf->single) { local_set(&buf->data_size, buf->output_off); return; @@ -865,7 +892,7 @@ static void pt_update_head(struct pt *pt) } else { old = (local64_xchg(&buf->head, base) & ((buf->nr_pages << PAGE_SHIFT) - 1)); - if (base < old) + if (base < old || (base == old && wrapped)) base += buf->nr_pages << PAGE_SHIFT; local_add(base - old, &buf->data_size); @@ -878,7 +905,7 @@ static void pt_update_head(struct pt *pt) */ static void *pt_buffer_region(struct pt_buffer *buf) { - return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); + return phys_to_virt((phys_addr_t)TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT); } /** @@ -900,7 +927,7 @@ static void pt_handle_status(struct pt *pt) int advance = 0; u64 status; - rdmsrl(MSR_IA32_RTIT_STATUS, status); + rdmsrq(MSR_IA32_RTIT_STATUS, status); if (status & RTIT_STATUS_ERROR) { pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n"); @@ -944,7 +971,7 @@ static void pt_handle_status(struct pt *pt) if (advance) pt_buffer_advance(buf); - wrmsrl(MSR_IA32_RTIT_STATUS, status); + wrmsrq(MSR_IA32_RTIT_STATUS, status); } /** @@ -959,12 +986,12 @@ static void pt_read_offset(struct pt_buffer *buf) struct topa_page *tp; if (!buf->single) { - rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base); + rdmsrq(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base); tp = phys_to_virt(pt->output_base); buf->cur = &tp->topa; } - rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask); + rdmsrq(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask); /* offset within current output region */ buf->output_off = pt->output_mask >> 32; /* index of current output region within this table */ @@ -990,7 +1017,7 @@ pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg) * order allocations, there shouldn't be many of these. */ list_for_each_entry(topa, &buf->tables, list) { - if (topa->offset + topa->size > pg << PAGE_SHIFT) + if (topa->offset + topa->size > (unsigned long)pg << PAGE_SHIFT) goto found; } @@ -1511,6 +1538,7 @@ void intel_pt_interrupt(void) buf = perf_aux_output_begin(&pt->handle, event); if (!buf) { event->hw.state = PERF_HES_STOPPED; + WRITE_ONCE(pt->resume_allowed, 0); return; } @@ -1519,6 +1547,7 @@ void intel_pt_interrupt(void) ret = pt_buffer_reset_markers(buf, &pt->handle); if (ret) { perf_aux_output_end(&pt->handle, 0); + WRITE_ONCE(pt->resume_allowed, 0); return; } @@ -1557,7 +1586,7 @@ void intel_pt_handle_vmx(int on) /* Turn PTs back on */ if (!on && event) - wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config); + wrmsrq(MSR_IA32_RTIT_CTL, event->hw.aux_config); local_irq_restore(flags); } @@ -1573,6 +1602,26 @@ static void pt_event_start(struct perf_event *event, int mode) struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt_buffer *buf; + if (mode & PERF_EF_RESUME) { + if (READ_ONCE(pt->resume_allowed)) { + u64 status; + + /* + * Only if the trace is not active and the error and + * stopped bits are clear, is it safe to start, but a + * PMI might have just cleared these, so resume_allowed + * must be checked again also. + */ + rdmsrq(MSR_IA32_RTIT_STATUS, status); + if (!(status & (RTIT_STATUS_TRIGGEREN | + RTIT_STATUS_ERROR | + RTIT_STATUS_STOPPED)) && + READ_ONCE(pt->resume_allowed)) + pt_config_start(event); + } + return; + } + buf = perf_aux_output_begin(&pt->handle, event); if (!buf) goto fail_stop; @@ -1583,7 +1632,6 @@ static void pt_event_start(struct perf_event *event, int mode) goto fail_end_stop; } - WRITE_ONCE(pt->handle_nmi, 1); hwc->state = 0; pt_config_buffer(buf); @@ -1601,11 +1649,27 @@ static void pt_event_stop(struct perf_event *event, int mode) { struct pt *pt = this_cpu_ptr(&pt_ctx); + if (mode & PERF_EF_PAUSE) { + if (READ_ONCE(pt->pause_allowed)) + pt_config_stop(event); + return; + } + /* * Protect against the PMI racing with disabling wrmsr, * see comment in intel_pt_interrupt(). */ WRITE_ONCE(pt->handle_nmi, 0); + barrier(); + + /* + * Prevent a resume from attempting to restart tracing, or a pause + * during a subsequent start. Do this after clearing handle_nmi so that + * pt_event_snapshot_aux() will not re-allow them. + */ + WRITE_ONCE(pt->pause_allowed, 0); + WRITE_ONCE(pt->resume_allowed, 0); + barrier(); pt_config_stop(event); @@ -1656,12 +1720,15 @@ static long pt_event_snapshot_aux(struct perf_event *event, if (WARN_ON_ONCE(!buf->snapshot)) return 0; + /* Prevent pause/resume from attempting to start/stop tracing */ + WRITE_ONCE(pt->pause_allowed, 0); + WRITE_ONCE(pt->resume_allowed, 0); + barrier(); /* - * Here, handle_nmi tells us if the tracing is on + * There is no PT interrupt in this mode, so stop the trace and it will + * remain stopped while the buffer is copied. */ - if (READ_ONCE(pt->handle_nmi)) - pt_config_stop(event); - + pt_config_stop(event); pt_read_offset(buf); pt_update_head(pt); @@ -1673,12 +1740,16 @@ static long pt_event_snapshot_aux(struct perf_event *event, ret = perf_output_copy_aux(&pt->handle, handle, from, to); /* - * If the tracing was on when we turned up, restart it. - * Compiler barrier not needed as we couldn't have been - * preempted by anything that touches pt->handle_nmi. + * Here, handle_nmi tells us if the tracing was on. + * If the tracing was on, restart it. */ - if (pt->handle_nmi) + if (READ_ONCE(pt->handle_nmi)) { + WRITE_ONCE(pt->resume_allowed, 1); + barrier(); pt_config_start(event); + barrier(); + WRITE_ONCE(pt->pause_allowed, 1); + } return ret; } @@ -1769,7 +1840,7 @@ static __init int pt_init(void) for_each_online_cpu(cpu) { u64 ctl; - ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl); + ret = rdmsrq_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl); if (!ret && (ctl & RTIT_CTL_TRACEEN)) prior_warn++; } @@ -1793,8 +1864,12 @@ static __init int pt_init(void) if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; + else + pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE; - pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; + pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | + PERF_PMU_CAP_ITRACE | + PERF_PMU_CAP_AUX_PAUSE; pt_pmu.pmu.attr_groups = pt_attr_groups; pt_pmu.pmu.task_ctx_nr = perf_sw_context; pt_pmu.pmu.event_init = pt_event_init; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index 96906a62aacd..2ac36250b656 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -33,13 +33,10 @@ struct topa_entry { u64 rsvd2 : 1; u64 size : 4; u64 rsvd3 : 2; - u64 base : 36; - u64 rsvd4 : 16; + u64 base : 40; + u64 rsvd4 : 12; }; -/* TSC to Core Crystal Clock Ratio */ -#define CPUID_TSC_LEAF 0x15 - struct pt_pmu { struct pmu pmu; u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; @@ -65,6 +62,7 @@ struct pt_pmu { * @head: logical write offset inside the buffer * @snapshot: if this is for a snapshot/overwrite counter * @single: use Single Range Output instead of ToPA + * @wrapped: buffer advance wrapped back to the first topa table * @stop_pos: STOP topa entry index * @intr_pos: INT topa entry index * @stop_te: STOP topa entry pointer @@ -82,6 +80,7 @@ struct pt_buffer { local64_t head; bool snapshot; bool single; + bool wrapped; long stop_pos, intr_pos; struct topa_entry *stop_te, *intr_te; void **data_pages; @@ -117,6 +116,8 @@ struct pt_filters { * @filters: last configured filters * @handle_nmi: do handle PT PMI on this cpu, there's an active event * @vmx_on: 1 if VMX is ON on this cpu + * @pause_allowed: PERF_EF_PAUSE is allowed to stop tracing + * @resume_allowed: PERF_EF_RESUME is allowed to start tracing * @output_base: cached RTIT_OUTPUT_BASE MSR value * @output_mask: cached RTIT_OUTPUT_MASK MSR value */ @@ -125,6 +126,8 @@ struct pt { struct pt_filters filters; int handle_nmi; int vmx_on; + int pause_allowed; + int resume_allowed; u64 output_base; u64 output_mask; }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 419c517b8594..e0815a12db90 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -3,6 +3,7 @@ #include <asm/cpu_device_id.h> #include <asm/intel-family.h> +#include <asm/msr.h> #include "uncore.h" #include "uncore_discovery.h" @@ -34,6 +35,7 @@ static struct event_constraint uncore_constraint_fixed = struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); +MODULE_DESCRIPTION("Support for Intel uncore performance events"); MODULE_LICENSE("GPL"); int uncore_pcibus_to_dieid(struct pci_bus *bus) @@ -149,7 +151,7 @@ u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *eve { u64 count; - rdmsrl(event->hw.event_base, count); + rdmsrq(event->hw.event_base, count); return count; } @@ -263,6 +265,9 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, return; } + if (intel_generic_uncore_assign_hw_event(event, box)) + return; + hwc->config_base = uncore_event_ctl(box, hwc->idx); hwc->event_base = uncore_perf_ctr(box, hwc->idx); } @@ -301,17 +306,11 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) { struct intel_uncore_box *box; struct perf_event *event; - unsigned long flags; int bit; box = container_of(hrtimer, struct intel_uncore_box, hrtimer); if (!box->n_active || box->cpu != smp_processor_id()) return HRTIMER_NORESTART; - /* - * disable local interrupt to prevent uncore_pmu_event_start/stop - * to interrupt the update process - */ - local_irq_save(flags); /* * handle boxes with an active event list as opposed to active @@ -324,8 +323,6 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) uncore_perf_event_update(box, box->events[bit]); - local_irq_restore(flags); - hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); return HRTIMER_RESTART; } @@ -333,7 +330,7 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) { hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration), - HRTIMER_MODE_REL_PINNED); + HRTIMER_MODE_REL_PINNED_HARD); } void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) @@ -343,8 +340,7 @@ void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) { - hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - box->hrtimer.function = uncore_pmu_hrtimer; + hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); } static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, @@ -741,7 +737,7 @@ static int uncore_pmu_event_init(struct perf_event *event) pmu = uncore_event_to_pmu(event); /* no device found for this pmu */ - if (pmu->func_id < 0) + if (!pmu->registered) return -ENOENT; /* Sampling not supported yet */ @@ -843,7 +839,9 @@ static void uncore_pmu_disable(struct pmu *pmu) static ssize_t uncore_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { - return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask); + struct intel_uncore_pmu *pmu = container_of(dev_get_drvdata(dev), struct intel_uncore_pmu, pmu); + + return cpumap_print_to_pagebuf(true, buf, &pmu->cpu_mask); } static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); @@ -860,7 +858,10 @@ static const struct attribute_group uncore_pmu_attr_group = { static inline int uncore_get_box_id(struct intel_uncore_type *type, struct intel_uncore_pmu *pmu) { - return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx; + if (type->boxes) + return intel_uncore_find_discovery_unit_id(type->boxes, -1, pmu->pmu_idx); + + return pmu->pmu_idx; } void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu) @@ -961,6 +962,9 @@ static void uncore_type_exit(struct intel_uncore_type *type) if (type->cleanup_mapping) type->cleanup_mapping(type); + if (type->cleanup_extra_boxes) + type->cleanup_extra_boxes(type); + if (pmu) { for (i = 0; i < type->num_boxes; i++, pmu++) { uncore_pmu_unregister(pmu); @@ -969,10 +973,7 @@ static void uncore_type_exit(struct intel_uncore_type *type) kfree(type->pmus); type->pmus = NULL; } - if (type->box_ids) { - kfree(type->box_ids); - type->box_ids = NULL; - } + kfree(type->events_group); type->events_group = NULL; } @@ -983,7 +984,7 @@ static void uncore_types_exit(struct intel_uncore_type **types) uncore_type_exit(*types); } -static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) +static int __init uncore_type_init(struct intel_uncore_type *type) { struct intel_uncore_pmu *pmus; size_t size; @@ -996,7 +997,6 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) size = uncore_max_dies() * sizeof(struct intel_uncore_box *); for (i = 0; i < type->num_boxes; i++) { - pmus[i].func_id = setid ? i : -1; pmus[i].pmu_idx = i; pmus[i].type = type; pmus[i].boxes = kzalloc(size, GFP_KERNEL); @@ -1046,12 +1046,12 @@ err: } static int __init -uncore_types_init(struct intel_uncore_type **types, bool setid) +uncore_types_init(struct intel_uncore_type **types) { int ret; for (; *types; types++) { - ret = uncore_type_init(*types, setid); + ret = uncore_type_init(*types); if (ret) return ret; } @@ -1076,22 +1076,19 @@ static struct intel_uncore_pmu * uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev) { struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_discovery_unit *unit; struct intel_uncore_type *type; - u64 box_ctl; - int i, die; + struct rb_node *node; for (; *types; types++) { type = *types; - for (die = 0; die < __uncore_max_dies; die++) { - for (i = 0; i < type->num_boxes; i++) { - if (!type->box_ctls[die]) - continue; - box_ctl = type->box_ctls[die] + type->pci_offsets[i]; - if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) && - pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) && - pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl)) - return &type->pmus[i]; - } + + for (node = rb_first(type->boxes); node; node = rb_next(node)) { + unit = rb_entry(node, struct intel_uncore_discovery_unit, node); + if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(unit->addr) && + pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(unit->addr) && + pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr)) + return &type->pmus[unit->pmu_idx]; } } @@ -1154,11 +1151,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev, if (!box) return -ENOMEM; - if (pmu->func_id < 0) - pmu->func_id = pdev->devfn; - else - WARN_ON_ONCE(pmu->func_id != pdev->devfn); - atomic_inc(&box->refcnt); box->dieid = die; box->pci_dev = pdev; @@ -1367,28 +1359,25 @@ static struct notifier_block uncore_pci_notifier = { static void uncore_pci_pmus_register(void) { struct intel_uncore_type **types = uncore_pci_uncores; + struct intel_uncore_discovery_unit *unit; struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; + struct rb_node *node; struct pci_dev *pdev; - u64 box_ctl; - int i, die; for (; *types; types++) { type = *types; - for (die = 0; die < __uncore_max_dies; die++) { - for (i = 0; i < type->num_boxes; i++) { - if (!type->box_ctls[die]) - continue; - box_ctl = type->box_ctls[die] + type->pci_offsets[i]; - pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl), - UNCORE_DISCOVERY_PCI_BUS(box_ctl), - UNCORE_DISCOVERY_PCI_DEVFN(box_ctl)); - if (!pdev) - continue; - pmu = &type->pmus[i]; - - uncore_pci_pmu_register(pdev, type, pmu, die); - } + + for (node = rb_first(type->boxes); node; node = rb_next(node)) { + unit = rb_entry(node, struct intel_uncore_discovery_unit, node); + pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(unit->addr), + UNCORE_DISCOVERY_PCI_BUS(unit->addr), + UNCORE_DISCOVERY_PCI_DEVFN(unit->addr)); + + if (!pdev) + continue; + pmu = &type->pmus[unit->pmu_idx]; + uncore_pci_pmu_register(pdev, type, pmu, unit->die); } } @@ -1407,7 +1396,7 @@ static int __init uncore_pci_init(void) goto err; } - ret = uncore_types_init(uncore_pci_uncores, false); + ret = uncore_types_init(uncore_pci_uncores); if (ret) goto errtype; @@ -1453,6 +1442,18 @@ static void uncore_pci_exit(void) } } +static bool uncore_die_has_box(struct intel_uncore_type *type, + int die, unsigned int pmu_idx) +{ + if (!type->boxes) + return true; + + if (intel_uncore_find_discovery_unit_id(type->boxes, die, pmu_idx) < 0) + return false; + + return true; +} + static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, int new_cpu) { @@ -1468,18 +1469,25 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, if (old_cpu < 0) { WARN_ON_ONCE(box->cpu != -1); - box->cpu = new_cpu; + if (uncore_die_has_box(type, die, pmu->pmu_idx)) { + box->cpu = new_cpu; + cpumask_set_cpu(new_cpu, &pmu->cpu_mask); + } continue; } - WARN_ON_ONCE(box->cpu != old_cpu); + WARN_ON_ONCE(box->cpu != -1 && box->cpu != old_cpu); box->cpu = -1; + cpumask_clear_cpu(old_cpu, &pmu->cpu_mask); if (new_cpu < 0) continue; + if (!uncore_die_has_box(type, die, pmu->pmu_idx)) + continue; uncore_pmu_cancel_hrtimer(box); perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu); box->cpu = new_cpu; + cpumask_set_cpu(new_cpu, &pmu->cpu_mask); } } @@ -1502,7 +1510,7 @@ static void uncore_box_unref(struct intel_uncore_type **types, int id) pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { box = pmu->boxes[id]; - if (box && atomic_dec_return(&box->refcnt) == 0) + if (box && box->cpu >= 0 && atomic_dec_return(&box->refcnt) == 0) uncore_box_exit(box); } } @@ -1592,7 +1600,7 @@ static int uncore_box_ref(struct intel_uncore_type **types, pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { box = pmu->boxes[id]; - if (box && atomic_inc_return(&box->refcnt) == 1) + if (box && box->cpu >= 0 && atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } @@ -1656,7 +1664,7 @@ static int __init uncore_cpu_init(void) { int ret; - ret = uncore_types_init(uncore_msr_uncores, true); + ret = uncore_types_init(uncore_msr_uncores); if (ret) goto err; @@ -1675,7 +1683,7 @@ static int __init uncore_mmio_init(void) struct intel_uncore_type **types = uncore_mmio_uncores; int ret; - ret = uncore_types_init(types, true); + ret = uncore_types_init(types); if (ret) goto err; @@ -1794,6 +1802,11 @@ static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { .mmio_init = adl_uncore_mmio_init, }; +static const struct intel_uncore_init_fun lnl_uncore_init __initconst = { + .cpu_init = lnl_uncore_cpu_init, + .mmio_init = lnl_uncore_mmio_init, +}; + static const struct intel_uncore_init_fun icx_uncore_init __initconst = { .cpu_init = icx_uncore_cpu_init, .pci_init = icx_uncore_pci_init, @@ -1871,6 +1884,10 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init), X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init), X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_ARROWLAKE, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), @@ -1879,6 +1896,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_uncore_init), {}, }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 4838502d89ae..3dcb88c0ecfa 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -62,7 +62,6 @@ struct intel_uncore_type { unsigned fixed_ctr; unsigned fixed_ctl; unsigned box_ctl; - u64 *box_ctls; /* Unit ctrl addr of the first box of each die */ union { unsigned msr_offset; unsigned mmio_offset; @@ -76,7 +75,6 @@ struct intel_uncore_type { u64 *pci_offsets; u64 *mmio_offsets; }; - unsigned *box_ids; struct event_constraint unconstrainted; struct event_constraint *constraints; struct intel_uncore_pmu *pmus; @@ -86,6 +84,7 @@ struct intel_uncore_type { const struct attribute_group *attr_groups[4]; const struct attribute_group **attr_update; struct pmu *pmu; /* for custom pmu ops */ + struct rb_root *boxes; /* * Uncore PMU would store relevant platform topology configuration here * to identify which platform component each PMON block of that type is @@ -98,6 +97,10 @@ struct intel_uncore_type { int (*get_topology)(struct intel_uncore_type *type); void (*set_mapping)(struct intel_uncore_type *type); void (*cleanup_mapping)(struct intel_uncore_type *type); + /* + * Optional callbacks for extra uncore units cleanup + */ + void (*cleanup_extra_boxes)(struct intel_uncore_type *type); }; #define pmu_group attr_groups[0] @@ -122,9 +125,9 @@ struct intel_uncore_pmu { struct pmu pmu; char name[UNCORE_PMU_NAME_LEN]; int pmu_idx; - int func_id; bool registered; atomic_t activeboxes; + cpumask_t cpu_mask; struct intel_uncore_type *type; struct intel_uncore_box **boxes; }; @@ -607,10 +610,12 @@ void skl_uncore_cpu_init(void); void icl_uncore_cpu_init(void); void tgl_uncore_cpu_init(void); void adl_uncore_cpu_init(void); +void lnl_uncore_cpu_init(void); void mtl_uncore_cpu_init(void); void tgl_uncore_mmio_init(void); void tgl_l_uncore_mmio_init(void); void adl_uncore_mmio_init(void); +void lnl_uncore_mmio_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 9a698a92962a..18a3022f26a0 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -5,6 +5,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <asm/msr.h> #include "uncore.h" #include "uncore_discovery.h" @@ -89,9 +90,7 @@ add_uncore_discovery_type(struct uncore_unit_discovery *unit) if (!type) return NULL; - type->box_ctrl_die = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); - if (!type->box_ctrl_die) - goto free_type; + type->units = RB_ROOT; type->access_type = unit->access_type; num_discovered_types[type->access_type]++; @@ -100,12 +99,6 @@ add_uncore_discovery_type(struct uncore_unit_discovery *unit) rb_add(&type->node, &discovery_tables, __type_less); return type; - -free_type: - kfree(type); - - return NULL; - } static struct intel_uncore_discovery_type * @@ -120,14 +113,118 @@ get_uncore_discovery_type(struct uncore_unit_discovery *unit) return add_uncore_discovery_type(unit); } +static inline int pmu_idx_cmp(const void *key, const struct rb_node *b) +{ + struct intel_uncore_discovery_unit *unit; + const unsigned int *id = key; + + unit = rb_entry(b, struct intel_uncore_discovery_unit, node); + + if (unit->pmu_idx > *id) + return -1; + else if (unit->pmu_idx < *id) + return 1; + + return 0; +} + +static struct intel_uncore_discovery_unit * +intel_uncore_find_discovery_unit(struct rb_root *units, int die, + unsigned int pmu_idx) +{ + struct intel_uncore_discovery_unit *unit; + struct rb_node *pos; + + if (!units) + return NULL; + + pos = rb_find_first(&pmu_idx, units, pmu_idx_cmp); + if (!pos) + return NULL; + unit = rb_entry(pos, struct intel_uncore_discovery_unit, node); + + if (die < 0) + return unit; + + for (; pos; pos = rb_next(pos)) { + unit = rb_entry(pos, struct intel_uncore_discovery_unit, node); + + if (unit->pmu_idx != pmu_idx) + break; + + if (unit->die == die) + return unit; + } + + return NULL; +} + +int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die, + unsigned int pmu_idx) +{ + struct intel_uncore_discovery_unit *unit; + + unit = intel_uncore_find_discovery_unit(units, die, pmu_idx); + if (unit) + return unit->id; + + return -1; +} + +static inline bool unit_less(struct rb_node *a, const struct rb_node *b) +{ + struct intel_uncore_discovery_unit *a_node, *b_node; + + a_node = rb_entry(a, struct intel_uncore_discovery_unit, node); + b_node = rb_entry(b, struct intel_uncore_discovery_unit, node); + + if (a_node->pmu_idx < b_node->pmu_idx) + return true; + if (a_node->pmu_idx > b_node->pmu_idx) + return false; + + if (a_node->die < b_node->die) + return true; + if (a_node->die > b_node->die) + return false; + + return 0; +} + +static inline struct intel_uncore_discovery_unit * +uncore_find_unit(struct rb_root *root, unsigned int id) +{ + struct intel_uncore_discovery_unit *unit; + struct rb_node *node; + + for (node = rb_first(root); node; node = rb_next(node)) { + unit = rb_entry(node, struct intel_uncore_discovery_unit, node); + if (unit->id == id) + return unit; + } + + return NULL; +} + +void uncore_find_add_unit(struct intel_uncore_discovery_unit *node, + struct rb_root *root, u16 *num_units) +{ + struct intel_uncore_discovery_unit *unit = uncore_find_unit(root, node->id); + + if (unit) + node->pmu_idx = unit->pmu_idx; + else if (num_units) + node->pmu_idx = (*num_units)++; + + rb_add(&node->node, root, unit_less); +} + static void uncore_insert_box_info(struct uncore_unit_discovery *unit, - int die, bool parsed) + int die) { + struct intel_uncore_discovery_unit *node; struct intel_uncore_discovery_type *type; - unsigned int *ids; - u64 *box_offset; - int i; if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) { pr_info("Invalid address is detected for uncore type %d box %d, " @@ -136,71 +233,29 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit, return; } - if (parsed) { - type = search_uncore_discovery_type(unit->box_type); - if (!type) { - pr_info("A spurious uncore type %d is detected, " - "Disable the uncore type.\n", - unit->box_type); - return; - } - /* Store the first box of each die */ - if (!type->box_ctrl_die[die]) - type->box_ctrl_die[die] = unit->ctl; + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) return; - } - type = get_uncore_discovery_type(unit); - if (!type) - return; + node->die = die; + node->id = unit->box_id; + node->addr = unit->ctl; - box_offset = kcalloc(type->num_boxes + 1, sizeof(u64), GFP_KERNEL); - if (!box_offset) + type = get_uncore_discovery_type(unit); + if (!type) { + kfree(node); return; + } - ids = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); - if (!ids) - goto free_box_offset; + uncore_find_add_unit(node, &type->units, &type->num_units); /* Store generic information for the first box */ - if (!type->num_boxes) { - type->box_ctrl = unit->ctl; - type->box_ctrl_die[die] = unit->ctl; + if (type->num_units == 1) { type->num_counters = unit->num_regs; type->counter_width = unit->bit_width; type->ctl_offset = unit->ctl_offset; type->ctr_offset = unit->ctr_offset; - *ids = unit->box_id; - goto end; - } - - for (i = 0; i < type->num_boxes; i++) { - ids[i] = type->ids[i]; - box_offset[i] = type->box_offset[i]; - - if (unit->box_id == ids[i]) { - pr_info("Duplicate uncore type %d box ID %d is detected, " - "Drop the duplicate uncore unit.\n", - unit->box_type, unit->box_id); - goto free_ids; - } } - ids[i] = unit->box_id; - box_offset[i] = unit->ctl - type->box_ctrl; - kfree(type->ids); - kfree(type->box_offset); -end: - type->ids = ids; - type->box_offset = box_offset; - type->num_boxes++; - return; - -free_ids: - kfree(ids); - -free_box_offset: - kfree(box_offset); - } static bool @@ -279,7 +334,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die, if (uncore_ignore_unit(&unit, ignore)) continue; - uncore_insert_box_info(&unit, die, *parsed); + uncore_insert_box_info(&unit, die); } *parsed = true; @@ -339,9 +394,16 @@ err: void intel_uncore_clear_discovery_tables(void) { struct intel_uncore_discovery_type *type, *next; + struct intel_uncore_discovery_unit *pos; + struct rb_node *node; rbtree_postorder_for_each_entry_safe(type, next, &discovery_tables, node) { - kfree(type->box_ctrl_die); + while (!RB_EMPTY_ROOT(&type->units)) { + node = rb_first(&type->units); + pos = rb_entry(node, struct intel_uncore_discovery_unit, node); + rb_erase(node, &type->units); + kfree(pos); + } kfree(type); } } @@ -366,19 +428,31 @@ static const struct attribute_group generic_uncore_format_group = { .attrs = generic_uncore_formats_attr, }; +static u64 intel_generic_uncore_box_ctl(struct intel_uncore_box *box) +{ + struct intel_uncore_discovery_unit *unit; + + unit = intel_uncore_find_discovery_unit(box->pmu->type->boxes, + -1, box->pmu->pmu_idx); + if (WARN_ON_ONCE(!unit)) + return 0; + + return unit->addr; +} + void intel_generic_uncore_msr_init_box(struct intel_uncore_box *box) { - wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); + wrmsrq(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_INT); } void intel_generic_uncore_msr_disable_box(struct intel_uncore_box *box) { - wrmsrl(uncore_msr_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); + wrmsrq(intel_generic_uncore_box_ctl(box), GENERIC_PMON_BOX_CTL_FRZ); } void intel_generic_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(uncore_msr_box_ctl(box), 0); + wrmsrq(intel_generic_uncore_box_ctl(box), 0); } static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box, @@ -386,7 +460,7 @@ static void intel_generic_uncore_msr_enable_event(struct intel_uncore_box *box, { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrq(hwc->config_base, hwc->config); } static void intel_generic_uncore_msr_disable_event(struct intel_uncore_box *box, @@ -394,7 +468,7 @@ static void intel_generic_uncore_msr_disable_event(struct intel_uncore_box *box, { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, 0); + wrmsrq(hwc->config_base, 0); } static struct intel_uncore_ops generic_uncore_msr_ops = { @@ -406,10 +480,47 @@ static struct intel_uncore_ops generic_uncore_msr_ops = { .read_counter = uncore_msr_read_counter, }; +bool intel_generic_uncore_assign_hw_event(struct perf_event *event, + struct intel_uncore_box *box) +{ + struct hw_perf_event *hwc = &event->hw; + u64 box_ctl; + + if (!box->pmu->type->boxes) + return false; + + if (box->io_addr) { + hwc->config_base = uncore_pci_event_ctl(box, hwc->idx); + hwc->event_base = uncore_pci_perf_ctr(box, hwc->idx); + return true; + } + + box_ctl = intel_generic_uncore_box_ctl(box); + if (!box_ctl) + return false; + + if (box->pci_dev) { + box_ctl = UNCORE_DISCOVERY_PCI_BOX_CTRL(box_ctl); + hwc->config_base = box_ctl + uncore_pci_event_ctl(box, hwc->idx); + hwc->event_base = box_ctl + uncore_pci_perf_ctr(box, hwc->idx); + return true; + } + + hwc->config_base = box_ctl + box->pmu->type->event_ctl + hwc->idx; + hwc->event_base = box_ctl + box->pmu->type->perf_ctr + hwc->idx; + + return true; +} + +static inline int intel_pci_uncore_box_ctl(struct intel_uncore_box *box) +{ + return UNCORE_DISCOVERY_PCI_BOX_CTRL(intel_generic_uncore_box_ctl(box)); +} + void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; - int box_ctl = uncore_pci_box_ctl(box); + int box_ctl = intel_pci_uncore_box_ctl(box); __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_INT); @@ -418,7 +529,7 @@ void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box) void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; - int box_ctl = uncore_pci_box_ctl(box); + int box_ctl = intel_pci_uncore_box_ctl(box); pci_write_config_dword(pdev, box_ctl, GENERIC_PMON_BOX_CTL_FRZ); } @@ -426,7 +537,7 @@ void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box) void intel_generic_uncore_pci_enable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; - int box_ctl = uncore_pci_box_ctl(box); + int box_ctl = intel_pci_uncore_box_ctl(box); pci_write_config_dword(pdev, box_ctl, 0); } @@ -473,34 +584,30 @@ static struct intel_uncore_ops generic_uncore_pci_ops = { #define UNCORE_GENERIC_MMIO_SIZE 0x4000 -static u64 generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) -{ - struct intel_uncore_type *type = box->pmu->type; - - if (!type->box_ctls || !type->box_ctls[box->dieid] || !type->mmio_offsets) - return 0; - - return type->box_ctls[box->dieid] + type->mmio_offsets[box->pmu->pmu_idx]; -} - void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) { - u64 box_ctl = generic_uncore_mmio_box_ctl(box); + static struct intel_uncore_discovery_unit *unit; struct intel_uncore_type *type = box->pmu->type; resource_size_t addr; - if (!box_ctl) { + unit = intel_uncore_find_discovery_unit(type->boxes, box->dieid, box->pmu->pmu_idx); + if (!unit) { + pr_warn("Uncore type %d id %d: Cannot find box control address.\n", + type->type_id, box->pmu->pmu_idx); + return; + } + + if (!unit->addr) { pr_warn("Uncore type %d box %d: Invalid box control address.\n", - type->type_id, type->box_ids[box->pmu->pmu_idx]); + type->type_id, unit->id); return; } - addr = box_ctl; + addr = unit->addr; box->io_addr = ioremap(addr, UNCORE_GENERIC_MMIO_SIZE); if (!box->io_addr) { pr_warn("Uncore type %d box %d: ioremap error for 0x%llx.\n", - type->type_id, type->box_ids[box->pmu->pmu_idx], - (unsigned long long)addr); + type->type_id, unit->id, (unsigned long long)addr); return; } @@ -560,34 +667,22 @@ static bool uncore_update_uncore_type(enum uncore_access_type type_id, struct intel_uncore_discovery_type *type) { uncore->type_id = type->type; - uncore->num_boxes = type->num_boxes; uncore->num_counters = type->num_counters; uncore->perf_ctr_bits = type->counter_width; - uncore->box_ids = type->ids; + uncore->perf_ctr = (unsigned int)type->ctr_offset; + uncore->event_ctl = (unsigned int)type->ctl_offset; + uncore->boxes = &type->units; + uncore->num_boxes = type->num_units; switch (type_id) { case UNCORE_ACCESS_MSR: uncore->ops = &generic_uncore_msr_ops; - uncore->perf_ctr = (unsigned int)type->box_ctrl + type->ctr_offset; - uncore->event_ctl = (unsigned int)type->box_ctrl + type->ctl_offset; - uncore->box_ctl = (unsigned int)type->box_ctrl; - uncore->msr_offsets = type->box_offset; break; case UNCORE_ACCESS_PCI: uncore->ops = &generic_uncore_pci_ops; - uncore->perf_ctr = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctr_offset; - uncore->event_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl) + type->ctl_offset; - uncore->box_ctl = (unsigned int)UNCORE_DISCOVERY_PCI_BOX_CTRL(type->box_ctrl); - uncore->box_ctls = type->box_ctrl_die; - uncore->pci_offsets = type->box_offset; break; case UNCORE_ACCESS_MMIO: uncore->ops = &generic_uncore_mmio_ops; - uncore->perf_ctr = (unsigned int)type->ctr_offset; - uncore->event_ctl = (unsigned int)type->ctl_offset; - uncore->box_ctl = (unsigned int)type->box_ctrl; - uncore->box_ctls = type->box_ctrl_die; - uncore->mmio_offsets = type->box_offset; uncore->mmio_map_size = UNCORE_GENERIC_MMIO_SIZE; break; default: diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 22e769a81103..0e94aa7db8e7 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -113,19 +113,24 @@ struct uncore_unit_discovery { }; }; +struct intel_uncore_discovery_unit { + struct rb_node node; + unsigned int pmu_idx; /* The idx of the corresponding PMU */ + unsigned int id; /* Unit ID */ + unsigned int die; /* Die ID */ + u64 addr; /* Unit Control Address */ +}; + struct intel_uncore_discovery_type { struct rb_node node; enum uncore_access_type access_type; - u64 box_ctrl; /* Unit ctrl addr of the first box */ - u64 *box_ctrl_die; /* Unit ctrl addr of the first box of each die */ + struct rb_root units; /* Unit ctrl addr for all units */ u16 type; /* Type ID of the uncore block */ u8 num_counters; u8 counter_width; u8 ctl_offset; /* Counter Control 0 offset */ u8 ctr_offset; /* Counter 0 offset */ - u16 num_boxes; /* number of boxes for the uncore block */ - unsigned int *ids; /* Box IDs */ - u64 *box_offset; /* Box offset */ + u16 num_units; /* number of units */ }; bool intel_uncore_has_discovery_tables(int *ignore); @@ -156,3 +161,10 @@ u64 intel_generic_uncore_pci_read_counter(struct intel_uncore_box *box, struct intel_uncore_type ** intel_uncore_generic_init_uncores(enum uncore_access_type type_id, int num_extra); + +int intel_uncore_find_discovery_unit_id(struct rb_root *units, int die, + unsigned int pmu_idx); +bool intel_generic_uncore_assign_hw_event(struct perf_event *event, + struct intel_uncore_box *box); +void uncore_find_add_unit(struct intel_uncore_discovery_unit *node, + struct rb_root *root, u16 *num_units); diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 466833478e81..8962e7cb21e3 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Nehalem-EX/Westmere-EX uncore support */ #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "uncore.h" /* NHM-EX event control */ @@ -200,12 +201,12 @@ DEFINE_UNCORE_FORMAT_ATTR(mask, mask, "config2:0-63"); static void nhmex_uncore_msr_init_box(struct intel_uncore_box *box) { - wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); + wrmsrq(NHMEX_U_MSR_PMON_GLOBAL_CTL, NHMEX_U_PMON_GLOBAL_EN_ALL); } static void nhmex_uncore_msr_exit_box(struct intel_uncore_box *box) { - wrmsrl(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0); + wrmsrq(NHMEX_U_MSR_PMON_GLOBAL_CTL, 0); } static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) @@ -214,12 +215,12 @@ static void nhmex_uncore_msr_disable_box(struct intel_uncore_box *box) u64 config; if (msr) { - rdmsrl(msr, config); + rdmsrq(msr, config); config &= ~((1ULL << uncore_num_counters(box)) - 1); /* WBox has a fixed counter */ if (uncore_msr_fixed_ctl(box)) config &= ~NHMEX_W_PMON_GLOBAL_FIXED_EN; - wrmsrl(msr, config); + wrmsrq(msr, config); } } @@ -229,18 +230,18 @@ static void nhmex_uncore_msr_enable_box(struct intel_uncore_box *box) u64 config; if (msr) { - rdmsrl(msr, config); + rdmsrq(msr, config); config |= (1ULL << uncore_num_counters(box)) - 1; /* WBox has a fixed counter */ if (uncore_msr_fixed_ctl(box)) config |= NHMEX_W_PMON_GLOBAL_FIXED_EN; - wrmsrl(msr, config); + wrmsrq(msr, config); } } static void nhmex_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) { - wrmsrl(event->hw.config_base, 0); + wrmsrq(event->hw.config_base, 0); } static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -248,11 +249,11 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p struct hw_perf_event *hwc = &event->hw; if (hwc->idx == UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); + wrmsrq(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); + wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); else - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); + wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); } #define NHMEX_UNCORE_OPS_COMMON_INIT() \ @@ -382,10 +383,10 @@ static void nhmex_bbox_msr_enable_event(struct intel_uncore_box *box, struct per struct hw_perf_event_extra *reg2 = &hwc->branch_reg; if (reg1->idx != EXTRA_REG_NONE) { - wrmsrl(reg1->reg, reg1->config); - wrmsrl(reg1->reg + 1, reg2->config); + wrmsrq(reg1->reg, reg1->config); + wrmsrq(reg1->reg + 1, reg2->config); } - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + wrmsrq(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | (hwc->config & NHMEX_B_PMON_CTL_EV_SEL_MASK)); } @@ -467,12 +468,12 @@ static void nhmex_sbox_msr_enable_event(struct intel_uncore_box *box, struct per struct hw_perf_event_extra *reg2 = &hwc->branch_reg; if (reg1->idx != EXTRA_REG_NONE) { - wrmsrl(reg1->reg, 0); - wrmsrl(reg1->reg + 1, reg1->config); - wrmsrl(reg1->reg + 2, reg2->config); - wrmsrl(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); + wrmsrq(reg1->reg, 0); + wrmsrq(reg1->reg + 1, reg1->config); + wrmsrq(reg1->reg + 2, reg2->config); + wrmsrq(reg1->reg, NHMEX_S_PMON_MM_CFG_EN); } - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); + wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); } static struct attribute *nhmex_uncore_sbox_formats_attr[] = { @@ -842,25 +843,25 @@ static void nhmex_mbox_msr_enable_event(struct intel_uncore_box *box, struct per idx = __BITS_VALUE(reg1->idx, 0, 8); if (idx != 0xff) - wrmsrl(__BITS_VALUE(reg1->reg, 0, 16), + wrmsrq(__BITS_VALUE(reg1->reg, 0, 16), nhmex_mbox_shared_reg_config(box, idx)); idx = __BITS_VALUE(reg1->idx, 1, 8); if (idx != 0xff) - wrmsrl(__BITS_VALUE(reg1->reg, 1, 16), + wrmsrq(__BITS_VALUE(reg1->reg, 1, 16), nhmex_mbox_shared_reg_config(box, idx)); if (reg2->idx != EXTRA_REG_NONE) { - wrmsrl(reg2->reg, 0); + wrmsrq(reg2->reg, 0); if (reg2->config != ~0ULL) { - wrmsrl(reg2->reg + 1, + wrmsrq(reg2->reg + 1, reg2->config & NHMEX_M_PMON_ADDR_MATCH_MASK); - wrmsrl(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & + wrmsrq(reg2->reg + 2, NHMEX_M_PMON_ADDR_MASK_MASK & (reg2->config >> NHMEX_M_PMON_ADDR_MASK_SHIFT)); - wrmsrl(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); + wrmsrq(reg2->reg, NHMEX_M_PMON_MM_CFG_EN); } } - wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); + wrmsrq(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT0); } DEFINE_UNCORE_FORMAT_ATTR(count_mode, count_mode, "config:2-3"); @@ -1121,31 +1122,31 @@ static void nhmex_rbox_msr_enable_event(struct intel_uncore_box *box, struct per switch (idx % 6) { case 0: - wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); + wrmsrq(NHMEX_R_MSR_PORTN_IPERF_CFG0(port), reg1->config); break; case 1: - wrmsrl(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); + wrmsrq(NHMEX_R_MSR_PORTN_IPERF_CFG1(port), reg1->config); break; case 2: case 3: - wrmsrl(NHMEX_R_MSR_PORTN_QLX_CFG(port), + wrmsrq(NHMEX_R_MSR_PORTN_QLX_CFG(port), uncore_shared_reg_config(box, 2 + (idx / 6) * 5)); break; case 4: - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET1_MM_CFG(port), hwc->config >> 32); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET1_MATCH(port), reg1->config); + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET1_MASK(port), reg2->config); break; case 5: - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET2_MM_CFG(port), hwc->config >> 32); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); - wrmsrl(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET2_MATCH(port), reg1->config); + wrmsrq(NHMEX_R_MSR_PORTN_XBR_SET2_MASK(port), reg2->config); break; } - wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | + wrmsrq(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0 | (hwc->config & NHMEX_R_PMON_CTL_EV_SEL_MASK)); } diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 9462fd9f3b7a..a1a96833e30e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ +#include <asm/msr.h> #include "uncore.h" #include "uncore_discovery.h" @@ -252,6 +253,7 @@ DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); DEFINE_UNCORE_FORMAT_ATTR(threshold, threshold, "config:24-29"); +DEFINE_UNCORE_FORMAT_ATTR(threshold2, threshold, "config:24-31"); /* Sandy Bridge uncore support */ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -259,34 +261,34 @@ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct per struct hw_perf_event *hwc = &event->hw; if (hwc->idx < UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); else - wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); + wrmsrq(hwc->config_base, SNB_UNC_CTL_EN); } static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, struct perf_event *event) { - wrmsrl(event->hw.config_base, 0); + wrmsrq(event->hw.config_base, 0); } static void snb_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) { - wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + wrmsrq(SNB_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); } } static void snb_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, + wrmsrq(SNB_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); } static void snb_uncore_msr_exit_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(SNB_UNC_PERF_GLOBAL_CTL, 0); } static struct uncore_event_desc snb_uncore_events[] = { @@ -371,7 +373,7 @@ void snb_uncore_cpu_init(void) static void skl_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) { - wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); } @@ -382,14 +384,14 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box) static void skl_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, + wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); } static void skl_uncore_msr_exit_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, 0); } static struct intel_uncore_ops skl_uncore_msr_ops = { @@ -503,7 +505,7 @@ static int icl_get_cbox_num(void) { u64 num_boxes; - rdmsrl(ICL_UNC_CBO_CONFIG, num_boxes); + rdmsrq(ICL_UNC_CBO_CONFIG, num_boxes); return num_boxes & ICL_UNC_NUM_CBO_MASK; } @@ -524,7 +526,7 @@ static struct intel_uncore_type *tgl_msr_uncores[] = { static void rkl_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); + wrmsrq(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); } void tgl_uncore_cpu_init(void) @@ -540,24 +542,24 @@ void tgl_uncore_cpu_init(void) static void adl_uncore_msr_init_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); + wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); } static void adl_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); + wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); } static void adl_uncore_msr_disable_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, 0); } static void adl_uncore_msr_exit_box(struct intel_uncore_box *box) { if (box->pmu->pmu_idx == 0) - wrmsrl(ADL_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(ADL_UNC_PERF_GLOBAL_CTL, 0); } static struct intel_uncore_ops adl_uncore_msr_ops = { @@ -690,7 +692,7 @@ static struct intel_uncore_type mtl_uncore_hac_cbox = { static void mtl_uncore_msr_init_box(struct intel_uncore_box *box) { - wrmsrl(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN); + wrmsrq(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN); } static struct intel_uncore_ops mtl_uncore_msr_ops = { @@ -746,6 +748,34 @@ void mtl_uncore_cpu_init(void) uncore_msr_uncores = mtl_msr_uncores; } +static struct intel_uncore_type *lnl_msr_uncores[] = { + &mtl_uncore_cbox, + &mtl_uncore_arb, + NULL +}; + +#define LNL_UNC_MSR_GLOBAL_CTL 0x240e + +static void lnl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrq(LNL_UNC_MSR_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); +} + +static struct intel_uncore_ops lnl_uncore_msr_ops = { + .init_box = lnl_uncore_msr_init_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +void lnl_uncore_cpu_init(void) +{ + mtl_uncore_cbox.num_boxes = 4; + mtl_uncore_cbox.ops = &lnl_uncore_msr_ops; + uncore_msr_uncores = lnl_msr_uncores; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -881,7 +911,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event) pmu = uncore_event_to_pmu(event); /* no device found for this pmu */ - if (pmu->func_id < 0) + if (!pmu->registered) return -ENOENT; /* Sampling not supported yet */ @@ -1277,12 +1307,12 @@ int skl_uncore_pci_init(void) /* Nehalem uncore support */ static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) { - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); + wrmsrq(NHM_UNC_PERF_GLOBAL_CTL, 0); } static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) { - wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); + wrmsrq(NHM_UNC_PERF_GLOBAL_CTL, NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); } static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -1290,9 +1320,9 @@ static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, struct per struct hw_perf_event *hwc = &event->hw; if (hwc->idx < UNCORE_PMC_IDX_FIXED) - wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); else - wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); + wrmsrq(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); } static struct attribute *nhm_uncore_formats_attr[] = { @@ -1475,39 +1505,45 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void) ids++; } + /* Just try to grab 00:00.0 device */ + if (!mc_dev) + mc_dev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); + return mc_dev; } #define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 #define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000 -static void __uncore_imc_init_box(struct intel_uncore_box *box, - unsigned int base_offset) +static void +uncore_get_box_mmio_addr(struct intel_uncore_box *box, + unsigned int base_offset, + int bar_offset, int step) { struct pci_dev *pdev = tgl_uncore_get_mc_dev(); struct intel_uncore_pmu *pmu = box->pmu; struct intel_uncore_type *type = pmu->type; resource_size_t addr; - u32 mch_bar; + u32 bar; if (!pdev) { pr_warn("perf uncore: Cannot find matched IMC device.\n"); return; } - pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar); - /* MCHBAR is disabled */ - if (!(mch_bar & BIT(0))) { - pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n"); + pci_read_config_dword(pdev, bar_offset, &bar); + if (!(bar & BIT(0))) { + pr_warn("perf uncore: BAR 0x%x is disabled. Failed to map %s counters.\n", + bar_offset, type->name); pci_dev_put(pdev); return; } - mch_bar &= ~BIT(0); - addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx); + bar &= ~BIT(0); + addr = (resource_size_t)(bar + step * pmu->pmu_idx); #ifdef CONFIG_PHYS_ADDR_T_64BIT - pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar); - addr |= ((resource_size_t)mch_bar << 32); + pci_read_config_dword(pdev, bar_offset + 4, &bar); + addr |= ((resource_size_t)bar << 32); #endif addr += base_offset; @@ -1518,6 +1554,14 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box, pci_dev_put(pdev); } +static void __uncore_imc_init_box(struct intel_uncore_box *box, + unsigned int base_offset) +{ + uncore_get_box_mmio_addr(box, base_offset, + SNB_UNCORE_PCI_IMC_BAR_OFFSET, + TGL_UNCORE_MMIO_IMC_MEM_OFFSET); +} + static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) { __uncore_imc_init_box(box, 0); @@ -1612,14 +1656,17 @@ static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box) writel(0, box->io_addr + uncore_mmio_box_ctl(box)); } +#define MMIO_UNCORE_COMMON_OPS() \ + .exit_box = uncore_mmio_exit_box, \ + .disable_box = adl_uncore_mmio_disable_box, \ + .enable_box = adl_uncore_mmio_enable_box, \ + .disable_event = intel_generic_uncore_mmio_disable_event, \ + .enable_event = intel_generic_uncore_mmio_enable_event, \ + .read_counter = uncore_mmio_read_counter, + static struct intel_uncore_ops adl_uncore_mmio_ops = { .init_box = adl_uncore_imc_init_box, - .exit_box = uncore_mmio_exit_box, - .disable_box = adl_uncore_mmio_disable_box, - .enable_box = adl_uncore_mmio_enable_box, - .disable_event = intel_generic_uncore_mmio_disable_event, - .enable_event = intel_generic_uncore_mmio_enable_event, - .read_counter = uncore_mmio_read_counter, + MMIO_UNCORE_COMMON_OPS() }; #define ADL_UNC_CTL_CHMASK_MASK 0x00000f00 @@ -1703,3 +1750,108 @@ void adl_uncore_mmio_init(void) } /* end of Alder Lake MMIO uncore support */ + +/* Lunar Lake MMIO uncore support */ +#define LNL_UNCORE_PCI_SAFBAR_OFFSET 0x68 +#define LNL_UNCORE_MAP_SIZE 0x1000 +#define LNL_UNCORE_SNCU_BASE 0xE4B000 +#define LNL_UNCORE_SNCU_CTR 0x390 +#define LNL_UNCORE_SNCU_CTRL 0x398 +#define LNL_UNCORE_SNCU_BOX_CTL 0x380 +#define LNL_UNCORE_GLOBAL_CTL 0x700 +#define LNL_UNCORE_HBO_BASE 0xE54000 +#define LNL_UNCORE_HBO_OFFSET -4096 +#define LNL_UNCORE_HBO_CTR 0x570 +#define LNL_UNCORE_HBO_CTRL 0x550 +#define LNL_UNCORE_HBO_BOX_CTL 0x548 + +#define LNL_UNC_CTL_THRESHOLD 0xff000000 +#define LNL_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + LNL_UNC_CTL_THRESHOLD) + +static struct attribute *lnl_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_threshold2.attr, + NULL +}; + +static const struct attribute_group lnl_uncore_format_group = { + .name = "format", + .attrs = lnl_uncore_formats_attr, +}; + +static void lnl_uncore_hbo_init_box(struct intel_uncore_box *box) +{ + uncore_get_box_mmio_addr(box, LNL_UNCORE_HBO_BASE, + LNL_UNCORE_PCI_SAFBAR_OFFSET, + LNL_UNCORE_HBO_OFFSET); +} + +static struct intel_uncore_ops lnl_uncore_hbo_ops = { + .init_box = lnl_uncore_hbo_init_box, + MMIO_UNCORE_COMMON_OPS() +}; + +static struct intel_uncore_type lnl_uncore_hbo = { + .name = "hbo", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 64, + .perf_ctr = LNL_UNCORE_HBO_CTR, + .event_ctl = LNL_UNCORE_HBO_CTRL, + .event_mask = LNL_UNC_RAW_EVENT_MASK, + .box_ctl = LNL_UNCORE_HBO_BOX_CTL, + .mmio_map_size = LNL_UNCORE_MAP_SIZE, + .ops = &lnl_uncore_hbo_ops, + .format_group = &lnl_uncore_format_group, +}; + +static void lnl_uncore_sncu_init_box(struct intel_uncore_box *box) +{ + uncore_get_box_mmio_addr(box, LNL_UNCORE_SNCU_BASE, + LNL_UNCORE_PCI_SAFBAR_OFFSET, + 0); + + if (box->io_addr) + writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + LNL_UNCORE_GLOBAL_CTL); +} + +static struct intel_uncore_ops lnl_uncore_sncu_ops = { + .init_box = lnl_uncore_sncu_init_box, + MMIO_UNCORE_COMMON_OPS() +}; + +static struct intel_uncore_type lnl_uncore_sncu = { + .name = "sncu", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 64, + .perf_ctr = LNL_UNCORE_SNCU_CTR, + .event_ctl = LNL_UNCORE_SNCU_CTRL, + .event_mask = LNL_UNC_RAW_EVENT_MASK, + .box_ctl = LNL_UNCORE_SNCU_BOX_CTL, + .mmio_map_size = LNL_UNCORE_MAP_SIZE, + .ops = &lnl_uncore_sncu_ops, + .format_group = &lnl_uncore_format_group, +}; + +static struct intel_uncore_type *lnl_mmio_uncores[] = { + &adl_uncore_imc, + &lnl_uncore_hbo, + &lnl_uncore_sncu, + &adl_uncore_imc_free_running, + NULL +}; + +void lnl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = lnl_mmio_uncores; +} + +/* end of Lunar Lake MMIO uncore support */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 74b8b21e8990..2824dc9950be 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* SandyBridge-EP/IvyTown uncore support */ #include <asm/cpu_device_id.h> +#include <asm/msr.h> #include "uncore.h" #include "uncore_discovery.h" @@ -462,6 +463,7 @@ #define SPR_UBOX_DID 0x3250 /* SPR CHA */ +#define SPR_CHA_EVENT_MASK_EXT 0xffffffff #define SPR_CHA_PMON_CTL_TID_EN (1 << 16) #define SPR_CHA_PMON_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \ SPR_CHA_PMON_CTL_TID_EN) @@ -478,6 +480,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext5, umask, "config:8-15,32-63"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); @@ -616,9 +619,9 @@ static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box) msr = uncore_msr_box_ctl(box); if (msr) { - rdmsrl(msr, config); + rdmsrq(msr, config); config |= SNBEP_PMON_BOX_CTL_FRZ; - wrmsrl(msr, config); + wrmsrq(msr, config); } } @@ -629,9 +632,9 @@ static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) msr = uncore_msr_box_ctl(box); if (msr) { - rdmsrl(msr, config); + rdmsrq(msr, config); config &= ~SNBEP_PMON_BOX_CTL_FRZ; - wrmsrl(msr, config); + wrmsrq(msr, config); } } @@ -641,9 +644,9 @@ static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, struct p struct hw_perf_event_extra *reg1 = &hwc->extra_reg; if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, uncore_shared_reg_config(box, 0)); + wrmsrq(reg1->reg, uncore_shared_reg_config(box, 0)); - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, @@ -651,7 +654,7 @@ static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrq(hwc->config_base, hwc->config); } static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) @@ -659,7 +662,7 @@ static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) unsigned msr = uncore_msr_box_ctl(box); if (msr) - wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); + wrmsrq(msr, SNBEP_PMON_BOX_CTL_INT); } static struct attribute *snbep_uncore_formats_attr[] = { @@ -1530,7 +1533,7 @@ static void ivbep_uncore_msr_init_box(struct intel_uncore_box *box) { unsigned msr = uncore_msr_box_ctl(box); if (msr) - wrmsrl(msr, IVBEP_PMON_BOX_CTL_INT); + wrmsrq(msr, IVBEP_PMON_BOX_CTL_INT); } static void ivbep_uncore_pci_init_box(struct intel_uncore_box *box) @@ -1781,11 +1784,11 @@ static void ivbep_cbox_enable_event(struct intel_uncore_box *box, struct perf_ev if (reg1->idx != EXTRA_REG_NONE) { u64 filter = uncore_shared_reg_config(box, 0); - wrmsrl(reg1->reg, filter & 0xffffffff); - wrmsrl(reg1->reg + 6, filter >> 32); + wrmsrq(reg1->reg, filter & 0xffffffff); + wrmsrq(reg1->reg + 6, filter >> 32); } - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static struct intel_uncore_ops ivbep_uncore_cbox_ops = { @@ -2765,11 +2768,11 @@ static void hswep_cbox_enable_event(struct intel_uncore_box *box, if (reg1->idx != EXTRA_REG_NONE) { u64 filter = uncore_shared_reg_config(box, 0); - wrmsrl(reg1->reg, filter & 0xffffffff); - wrmsrl(reg1->reg + 1, filter >> 32); + wrmsrq(reg1->reg, filter & 0xffffffff); + wrmsrq(reg1->reg + 1, filter >> 32); } - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static struct intel_uncore_ops hswep_uncore_cbox_ops = { @@ -2814,7 +2817,7 @@ static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box) for_each_set_bit(i, (unsigned long *)&init, 64) { flags |= (1ULL << i); - wrmsrl(msr, flags); + wrmsrq(msr, flags); } } } @@ -3706,7 +3709,7 @@ static void skx_iio_enable_event(struct intel_uncore_box *box, { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static struct intel_uncore_ops skx_uncore_iio_ops = { @@ -3763,7 +3766,7 @@ static int skx_msr_cpu_bus_read(int cpu, u64 *topology) { u64 msr_value; - if (rdmsrl_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &msr_value) || + if (rdmsrq_on_cpu(cpu, SKX_MSR_CPU_BUS_NUMBER, &msr_value) || !(msr_value & SKX_MSR_CPU_BUS_VALID_BIT)) return -ENXIO; @@ -4653,9 +4656,9 @@ static void snr_cha_enable_event(struct intel_uncore_box *box, struct hw_perf_event_extra *reg1 = &hwc->extra_reg; if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, reg1->config); + wrmsrq(reg1->reg, reg1->config); - wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); + wrmsrq(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); } static struct intel_uncore_ops snr_uncore_chabox_ops = { @@ -4889,28 +4892,28 @@ static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), /* Free-Running IIO BANDWIDTH IN Counters */ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), { /* end: all zeroes */ }, }; @@ -5483,37 +5486,6 @@ static struct freerunning_counters icx_iio_freerunning[] = { [ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets }, }; -static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type icx_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 9, @@ -5521,7 +5493,7 @@ static struct intel_uncore_type icx_uncore_iio_free_running = { .num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX, .freerunning = icx_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = icx_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; @@ -5911,9 +5883,9 @@ static void spr_uncore_msr_enable_event(struct intel_uncore_box *box, struct hw_perf_event_extra *reg1 = &hwc->extra_reg; if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, reg1->config); + wrmsrq(reg1->reg, reg1->config); - wrmsrl(hwc->config_base, hwc->config); + wrmsrq(hwc->config_base, hwc->config); } static void spr_uncore_msr_disable_event(struct intel_uncore_box *box, @@ -5923,9 +5895,9 @@ static void spr_uncore_msr_disable_event(struct intel_uncore_box *box, struct hw_perf_event_extra *reg1 = &hwc->extra_reg; if (reg1->idx != EXTRA_REG_NONE) - wrmsrl(reg1->reg, 0); + wrmsrq(reg1->reg, 0); - wrmsrl(hwc->config_base, 0); + wrmsrq(hwc->config_base, 0); } static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) @@ -5933,10 +5905,11 @@ static int spr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; bool tie_en = !!(event->hw.config & SPR_CHA_PMON_CTL_TID_EN); struct intel_uncore_type *type = box->pmu->type; + int id = intel_uncore_find_discovery_unit_id(type->boxes, -1, box->pmu->pmu_idx); if (tie_en) { reg1->reg = SPR_C0_MSR_PMON_BOX_FILTER0 + - HSWEP_CBO_MSR_OFFSET * type->box_ids[box->pmu->pmu_idx]; + HSWEP_CBO_MSR_OFFSET * id; reg1->config = event->attr.config1 & SPR_CHA_PMON_BOX_FILTER_TID; reg1->idx = 0; } @@ -5958,7 +5931,7 @@ static struct intel_uncore_ops spr_uncore_chabox_ops = { static struct attribute *spr_uncore_cha_formats_attr[] = { &format_attr_event.attr, - &format_attr_umask_ext4.attr, + &format_attr_umask_ext5.attr, &format_attr_tid_en2.attr, &format_attr_edge.attr, &format_attr_inv.attr, @@ -5994,7 +5967,7 @@ ATTRIBUTE_GROUPS(uncore_alias); static struct intel_uncore_type spr_uncore_chabox = { .name = "cha", .event_mask = SPR_CHA_PMON_EVENT_MASK, - .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, + .event_mask_ext = SPR_CHA_EVENT_MASK_EXT, .num_shared_regs = 1, .constraints = skx_uncore_chabox_constraints, .ops = &spr_uncore_chabox_ops, @@ -6162,7 +6135,55 @@ static struct intel_uncore_type spr_uncore_mdf = { .name = "mdf", }; -#define UNCORE_SPR_NUM_UNCORE_TYPES 12 +static void spr_uncore_mmio_offs8_init_box(struct intel_uncore_box *box) +{ + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + intel_generic_uncore_mmio_init_box(box); +} + +static struct intel_uncore_ops spr_uncore_mmio_offs8_ops = { + .init_box = spr_uncore_mmio_offs8_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = intel_generic_uncore_mmio_disable_box, + .enable_box = intel_generic_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = spr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +#define SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT() \ + SPR_UNCORE_COMMON_FORMAT(), \ + .ops = &spr_uncore_mmio_offs8_ops + +static struct event_constraint spr_uncore_cxlcm_constraints[] = { + UNCORE_EVENT_CONSTRAINT(0x02, 0x0f), + UNCORE_EVENT_CONSTRAINT(0x05, 0x0f), + UNCORE_EVENT_CONSTRAINT(0x40, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x41, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x42, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x43, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x4b, 0xf0), + UNCORE_EVENT_CONSTRAINT(0x52, 0xf0), + EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type spr_uncore_cxlcm = { + SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(), + .name = "cxlcm", + .constraints = spr_uncore_cxlcm_constraints, +}; + +static struct intel_uncore_type spr_uncore_cxldp = { + SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(), + .name = "cxldp", +}; + +static struct intel_uncore_type spr_uncore_hbm = { + SPR_UNCORE_COMMON_FORMAT(), + .name = "hbm", +}; + +#define UNCORE_SPR_NUM_UNCORE_TYPES 15 #define UNCORE_SPR_CHA 0 #define UNCORE_SPR_IIO 1 #define UNCORE_SPR_IMC 6 @@ -6186,6 +6207,9 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = { NULL, NULL, &spr_uncore_mdf, + &spr_uncore_cxlcm, + &spr_uncore_cxldp, + &spr_uncore_hbm, }; /* @@ -6198,6 +6222,24 @@ static u64 spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = { 0, 0x8000, 0x10000, 0x18000 }; +static void spr_extra_boxes_cleanup(struct intel_uncore_type *type) +{ + struct intel_uncore_discovery_unit *pos; + struct rb_node *node; + + if (!type->boxes) + return; + + while (!RB_EMPTY_ROOT(type->boxes)) { + node = rb_first(type->boxes); + pos = rb_entry(node, struct intel_uncore_discovery_unit, node); + rb_erase(node, type->boxes); + kfree(pos); + } + kfree(type->boxes); + type->boxes = NULL; +} + static struct intel_uncore_type spr_uncore_upi = { .event_mask = SNBEP_PMON_RAW_EVENT_MASK, .event_mask_ext = SPR_RAW_EVENT_MASK_EXT, @@ -6212,10 +6254,11 @@ static struct intel_uncore_type spr_uncore_upi = { .num_counters = 4, .num_boxes = SPR_UNCORE_UPI_NUM_BOXES, .perf_ctr_bits = 48, - .perf_ctr = ICX_UPI_PCI_PMON_CTR0, - .event_ctl = ICX_UPI_PCI_PMON_CTL0, + .perf_ctr = ICX_UPI_PCI_PMON_CTR0 - ICX_UPI_PCI_PMON_BOX_CTL, + .event_ctl = ICX_UPI_PCI_PMON_CTL0 - ICX_UPI_PCI_PMON_BOX_CTL, .box_ctl = ICX_UPI_PCI_PMON_BOX_CTL, .pci_offsets = spr_upi_pci_offsets, + .cleanup_extra_boxes = spr_extra_boxes_cleanup, }; static struct intel_uncore_type spr_uncore_m3upi = { @@ -6225,11 +6268,12 @@ static struct intel_uncore_type spr_uncore_m3upi = { .num_counters = 4, .num_boxes = SPR_UNCORE_UPI_NUM_BOXES, .perf_ctr_bits = 48, - .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0, - .event_ctl = ICX_M3UPI_PCI_PMON_CTL0, + .perf_ctr = ICX_M3UPI_PCI_PMON_CTR0 - ICX_M3UPI_PCI_PMON_BOX_CTL, + .event_ctl = ICX_M3UPI_PCI_PMON_CTL0 - ICX_M3UPI_PCI_PMON_BOX_CTL, .box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL, .pci_offsets = spr_upi_pci_offsets, .constraints = icx_uncore_m3upi_constraints, + .cleanup_extra_boxes = spr_extra_boxes_cleanup, }; enum perf_uncore_spr_iio_freerunning_type_id { @@ -6246,69 +6290,13 @@ static struct freerunning_counters spr_iio_freerunning[] = { [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 }, }; -static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - /* Free-Running IIO BANDWIDTH OUT Counters */ - INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type spr_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 17, .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, .freerunning = spr_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = spr_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; @@ -6460,18 +6448,21 @@ uncore_find_type_by_id(struct intel_uncore_type **types, int type_id) static int uncore_type_max_boxes(struct intel_uncore_type **types, int type_id) { + struct intel_uncore_discovery_unit *unit; struct intel_uncore_type *type; - int i, max = 0; + struct rb_node *node; + int max = 0; type = uncore_find_type_by_id(types, type_id); if (!type) return 0; - for (i = 0; i < type->num_boxes; i++) { - if (type->box_ids[i] > max) - max = type->box_ids[i]; - } + for (node = rb_first(type->boxes); node; node = rb_next(node)) { + unit = rb_entry(node, struct intel_uncore_discovery_unit, node); + if (unit->id > max) + max = unit->id; + } return max + 1; } @@ -6495,7 +6486,7 @@ void spr_uncore_cpu_init(void) * of UNCORE_SPR_CHA) is incorrect on some SPR variants because of a * firmware bug. Using the value from SPR_MSR_UNC_CBO_CONFIG to replace it. */ - rdmsrl(SPR_MSR_UNC_CBO_CONFIG, num_cbo); + rdmsrq(SPR_MSR_UNC_CBO_CONFIG, num_cbo); /* * The MSR doesn't work on the EMR XCC, but the firmware bug doesn't impact * the EMR XCC. Don't let the value from the MSR replace the existing value. @@ -6513,10 +6504,11 @@ void spr_uncore_cpu_init(void) static void spr_update_device_location(int type_id) { + struct intel_uncore_discovery_unit *unit; struct intel_uncore_type *type; struct pci_dev *dev = NULL; + struct rb_root *root; u32 device, devfn; - u64 *ctls; int die; if (type_id == UNCORE_SPR_UPI) { @@ -6530,27 +6522,35 @@ static void spr_update_device_location(int type_id) } else return; - ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL); - if (!ctls) { + root = kzalloc(sizeof(struct rb_root), GFP_KERNEL); + if (!root) { type->num_boxes = 0; return; } + *root = RB_ROOT; while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) { - if (devfn != dev->devfn) - continue; die = uncore_device_to_die(dev); if (die < 0) continue; - ctls[die] = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET | - dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET | - devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET | - type->box_ctl; + unit = kzalloc(sizeof(*unit), GFP_KERNEL); + if (!unit) + continue; + unit->die = die; + unit->id = PCI_SLOT(dev->devfn) - PCI_SLOT(devfn); + unit->addr = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET | + dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET | + devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET | + type->box_ctl; + + unit->pmu_idx = unit->id; + + uncore_find_add_unit(unit, root, NULL); } - type->box_ctls = ctls; + type->boxes = root; } int spr_uncore_pci_init(void) @@ -6598,17 +6598,8 @@ void spr_uncore_mmio_init(void) /* GNR uncore support */ #define UNCORE_GNR_NUM_UNCORE_TYPES 23 -#define UNCORE_GNR_TYPE_15 15 -#define UNCORE_GNR_B2UPI 18 -#define UNCORE_GNR_TYPE_21 21 -#define UNCORE_GNR_TYPE_22 22 int gnr_uncore_units_ignore[] = { - UNCORE_SPR_UPI, - UNCORE_GNR_TYPE_15, - UNCORE_GNR_B2UPI, - UNCORE_GNR_TYPE_21, - UNCORE_GNR_TYPE_22, UNCORE_IGNORE_END }; @@ -6617,13 +6608,38 @@ static struct intel_uncore_type gnr_uncore_ubox = { .attr_update = uncore_alias_groups, }; +static struct intel_uncore_type gnr_uncore_pciex8 = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "pciex8", +}; + +static struct intel_uncore_type gnr_uncore_pciex16 = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "pciex16", +}; + +static struct intel_uncore_type gnr_uncore_upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "upi", +}; + +static struct intel_uncore_type gnr_uncore_b2upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "b2upi", +}; + +static struct intel_uncore_type gnr_uncore_b2hot = { + .name = "b2hot", + .attr_update = uncore_alias_groups, +}; + static struct intel_uncore_type gnr_uncore_b2cmi = { SPR_UNCORE_PCI_COMMON_FORMAT(), .name = "b2cmi", }; static struct intel_uncore_type gnr_uncore_b2cxl = { - SPR_UNCORE_MMIO_COMMON_FORMAT(), + SPR_UNCORE_MMIO_OFFS8_COMMON_FORMAT(), .name = "b2cxl", }; @@ -6641,21 +6657,21 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = { &gnr_uncore_ubox, &spr_uncore_imc, NULL, + &gnr_uncore_upi, NULL, NULL, NULL, + &spr_uncore_cxlcm, + &spr_uncore_cxldp, NULL, - NULL, - NULL, - NULL, - NULL, + &gnr_uncore_b2hot, &gnr_uncore_b2cmi, &gnr_uncore_b2cxl, - NULL, + &gnr_uncore_b2upi, NULL, &gnr_uncore_mdf_sbo, - NULL, - NULL, + &gnr_uncore_pciex16, + &gnr_uncore_pciex8, }; static struct freerunning_counters gnr_iio_freerunning[] = { diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 45b1866ff051..7f5007a4752a 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -3,6 +3,8 @@ #include <linux/sysfs.h> #include <linux/nospec.h> #include <asm/cpu_device_id.h> +#include <asm/msr.h> + #include "probe.h" enum perf_msr_id { @@ -231,7 +233,7 @@ static inline u64 msr_read_counter(struct perf_event *event) u64 now; if (event->hw.event_base) - rdmsrl(event->hw.event_base, now); + rdmsrq(event->hw.event_base, now); else now = rdtsc_ordered(); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 72b022a1e16c..2b969386dcdd 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -17,6 +17,7 @@ #include <asm/fpu/xstate.h> #include <asm/intel_ds.h> #include <asm/cpu.h> +#include <asm/msr.h> /* To enable MSR tracing please use the generic trace points. */ @@ -110,9 +111,26 @@ static inline bool is_topdown_event(struct perf_event *event) return is_metric_event(event) || is_slots_event(event); } +int is_x86_event(struct perf_event *event); + +static inline bool check_leader_group(struct perf_event *leader, int flags) +{ + return is_x86_event(leader) ? !!(leader->hw.flags & flags) : false; +} + static inline bool is_branch_counters_group(struct perf_event *event) { - return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS; + return check_leader_group(event->group_leader, PERF_X86_EVENT_BRANCH_COUNTERS); +} + +static inline bool is_pebs_counter_event_group(struct perf_event *event) +{ + return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR); +} + +static inline bool is_acr_event_group(struct perf_event *event) +{ + return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR); } struct amd_nb { @@ -256,6 +274,7 @@ struct cpu_hw_events { struct event_constraint *event_constraint[X86_PMC_IDX_MAX]; int n_excl; /* the number of exclusive events */ + int n_late_setup; /* the num of events needs late setup */ unsigned int txn_flags; int is_fake; @@ -281,6 +300,10 @@ struct cpu_hw_events { u64 fixed_ctrl_val; u64 active_fixed_ctrl_val; + /* Intel ACR configuration */ + u64 acr_cfg_b[X86_PMC_IDX_MAX]; + u64 acr_cfg_c[X86_PMC_IDX_MAX]; + /* * Intel LBR bits */ @@ -476,6 +499,14 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID) +#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW) + +#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW) + /* Event constraint, but match on all event flags too. */ #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) @@ -616,6 +647,7 @@ union perf_capabilities { u64 pebs_output_pt_available:1; u64 pebs_timing_info:1; u64 anythread_deprecated:1; + u64 rdpmc_metrics_clear:1; }; u64 capabilities; }; @@ -655,28 +687,27 @@ enum { x86_lbr_exclusive_max, }; -#define PERF_PEBS_DATA_SOURCE_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_MAX 0x100 #define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) -enum hybrid_cpu_type { - HYBRID_INTEL_NONE, - HYBRID_INTEL_ATOM = 0x20, - HYBRID_INTEL_CORE = 0x40, -}; +#define X86_HYBRID_PMU_ATOM_IDX 0 +#define X86_HYBRID_PMU_CORE_IDX 1 +#define X86_HYBRID_PMU_TINY_IDX 2 enum hybrid_pmu_type { not_hybrid, - hybrid_small = BIT(0), - hybrid_big = BIT(1), - - hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */ + hybrid_small = BIT(X86_HYBRID_PMU_ATOM_IDX), + hybrid_big = BIT(X86_HYBRID_PMU_CORE_IDX), + hybrid_tiny = BIT(X86_HYBRID_PMU_TINY_IDX), + + /* The belows are only used for matching */ + hybrid_big_small = hybrid_big | hybrid_small, + hybrid_small_tiny = hybrid_small | hybrid_tiny, + hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny, }; -#define X86_HYBRID_PMU_ATOM_IDX 0 -#define X86_HYBRID_PMU_CORE_IDX 1 - -#define X86_HYBRID_NUM_PMUS 2 - struct x86_hybrid_pmu { struct pmu pmu; const char *name; @@ -684,9 +715,25 @@ struct x86_hybrid_pmu { cpumask_t supported_cpus; union perf_capabilities intel_cap; u64 intel_ctrl; - int max_pebs_events; - int num_counters; - int num_counters_fixed; + u64 pebs_events_mask; + u64 config_mask; + union { + u64 cntr_mask64; + unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 fixed_cntr_mask64; + unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + + union { + u64 acr_cntr_mask64; + unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cause_mask64; + unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; struct event_constraint unconstrained; u64 hw_cache_event_ids @@ -768,14 +815,35 @@ struct x86_pmu { u64 (*update)(struct perf_event *event); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); + void (*late_setup)(void); + void (*pebs_enable)(struct perf_event *event); + void (*pebs_disable)(struct perf_event *event); + void (*pebs_enable_all)(void); + void (*pebs_disable_all)(void); unsigned eventsel; unsigned perfctr; + unsigned fixedctr; int (*addr_offset)(int index, bool eventsel); int (*rdpmc_index)(int index); u64 (*event_map)(int); int max_events; - int num_counters; - int num_counters_fixed; + u64 config_mask; + union { + u64 cntr_mask64; + unsigned long cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 fixed_cntr_mask64; + unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cntr_mask64; + unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; + union { + u64 acr_cause_mask64; + unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + }; int cntval_bits; u64 cntval_mask; union { @@ -829,7 +897,7 @@ struct x86_pmu { void (*check_microcode)(void); void (*sched_task)(struct perf_event_pmu_context *pmu_ctx, - bool sched_in); + struct task_struct *task, bool sched_in); /* * Intel Arch Perfmon v2+ @@ -842,7 +910,7 @@ struct x86_pmu { */ unsigned int bts :1, bts_active :1, - pebs :1, + ds_pebs :1, pebs_active :1, pebs_broken :1, pebs_prec_dist :1, @@ -852,7 +920,7 @@ struct x86_pmu { pebs_ept :1; int pebs_record_size; int pebs_buffer_size; - int max_pebs_events; + u64 pebs_events_mask; void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); @@ -874,6 +942,7 @@ struct x86_pmu { const int *lbr_sel_map; /* lbr_select mappings */ int *lbr_ctl_map; /* LBR_CTL mappings */ }; + u64 lbr_callstack_users; /* lbr callstack system wide users */ bool lbr_double_abort; /* duplicated lbr aborts */ bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ @@ -912,14 +981,6 @@ struct x86_pmu { int num_topdown_events; /* - * perf task context (i.e. struct perf_event_pmu_context::task_ctx_data) - * switch helper to bridge calls from perf/core to perf/x86. - * See struct pmu::swap_task_ctx() usage for examples; - */ - void (*swap_task_ctx)(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc); - - /* * AMD bits */ unsigned int amd_nb_constraints : 1; @@ -954,7 +1015,7 @@ struct x86_pmu { */ int num_hybrid_pmus; struct x86_hybrid_pmu *hybrid_pmu; - enum hybrid_cpu_type (*get_hybrid_cpu_type) (void); + enum intel_cpu_type (*get_hybrid_cpu_type) (void); }; struct x86_perf_task_context_opt { @@ -1020,6 +1081,7 @@ do { \ #define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */ #define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */ #define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */ +#define PMU_FL_DYN_CONSTRAINT 0x800 /* Needs dynamic constraint */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr @@ -1062,11 +1124,18 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\ .pmu_type = _pmu, \ } +int is_x86_event(struct perf_event *event); struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly; DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period); DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update); +DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs); +DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup); +DECLARE_STATIC_CALL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable); +DECLARE_STATIC_CALL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable); +DECLARE_STATIC_CALL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all); +DECLARE_STATIC_CALL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all); static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx) { @@ -1108,6 +1177,12 @@ extern u64 __read_mostly hw_cache_extra_regs u64 x86_perf_event_update(struct perf_event *event); +static inline u64 intel_pmu_topdown_event_update(struct perf_event *event, u64 *val) +{ + return x86_perf_event_update(event); +} +DECLARE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update); + static inline unsigned int x86_pmu_config_addr(int index) { return x86_pmu.eventsel + (x86_pmu.addr_offset ? @@ -1120,13 +1195,19 @@ static inline unsigned int x86_pmu_event_addr(int index) x86_pmu.addr_offset(index, false) : index); } +static inline unsigned int x86_pmu_fixed_ctr_addr(int index) +{ + return x86_pmu.fixedctr + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, false) : index); +} + static inline int x86_pmu_rdpmc_index(int index) { return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; } -bool check_hw_exists(struct pmu *pmu, int num_counters, - int num_counters_fixed); +bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask, + unsigned long *fixed_cntr_mask); int x86_add_exclusive(unsigned int what); @@ -1162,16 +1243,16 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); if (hwc->extra_reg.reg) - wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); + wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config); /* * Add enabled Merge event on next counter * if large increment event being enabled on this counter */ if (is_counter_pair(hwc)) - wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en); + wrmsrq(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en); - wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); + wrmsrq(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); } void x86_pmu_enable_all(int added); @@ -1187,18 +1268,42 @@ static inline void x86_pmu_disable_event(struct perf_event *event) u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config & ~disable_mask); + wrmsrq(hwc->config_base, hwc->config & ~disable_mask); if (is_counter_pair(hwc)) - wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); + wrmsrq(x86_pmu_config_addr(hwc->idx + 1), 0); } void x86_pmu_enable_event(struct perf_event *event); int x86_pmu_handle_irq(struct pt_regs *regs); -void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed, - u64 intel_ctrl); +void x86_pmu_show_pmu_cap(struct pmu *pmu); + +static inline int x86_pmu_num_counters(struct pmu *pmu) +{ + return hweight64(hybrid(pmu, cntr_mask64)); +} + +static inline int x86_pmu_max_num_counters(struct pmu *pmu) +{ + return fls64(hybrid(pmu, cntr_mask64)); +} + +static inline int x86_pmu_num_counters_fixed(struct pmu *pmu) +{ + return hweight64(hybrid(pmu, fixed_cntr_mask64)); +} + +static inline int x86_pmu_max_num_counters_fixed(struct pmu *pmu) +{ + return fls64(hybrid(pmu, fixed_cntr_mask64)); +} + +static inline u64 x86_pmu_get_event_config(struct perf_event *event) +{ + return event->attr.config & hybrid(event->pmu, config_mask); +} extern struct event_constraint emptyconstraint; @@ -1324,7 +1429,8 @@ void amd_pmu_lbr_reset(void); void amd_pmu_lbr_read(void); void amd_pmu_lbr_add(struct perf_event *event); void amd_pmu_lbr_del(struct perf_event *event); -void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); +void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); void amd_pmu_lbr_enable_all(void); void amd_pmu_lbr_disable_all(void); int amd_pmu_lbr_hw_config(struct perf_event *event); @@ -1333,12 +1439,12 @@ static __always_inline void __amd_pmu_lbr_disable(void) { u64 dbg_ctl, dbg_extn_cfg; - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); + rdmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + wrmsrq(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrq(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); } } @@ -1378,7 +1484,8 @@ static inline void amd_pmu_brs_del(struct perf_event *event) perf_sched_cb_dec(event->pmu); } -void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); +void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); #else static inline int amd_brs_init(void) { @@ -1403,7 +1510,8 @@ static inline void amd_pmu_brs_del(struct perf_event *event) { } -static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static inline void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { } @@ -1468,21 +1576,21 @@ static inline bool intel_pmu_has_bts(struct perf_event *event) static __always_inline void __intel_pmu_pebs_disable_all(void) { - wrmsrl(MSR_IA32_PEBS_ENABLE, 0); + wrmsrq(MSR_IA32_PEBS_ENABLE, 0); } static __always_inline void __intel_pmu_arch_lbr_disable(void) { - wrmsrl(MSR_ARCH_LBR_CTL, 0); + wrmsrq(MSR_ARCH_LBR_CTL, 0); } static __always_inline void __intel_pmu_lbr_disable(void) { u64 debugctl; - rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); - wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + wrmsrq(MSR_IA32_DEBUGCTLMSR, debugctl); } int intel_pmu_save_and_restart(struct perf_event *event); @@ -1517,9 +1625,15 @@ void intel_pmu_disable_bts(void); int intel_pmu_drain_bts_buffer(void); -u64 adl_latency_data_small(struct perf_event *event, u64 status); +void intel_pmu_late_setup(void); -u64 mtl_latency_data_small(struct perf_event *event, u64 status); +u64 grt_latency_data(struct perf_event *event, u64 status); + +u64 cmt_latency_data(struct perf_event *event, u64 status); + +u64 lnl_latency_data(struct perf_event *event, u64 status); + +u64 arl_h_latency_data(struct perf_event *event, u64 status); extern struct event_constraint intel_core2_pebs_event_constraints[]; @@ -1551,6 +1665,8 @@ extern struct event_constraint intel_icl_pebs_event_constraints[]; extern struct event_constraint intel_glc_pebs_event_constraints[]; +extern struct event_constraint intel_lnc_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_add(struct perf_event *event); @@ -1567,20 +1683,20 @@ void intel_pmu_pebs_disable_all(void); void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); -void intel_pmu_auto_reload_read(struct perf_event *event); +void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc); + +void intel_pmu_drain_pebs_buffer(void); void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); -void intel_ds_init(void); +void intel_pebs_init(void); void intel_pmu_lbr_save_brstack(struct perf_sample_data *data, struct cpu_hw_events *cpuc, struct perf_event *event); -void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc); - -void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in); +void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in); u64 lbr_from_signext_quirk_wr(u64 val); @@ -1638,8 +1754,12 @@ void intel_pmu_pebs_data_source_grt(void); void intel_pmu_pebs_data_source_mtl(void); +void intel_pmu_pebs_data_source_arl_h(void); + void intel_pmu_pebs_data_source_cmt(void); +void intel_pmu_pebs_data_source_lnl(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); @@ -1661,6 +1781,17 @@ static inline int is_ht_workaround_enabled(void) return !!(x86_pmu.flags & PMU_FL_EXCL_ENABLED); } +static inline u64 intel_pmu_pebs_mask(u64 cntr_mask) +{ + return MAX_PEBS_EVENTS_MASK & cntr_mask; +} + +static inline int intel_pmu_max_num_pebs(struct pmu *pmu) +{ + static_assert(MAX_PEBS_EVENTS == 32); + return fls((u32)hybrid(pmu, pebs_events_mask)); +} + #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h index 6c977c19f2cd..70078334e4a3 100644 --- a/arch/x86/events/perf_event_flags.h +++ b/arch/x86/events/perf_event_flags.h @@ -2,23 +2,24 @@ /* * struct hw_perf_event.flags flags */ -PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */ -PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */ -PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */ -PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */ -PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */ -PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */ -PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */ - /* 0x00080 */ -PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */ -PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */ -PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */ -PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */ -PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */ -PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */ -PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */ -PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */ -PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */ -PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */ -PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */ -PERF_ARCH(BRANCH_COUNTERS, 0x80000) /* logs the counters in the extra space of each branch */ +PERF_ARCH(PEBS_LDLAT, 0x0000001) /* ld+ldlat data address sampling */ +PERF_ARCH(PEBS_ST, 0x0000002) /* st data address sampling */ +PERF_ARCH(PEBS_ST_HSW, 0x0000004) /* haswell style datala, store */ +PERF_ARCH(PEBS_LD_HSW, 0x0000008) /* haswell style datala, load */ +PERF_ARCH(PEBS_NA_HSW, 0x0000010) /* haswell style datala, unknown */ +PERF_ARCH(EXCL, 0x0000020) /* HT exclusivity on counter */ +PERF_ARCH(DYNAMIC, 0x0000040) /* dynamic alloc'd constraint */ +PERF_ARCH(PEBS_CNTR, 0x0000080) /* PEBS counters snapshot */ +PERF_ARCH(EXCL_ACCT, 0x0000100) /* accounted EXCL event */ +PERF_ARCH(AUTO_RELOAD, 0x0000200) /* use PEBS auto-reload */ +PERF_ARCH(LARGE_PEBS, 0x0000400) /* use large PEBS */ +PERF_ARCH(PEBS_VIA_PT, 0x0000800) /* use PT buffer for PEBS */ +PERF_ARCH(PAIR, 0x0001000) /* Large Increment per Cycle */ +PERF_ARCH(LBR_SELECT, 0x0002000) /* Save/Restore MSR_LBR_SELECT */ +PERF_ARCH(TOPDOWN, 0x0004000) /* Count Topdown slots/metrics events */ +PERF_ARCH(PEBS_STLAT, 0x0008000) /* st+stlat data address sampling */ +PERF_ARCH(AMD_BRS, 0x0010000) /* AMD Branch Sampling */ +PERF_ARCH(PEBS_LAT_HYBRID, 0x0020000) /* ld and st lat for hybrid */ +PERF_ARCH(NEEDS_BRANCH_STACK, 0x0040000) /* require branch stack setup */ +PERF_ARCH(BRANCH_COUNTERS, 0x0080000) /* logs the counters in the extra space of each branch */ +PERF_ARCH(ACR, 0x0100000) /* Auto counter reload */ diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c index 600bf8d15c0c..bb719d0d3f0b 100644 --- a/arch/x86/events/probe.c +++ b/arch/x86/events/probe.c @@ -2,6 +2,8 @@ #include <linux/export.h> #include <linux/types.h> #include <linux/bits.h> + +#include <asm/msr.h> #include "probe.h" static umode_t @@ -43,7 +45,7 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) if (msr[bit].test && !msr[bit].test(bit, data)) continue; /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ - if (rdmsrl_safe(msr[bit].msr, &val)) + if (rdmsrq_safe(msr[bit].msr, &val)) continue; mask = msr[bit].mask; diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index ca5f687fa420..defd86137f12 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -39,6 +39,10 @@ * event: rapl_energy_psys * perf code: 0x5 * + * core counter: consumption of a single physical core + * event: rapl_energy_core (power_core PMU) + * perf code: 0x1 + * * We manage those counters as free running (read-only). They may be * use simultaneously by other tools, such as turbostat. * @@ -61,26 +65,32 @@ #include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> +#include <asm/msr.h> #include "perf_event.h" #include "probe.h" +MODULE_DESCRIPTION("Support Intel/AMD RAPL energy consumption counters"); MODULE_LICENSE("GPL"); /* * RAPL energy status counters */ -enum perf_rapl_events { +enum perf_rapl_pkg_events { PERF_RAPL_PP0 = 0, /* all cores */ PERF_RAPL_PKG, /* entire package */ PERF_RAPL_RAM, /* DRAM */ PERF_RAPL_PP1, /* gpu */ PERF_RAPL_PSYS, /* psys */ - PERF_RAPL_MAX, - NR_RAPL_DOMAINS = PERF_RAPL_MAX, + PERF_RAPL_PKG_EVENTS_MAX, + NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX, }; -static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { +#define PERF_RAPL_CORE 0 /* single core */ +#define PERF_RAPL_CORE_EVENTS_MAX 1 +#define NR_RAPL_CORE_DOMAINS PERF_RAPL_CORE_EVENTS_MAX + +static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = { "pp0-core", "package", "dram", @@ -88,6 +98,8 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "psys", }; +static const char *const rapl_core_domain_name __initconst = "core"; + /* * event code: LSB 8 bits, passed in attr->config * any other bit is reserved @@ -102,6 +114,19 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +/* + * RAPL Package energy counter scope: + * 1. AMD/HYGON platforms have a per-PKG package energy counter + * 2. For Intel platforms + * 2.1. CLX-AP is multi-die and its RAPL MSRs are die-scope + * 2.2. Other Intel platforms are single die systems so the scope can be + * considered as either pkg-scope or die-scope, and we are considering + * them as die-scope. + */ +#define rapl_pkg_pmu_is_pkg_scope() \ + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \ + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + struct rapl_pmu { raw_spinlock_t lock; int n_active; @@ -114,8 +139,9 @@ struct rapl_pmu { struct rapl_pmus { struct pmu pmu; - unsigned int maxdie; - struct rapl_pmu *pmus[] __counted_by(maxdie); + unsigned int nr_rapl_pmu; + unsigned int cntr_mask; + struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu); }; enum rapl_unit_quirk { @@ -125,51 +151,66 @@ enum rapl_unit_quirk { }; struct rapl_model { - struct perf_msr *rapl_msrs; - unsigned long events; + struct perf_msr *rapl_pkg_msrs; + struct perf_msr *rapl_core_msrs; + unsigned long pkg_events; + unsigned long core_events; unsigned int msr_power_unit; enum rapl_unit_quirk unit_quirk; }; /* 1/2^hw_unit Joule */ -static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; -static struct rapl_pmus *rapl_pmus; -static cpumask_t rapl_cpu_mask; -static unsigned int rapl_cntr_mask; +static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly; +static int rapl_core_hw_unit __read_mostly; +static struct rapl_pmus *rapl_pmus_pkg; +static struct rapl_pmus *rapl_pmus_core; static u64 rapl_timer_ms; -static struct perf_msr *rapl_msrs; +static struct rapl_model *rapl_model; -static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) +/* + * Helper function to get the correct topology id according to the + * RAPL PMU scope. + */ +static inline unsigned int get_rapl_pmu_idx(int cpu, int scope) { - unsigned int dieid = topology_logical_die_id(cpu); - /* - * The unsigned check also catches the '-1' return value for non - * existent mappings in the topology map. + * Returns unsigned int, which converts the '-1' return value + * (for non-existent mappings in topology map) to UINT_MAX, so + * the error check in the caller is simplified. */ - return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL; + switch (scope) { + case PERF_PMU_SCOPE_PKG: + return topology_logical_package_id(cpu); + case PERF_PMU_SCOPE_DIE: + return topology_logical_die_id(cpu); + case PERF_PMU_SCOPE_CORE: + return topology_logical_core_id(cpu); + default: + return -EINVAL; + } } static inline u64 rapl_read_counter(struct perf_event *event) { u64 raw; - rdmsrl(event->hw.event_base, raw); + rdmsrq(event->hw.event_base, raw); return raw; } -static inline u64 rapl_scale(u64 v, int cfg) +static inline u64 rapl_scale(u64 v, struct perf_event *event) { - if (cfg > NR_RAPL_DOMAINS) { - pr_warn("Invalid domain %d, failed to scale data\n", cfg); - return v; - } + int hw_unit = rapl_pkg_hw_unit[event->hw.config - 1]; + + if (event->pmu->scope == PERF_PMU_SCOPE_CORE) + hw_unit = rapl_core_hw_unit; + /* * scale delta to smallest unit (1/2^32) * users must then scale back: count * 1/(1e9*2^32) to get Joules * or use ldexp(count, -32). * Watts = Joules/Time delta */ - return v << (32 - rapl_hw_unit[cfg - 1]); + return v << (32 - hw_unit); } static u64 rapl_event_update(struct perf_event *event) @@ -181,7 +222,7 @@ static u64 rapl_event_update(struct perf_event *event) prev_raw_count = local64_read(&hwc->prev_count); do { - rdmsrl(event->hw.event_base, new_raw_count); + rdmsrq(event->hw.event_base, new_raw_count); } while (!local64_try_cmpxchg(&hwc->prev_count, &prev_raw_count, new_raw_count)); @@ -196,7 +237,7 @@ static u64 rapl_event_update(struct perf_event *event) delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - sdelta = rapl_scale(delta, event->hw.config); + sdelta = rapl_scale(delta, event); local64_add(sdelta, &event->count); @@ -211,34 +252,33 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { - struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); + struct rapl_pmu *rapl_pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); struct perf_event *event; unsigned long flags; - if (!pmu->n_active) + if (!rapl_pmu->n_active) return HRTIMER_NORESTART; - raw_spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&rapl_pmu->lock, flags); - list_for_each_entry(event, &pmu->active_list, active_entry) + list_for_each_entry(event, &rapl_pmu->active_list, active_entry) rapl_event_update(event); - raw_spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); - hrtimer_forward_now(hrtimer, pmu->timer_interval); + hrtimer_forward_now(hrtimer, rapl_pmu->timer_interval); return HRTIMER_RESTART; } -static void rapl_hrtimer_init(struct rapl_pmu *pmu) +static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu) { - struct hrtimer *hr = &pmu->hrtimer; + struct hrtimer *hr = &rapl_pmu->hrtimer; - hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hr->function = rapl_hrtimer_handle; + hrtimer_setup(hr, rapl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL); } -static void __rapl_pmu_event_start(struct rapl_pmu *pmu, +static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu, struct perf_event *event) { if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) @@ -246,39 +286,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, event->hw.state = 0; - list_add_tail(&event->active_entry, &pmu->active_list); + list_add_tail(&event->active_entry, &rapl_pmu->active_list); local64_set(&event->hw.prev_count, rapl_read_counter(event)); - pmu->n_active++; - if (pmu->n_active == 1) - rapl_start_hrtimer(pmu); + rapl_pmu->n_active++; + if (rapl_pmu->n_active == 1) + rapl_start_hrtimer(rapl_pmu); } static void rapl_pmu_event_start(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = event->pmu_private; + struct rapl_pmu *rapl_pmu = event->pmu_private; unsigned long flags; - raw_spin_lock_irqsave(&pmu->lock, flags); - __rapl_pmu_event_start(pmu, event); - raw_spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_lock_irqsave(&rapl_pmu->lock, flags); + __rapl_pmu_event_start(rapl_pmu, event); + raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); } static void rapl_pmu_event_stop(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = event->pmu_private; + struct rapl_pmu *rapl_pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - raw_spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&rapl_pmu->lock, flags); /* mark event as deactivated and stopped */ if (!(hwc->state & PERF_HES_STOPPED)) { - WARN_ON_ONCE(pmu->n_active <= 0); - pmu->n_active--; - if (pmu->n_active == 0) - hrtimer_cancel(&pmu->hrtimer); + WARN_ON_ONCE(rapl_pmu->n_active <= 0); + rapl_pmu->n_active--; + if (rapl_pmu->n_active == 0) + hrtimer_cancel(&rapl_pmu->hrtimer); list_del(&event->active_entry); @@ -296,23 +336,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) hwc->state |= PERF_HES_UPTODATE; } - raw_spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); } static int rapl_pmu_event_add(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = event->pmu_private; + struct rapl_pmu *rapl_pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - raw_spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&rapl_pmu->lock, flags); hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (mode & PERF_EF_START) - __rapl_pmu_event_start(pmu, event); + __rapl_pmu_event_start(rapl_pmu, event); - raw_spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); return 0; } @@ -325,13 +365,19 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags) static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; - int bit, ret = 0; - struct rapl_pmu *pmu; + int bit, rapl_pmus_scope, ret = 0; + struct rapl_pmu *rapl_pmu; + unsigned int rapl_pmu_idx; + struct rapl_pmus *rapl_pmus; /* only look at RAPL events */ - if (event->attr.type != rapl_pmus->pmu.type) + if (event->attr.type != event->pmu->type) return -ENOENT; + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + /* check only supported bits are set */ if (event->attr.config & ~RAPL_EVENT_MASK) return -EINVAL; @@ -339,29 +385,41 @@ static int rapl_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; - event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - - if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) + rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu); + if (!rapl_pmus) + return -EINVAL; + rapl_pmus_scope = rapl_pmus->pmu.scope; + + if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) { + cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1); + if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) + return -EINVAL; + + bit = cfg - 1; + event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr; + } else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) { + cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1); + if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) + return -EINVAL; + + bit = cfg - 1; + event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr; + } else return -EINVAL; - - cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); - bit = cfg - 1; /* check event supported */ - if (!(rapl_cntr_mask & (1 << bit))) + if (!(rapl_pmus->cntr_mask & (1 << bit))) return -EINVAL; - /* unsupported modes and filters */ - if (event->attr.sample_period) /* no sampling */ + rapl_pmu_idx = get_rapl_pmu_idx(event->cpu, rapl_pmus_scope); + if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu) return -EINVAL; - /* must be done before validate_group */ - pmu = cpu_to_rapl_pmu(event->cpu); - if (!pmu) + rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx]; + if (!rapl_pmu) return -EINVAL; - event->cpu = pmu->cpu; - event->pmu_private = pmu; - event->hw.event_base = rapl_msrs[bit].msr; + + event->pmu_private = rapl_pmu; event->hw.config = cfg; event->hw.idx = bit; @@ -373,34 +431,19 @@ static void rapl_pmu_event_read(struct perf_event *event) rapl_event_update(event); } -static ssize_t rapl_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask); -} - -static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); - -static struct attribute *rapl_pmu_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static struct attribute_group rapl_pmu_attr_group = { - .attrs = rapl_pmu_attrs, -}; - RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05"); +RAPL_EVENT_ATTR_STR(energy-core, rapl_core, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-core.unit, rapl_core_unit, "Joules"); /* * we compute in 0.23 nJ increments regardless of MSR @@ -410,6 +453,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890 RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-core.scale, rapl_core_scale, "2.3283064365386962890625e-10"); /* * There are no default events, but we need to create @@ -437,7 +481,12 @@ static struct attribute_group rapl_pmu_format_group = { }; static const struct attribute_group *rapl_attr_groups[] = { - &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, + NULL, +}; + +static const struct attribute_group *rapl_core_attr_groups[] = { &rapl_pmu_format_group, &rapl_pmu_events_group, NULL, @@ -503,6 +552,18 @@ static struct attribute_group rapl_events_psys_group = { .attrs = rapl_events_psys, }; +static struct attribute *rapl_events_core[] = { + EVENT_PTR(rapl_core), + EVENT_PTR(rapl_core_unit), + EVENT_PTR(rapl_core_scale), + NULL, +}; + +static struct attribute_group rapl_events_core_group = { + .name = "events", + .attrs = rapl_events_core, +}; + static bool test_msr(int idx, void *data) { return test_bit(idx, (unsigned long *) data); @@ -528,11 +589,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { }; /* - * Force to PERF_RAPL_MAX size due to: - * - perf_msr_probe(PERF_RAPL_MAX) + * Force to PERF_RAPL_PKG_EVENTS_MAX size due to: + * - perf_msr_probe(PERF_RAPL_PKG_EVENTS_MAX) * - want to use same event codes across both architectures */ -static struct perf_msr amd_rapl_msrs[] = { +static struct perf_msr amd_rapl_pkg_msrs[] = { [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 }, [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 }, @@ -540,72 +601,25 @@ static struct perf_msr amd_rapl_msrs[] = { [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, }; -static int rapl_cpu_offline(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - int target; - - /* Check if exiting cpu is used for collecting rapl events */ - if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) - return 0; - - pmu->cpu = -1; - /* Find a new cpu to collect rapl events */ - target = cpumask_any_but(topology_die_cpumask(cpu), cpu); - - /* Migrate rapl events to the new target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &rapl_cpu_mask); - pmu->cpu = target; - perf_pmu_migrate_context(pmu->pmu, cpu, target); - } - return 0; -} - -static int rapl_cpu_online(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - int target; - - if (!pmu) { - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); - if (!pmu) - return -ENOMEM; - - raw_spin_lock_init(&pmu->lock); - INIT_LIST_HEAD(&pmu->active_list); - pmu->pmu = &rapl_pmus->pmu; - pmu->timer_interval = ms_to_ktime(rapl_timer_ms); - rapl_hrtimer_init(pmu); - - rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu; - } - - /* - * Check if there is an online cpu in the package which collects rapl - * events already. - */ - target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu)); - if (target < nr_cpu_ids) - return 0; - - cpumask_set_cpu(cpu, &rapl_cpu_mask); - pmu->cpu = cpu; - return 0; -} +static struct perf_msr amd_rapl_core_msrs[] = { + [PERF_RAPL_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_core_group, + test_msr, false, RAPL_MSR_MASK }, +}; -static int rapl_check_hw_unit(struct rapl_model *rm) +static int rapl_check_hw_unit(void) { u64 msr_rapl_power_unit_bits; int i; - /* protect rdmsrl() to handle virtualization */ - if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits)) + /* protect rdmsrq() to handle virtualization */ + if (rdmsrq_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits)) return -1; - for (i = 0; i < NR_RAPL_DOMAINS; i++) - rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) + rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + + rapl_core_hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; - switch (rm->unit_quirk) { + switch (rapl_model->unit_quirk) { /* * DRAM domain on HSW server and KNL has fixed energy unit which can be * different than the unit from power unit MSR. See @@ -613,17 +627,16 @@ static int rapl_check_hw_unit(struct rapl_model *rm) * of 2. Datasheet, September 2014, Reference Number: 330784-001 " */ case RAPL_UNIT_QUIRK_INTEL_HSW: - rapl_hw_unit[PERF_RAPL_RAM] = 16; + rapl_pkg_hw_unit[PERF_RAPL_RAM] = 16; break; /* SPR uses a fixed energy unit for Psys domain. */ case RAPL_UNIT_QUIRK_INTEL_SPR: - rapl_hw_unit[PERF_RAPL_PSYS] = 0; + rapl_pkg_hw_unit[PERF_RAPL_PSYS] = 0; break; default: break; } - /* * Calculate the timer rate: * Use reference of 200W for scaling the timeout to avoid counter @@ -632,9 +645,9 @@ static int rapl_check_hw_unit(struct rapl_model *rm) * if hw unit is 32, then we use 2 ms 1/200/2 */ rapl_timer_ms = 2; - if (rapl_hw_unit[0] < 32) { + if (rapl_pkg_hw_unit[0] < 32) { rapl_timer_ms = (1000 / (2 * 100)); - rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1)); + rapl_timer_ms *= (1ULL << (32 - rapl_pkg_hw_unit[0] - 1)); } return 0; } @@ -642,24 +655,32 @@ static int rapl_check_hw_unit(struct rapl_model *rm) static void __init rapl_advertise(void) { int i; + int num_counters = hweight32(rapl_pmus_pkg->cntr_mask); + + if (rapl_pmus_core) + num_counters += hweight32(rapl_pmus_core->cntr_mask); pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", - hweight32(rapl_cntr_mask), rapl_timer_ms); + num_counters, rapl_timer_ms); - for (i = 0; i < NR_RAPL_DOMAINS; i++) { - if (rapl_cntr_mask & (1 << i)) { + for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) { + if (rapl_pmus_pkg->cntr_mask & (1 << i)) { pr_info("hw unit of domain %s 2^-%d Joules\n", - rapl_domain_names[i], rapl_hw_unit[i]); + rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]); } } + + if (rapl_pmus_core && (rapl_pmus_core->cntr_mask & (1 << PERF_RAPL_CORE))) + pr_info("hw unit of domain %s 2^-%d Joules\n", + rapl_core_domain_name, rapl_core_hw_unit); } -static void cleanup_rapl_pmus(void) +static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus) { int i; - for (i = 0; i < rapl_pmus->maxdie; i++) - kfree(rapl_pmus->pmus[i]); + for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++) + kfree(rapl_pmus->rapl_pmu[i]); kfree(rapl_pmus); } @@ -672,15 +693,62 @@ static const struct attribute_group *rapl_attr_update[] = { NULL, }; -static int __init init_rapl_pmus(void) +static const struct attribute_group *rapl_core_attr_update[] = { + &rapl_events_core_group, + NULL, +}; + +static int __init init_rapl_pmu(struct rapl_pmus *rapl_pmus) +{ + struct rapl_pmu *rapl_pmu; + int idx; + + for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) { + rapl_pmu = kzalloc(sizeof(*rapl_pmu), GFP_KERNEL); + if (!rapl_pmu) + goto free; + + raw_spin_lock_init(&rapl_pmu->lock); + INIT_LIST_HEAD(&rapl_pmu->active_list); + rapl_pmu->pmu = &rapl_pmus->pmu; + rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + rapl_hrtimer_init(rapl_pmu); + + rapl_pmus->rapl_pmu[idx] = rapl_pmu; + } + + return 0; +free: + for (; idx > 0; idx--) + kfree(rapl_pmus->rapl_pmu[idx - 1]); + return -ENOMEM; +} + +static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope, + const struct attribute_group **rapl_attr_groups, + const struct attribute_group **rapl_attr_update) { - int maxdie = topology_max_packages() * topology_max_dies_per_package(); + int nr_rapl_pmu = topology_max_packages(); + struct rapl_pmus *rapl_pmus; + int ret; + + /* + * rapl_pmu_scope must be either PKG, DIE or CORE + */ + if (rapl_pmu_scope == PERF_PMU_SCOPE_DIE) + nr_rapl_pmu *= topology_max_dies_per_package(); + else if (rapl_pmu_scope == PERF_PMU_SCOPE_CORE) + nr_rapl_pmu *= topology_num_cores_per_package(); + else if (rapl_pmu_scope != PERF_PMU_SCOPE_PKG) + return -EINVAL; - rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, maxdie), GFP_KERNEL); + rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL); if (!rapl_pmus) return -ENOMEM; - rapl_pmus->maxdie = maxdie; + *rapl_pmus_ptr = rapl_pmus; + + rapl_pmus->nr_rapl_pmu = nr_rapl_pmu; rapl_pmus->pmu.attr_groups = rapl_attr_groups; rapl_pmus->pmu.attr_update = rapl_attr_update; rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; @@ -690,125 +758,134 @@ static int __init init_rapl_pmus(void) rapl_pmus->pmu.start = rapl_pmu_event_start; rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.read = rapl_pmu_event_read; + rapl_pmus->pmu.scope = rapl_pmu_scope; rapl_pmus->pmu.module = THIS_MODULE; rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; - return 0; + + ret = init_rapl_pmu(rapl_pmus); + if (ret) + kfree(rapl_pmus); + + return ret; } static struct rapl_model model_snb = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_PP1), .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_snbep = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_hsw = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PP1), .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_hsx = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_knl = { - .events = BIT(PERF_RAPL_PKG) | + .pkg_events = BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_skl = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PP1) | BIT(PERF_RAPL_PSYS), .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_spr = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PSYS), .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR, .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_spr_msrs, + .rapl_pkg_msrs = intel_rapl_spr_msrs, }; static struct rapl_model model_amd_hygon = { - .events = BIT(PERF_RAPL_PKG), + .pkg_events = BIT(PERF_RAPL_PKG), + .core_events = BIT(PERF_RAPL_CORE), .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, - .rapl_msrs = amd_rapl_msrs, + .rapl_pkg_msrs = amd_rapl_pkg_msrs, + .rapl_core_msrs = amd_rapl_core_msrs, }; static const struct x86_cpu_id rapl_model_match[] __initconst = { - X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon), - X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &model_snb), - X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &model_snbep), - X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &model_snb), - X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &model_snbep), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &model_knl), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &model_knl), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), - X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), - X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE_H, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(LUNARLAKE_M, &model_skl), + X86_MATCH_FEATURE(X86_FEATURE_RAPL, &model_amd_hygon), + X86_MATCH_VFM(INTEL_SANDYBRIDGE, &model_snb), + X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &model_snbep), + X86_MATCH_VFM(INTEL_IVYBRIDGE, &model_snb), + X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &model_snbep), + X86_MATCH_VFM(INTEL_HASWELL, &model_hsw), + X86_MATCH_VFM(INTEL_HASWELL_X, &model_hsx), + X86_MATCH_VFM(INTEL_HASWELL_L, &model_hsw), + X86_MATCH_VFM(INTEL_HASWELL_G, &model_hsw), + X86_MATCH_VFM(INTEL_BROADWELL, &model_hsw), + X86_MATCH_VFM(INTEL_BROADWELL_G, &model_hsw), + X86_MATCH_VFM(INTEL_BROADWELL_X, &model_hsx), + X86_MATCH_VFM(INTEL_BROADWELL_D, &model_hsx), + X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &model_knl), + X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &model_knl), + X86_MATCH_VFM(INTEL_SKYLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_SKYLAKE, &model_skl), + X86_MATCH_VFM(INTEL_SKYLAKE_X, &model_hsx), + X86_MATCH_VFM(INTEL_KABYLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_KABYLAKE, &model_skl), + X86_MATCH_VFM(INTEL_CANNONLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &model_hsw), + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &model_hsw), + X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &model_hsw), + X86_MATCH_VFM(INTEL_ICELAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_ICELAKE, &model_skl), + X86_MATCH_VFM(INTEL_ICELAKE_D, &model_hsx), + X86_MATCH_VFM(INTEL_ICELAKE_X, &model_hsx), + X86_MATCH_VFM(INTEL_COMETLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_COMETLAKE, &model_skl), + X86_MATCH_VFM(INTEL_TIGERLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_TIGERLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ALDERLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &model_skl), + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &model_spr), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &model_spr), + X86_MATCH_VFM(INTEL_RAPTORLAKE, &model_skl), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &model_skl), + X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &model_skl), + X86_MATCH_VFM(INTEL_METEORLAKE, &model_skl), + X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &model_skl), + X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); @@ -816,57 +893,73 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); static int __init rapl_pmu_init(void) { const struct x86_cpu_id *id; - struct rapl_model *rm; + int rapl_pkg_pmu_scope = PERF_PMU_SCOPE_DIE; int ret; + if (rapl_pkg_pmu_is_pkg_scope()) + rapl_pkg_pmu_scope = PERF_PMU_SCOPE_PKG; + id = x86_match_cpu(rapl_model_match); if (!id) return -ENODEV; - rm = (struct rapl_model *) id->driver_data; - - rapl_msrs = rm->rapl_msrs; + rapl_model = (struct rapl_model *) id->driver_data; - rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, - false, (void *) &rm->events); - - ret = rapl_check_hw_unit(rm); + ret = rapl_check_hw_unit(); if (ret) return ret; - ret = init_rapl_pmus(); + ret = init_rapl_pmus(&rapl_pmus_pkg, rapl_pkg_pmu_scope, rapl_attr_groups, + rapl_attr_update); if (ret) return ret; - /* - * Install callbacks. Core will call them for each online cpu. - */ - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, - "perf/x86/rapl:online", - rapl_cpu_online, rapl_cpu_offline); + rapl_pmus_pkg->cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, + PERF_RAPL_PKG_EVENTS_MAX, false, + (void *) &rapl_model->pkg_events); + + ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1); if (ret) goto out; - ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); - if (ret) - goto out1; + if (rapl_model->core_events) { + ret = init_rapl_pmus(&rapl_pmus_core, PERF_PMU_SCOPE_CORE, + rapl_core_attr_groups, + rapl_core_attr_update); + if (ret) { + pr_warn("power-core PMU initialization failed (%d)\n", ret); + goto core_init_failed; + } + + rapl_pmus_core->cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs, + PERF_RAPL_CORE_EVENTS_MAX, false, + (void *) &rapl_model->core_events); + + ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_core", -1); + if (ret) { + pr_warn("power-core PMU registration failed (%d)\n", ret); + cleanup_rapl_pmus(rapl_pmus_core); + } + } +core_init_failed: rapl_advertise(); return 0; -out1: - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); - cleanup_rapl_pmus(); + cleanup_rapl_pmus(rapl_pmus_pkg); return ret; } module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); - perf_pmu_unregister(&rapl_pmus->pmu); - cleanup_rapl_pmus(); + if (rapl_pmus_core) { + perf_pmu_unregister(&rapl_pmus_core->pmu); + cleanup_rapl_pmus(rapl_pmus_core); + } + perf_pmu_unregister(&rapl_pmus_pkg->pmu); + cleanup_rapl_pmus(rapl_pmus_pkg); } module_exit(intel_rapl_exit); diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c index dab4ed199227..77fd00b3305e 100644 --- a/arch/x86/events/utils.c +++ b/arch/x86/events/utils.c @@ -2,6 +2,7 @@ #include <asm/insn.h> #include <linux/mm.h> +#include <asm/msr.h> #include "perf_event.h" static int decode_branch_type(struct insn *insn) diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c index 3e9acdaeed1e..4bdfcf091200 100644 --- a/arch/x86/events/zhaoxin/core.c +++ b/arch/x86/events/zhaoxin/core.c @@ -15,6 +15,7 @@ #include <asm/cpufeature.h> #include <asm/hardirq.h> #include <asm/apic.h> +#include <asm/msr.h> #include "../perf_event.h" @@ -254,26 +255,26 @@ static __initconst const u64 zxe_hw_cache_event_ids static void zhaoxin_pmu_disable_all(void) { - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0); } static void zhaoxin_pmu_enable_all(int added) { - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); } static inline u64 zhaoxin_pmu_get_status(void) { u64 status; - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status); return status; } static inline void zhaoxin_pmu_ack_status(u64 ack) { - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); + wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } static inline void zxc_pmu_ack_status(u64 ack) @@ -293,9 +294,9 @@ static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc) mask = 0xfULL << (idx * 4); - rdmsrl(hwc->config_base, ctrl_val); + rdmsrq(hwc->config_base, ctrl_val); ctrl_val &= ~mask; - wrmsrl(hwc->config_base, ctrl_val); + wrmsrq(hwc->config_base, ctrl_val); } static void zhaoxin_pmu_disable_event(struct perf_event *event) @@ -329,10 +330,10 @@ static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc) bits <<= (idx * 4); mask = 0xfULL << (idx * 4); - rdmsrl(hwc->config_base, ctrl_val); + rdmsrq(hwc->config_base, ctrl_val); ctrl_val &= ~mask; ctrl_val |= bits; - wrmsrl(hwc->config_base, ctrl_val); + wrmsrq(hwc->config_base, ctrl_val); } static void zhaoxin_pmu_enable_event(struct perf_event *event) @@ -397,8 +398,7 @@ again: if (!x86_perf_event_set_period(event)) continue; - if (perf_event_overflow(event, &data, regs)) - x86_pmu_stop(event, 0); + perf_event_overflow(event, &data, regs); } /* @@ -530,13 +530,13 @@ __init int zhaoxin_pmu_init(void) pr_info("Version check pass!\n"); x86_pmu.version = version; - x86_pmu.num_counters = eax.split.num_counters; + x86_pmu.cntr_mask64 = GENMASK_ULL(eax.split.num_counters - 1, 0); x86_pmu.cntval_bits = eax.split.bit_width; x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; x86_pmu.events_maskl = ebx.full; x86_pmu.events_mask_len = eax.split.mask_length; - x86_pmu.num_counters_fixed = edx.split.num_counters_fixed; + x86_pmu.fixed_cntr_mask64 = GENMASK_ULL(edx.split.num_counters_fixed - 1, 0); x86_add_quirk(zhaoxin_arch_events_quirk); switch (boot_cpu_data.x86) { @@ -604,13 +604,13 @@ __init int zhaoxin_pmu_init(void) return -ENODEV; } - x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1; - x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + x86_pmu.intel_ctrl = x86_pmu.cntr_mask64; + x86_pmu.intel_ctrl |= x86_pmu.fixed_cntr_mask64 << INTEL_PMC_IDX_FIXED; if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; - c->weight += x86_pmu.num_counters; + c->idxmsk64 |= x86_pmu.cntr_mask64; + c->weight += x86_pmu_num_counters(NULL); } } |