diff options
Diffstat (limited to 'arch/x86/events')
-rw-r--r-- | arch/x86/events/amd/core.c | 10 | ||||
-rw-r--r-- | arch/x86/events/amd/uncore.c | 5 | ||||
-rw-r--r-- | arch/x86/events/core.c | 127 | ||||
-rw-r--r-- | arch/x86/events/intel/bts.c | 3 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 168 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 142 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 21 | ||||
-rw-r--r-- | arch/x86/events/intel/pt.c | 111 | ||||
-rw-r--r-- | arch/x86/events/intel/pt.h | 6 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 9 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.h | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snb.c | 185 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 34 | ||||
-rw-r--r-- | arch/x86/events/rapl.c | 159 |
14 files changed, 662 insertions, 320 deletions
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 920e3a640cad..b4a1a2576510 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -943,11 +943,12 @@ static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, u static int amd_pmu_v2_handle_irq(struct pt_regs *regs) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + static atomic64_t status_warned = ATOMIC64_INIT(0); + u64 reserved, status, mask, new_bits, prev_bits; struct perf_sample_data data; struct hw_perf_event *hwc; struct perf_event *event; int handled = 0, idx; - u64 reserved, status, mask; bool pmu_enabled; /* @@ -1012,7 +1013,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) * the corresponding PMCs are expected to be inactive according to the * active_mask */ - WARN_ON(status > 0); + if (status > 0) { + prev_bits = atomic64_fetch_or(status, &status_warned); + // A new bit was set for the very first time. + new_bits = status & ~prev_bits; + WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits); + } /* Clear overflow and freeze bits */ amd_pmu_ack_global_status(~status); diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 0bfde2ea5cb8..49c26ce2b115 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -916,7 +916,8 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 }; union amd_uncore_info info; struct amd_uncore_pmu *pmu; - int index = 0, gid, i; + int gid, i; + u16 index = 0; if (pmu_version < 2) return 0; @@ -948,7 +949,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu) for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) { for (i = 0; i < group_num_pmus[gid]; i++) { pmu = &uncore->pmus[index]; - snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index); + snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%hu", index); pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid]; pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2; pmu->rdpmc_base = -1; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index be01823b1bb4..c75c482d4c52 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -41,6 +41,8 @@ #include <asm/desc.h> #include <asm/ldt.h> #include <asm/unwind.h> +#include <asm/uprobes.h> +#include <asm/ibt.h> #include "perf_event.h" @@ -2816,6 +2818,46 @@ static unsigned long get_segment_base(unsigned int segment) return get_desc_base(desc); } +#ifdef CONFIG_UPROBES +/* + * Heuristic-based check if uprobe is installed at the function entry. + * + * Under assumption of user code being compiled with frame pointers, + * `push %rbp/%ebp` is a good indicator that we indeed are. + * + * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern. + * If we get this wrong, captured stack trace might have one extra bogus + * entry, but the rest of stack trace will still be meaningful. + */ +static bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + struct arch_uprobe *auprobe; + + if (!current->utask) + return false; + + auprobe = current->utask->auprobe; + if (!auprobe) + return false; + + /* push %rbp/%ebp */ + if (auprobe->insn[0] == 0x55) + return true; + + /* endbr64 (64-bit only) */ + if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn)) + return true; + + return false; +} + +#else +static bool is_uprobe_at_func_entry(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_UPROBES */ + #ifdef CONFIG_IA32_EMULATION #include <linux/compat.h> @@ -2827,6 +2869,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent unsigned long ss_base, cs_base; struct stack_frame_ia32 frame; const struct stack_frame_ia32 __user *fp; + u32 ret_addr; if (user_64bit_mode(regs)) return 0; @@ -2836,6 +2879,12 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent fp = compat_ptr(ss_base + regs->bp); pagefault_disable(); + + /* see perf_callchain_user() below for why we do this */ + if (is_uprobe_at_func_entry(regs) && + !get_user(ret_addr, (const u32 __user *)regs->sp)) + perf_callchain_store(entry, ret_addr); + while (entry->nr < entry->max_stack) { if (!valid_user_frame(fp, sizeof(frame))) break; @@ -2864,6 +2913,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs { struct stack_frame frame; const struct stack_frame __user *fp; + unsigned long ret_addr; if (perf_guest_state()) { /* TODO: We don't support guest os callchain now */ @@ -2887,6 +2937,19 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs return; pagefault_disable(); + + /* + * If we are called from uprobe handler, and we are indeed at the very + * entry to user function (which is normally a `push %rbp` instruction, + * under assumption of application being compiled with frame pointers), + * we should read return address from *regs->sp before proceeding + * to follow frame pointers, otherwise we'll skip immediate caller + * as %rbp is not yet setup. + */ + if (is_uprobe_at_func_entry(regs) && + !get_user(ret_addr, (const unsigned long __user *)regs->sp)) + perf_callchain_store(entry, ret_addr); + while (entry->nr < entry->max_stack) { if (!valid_user_frame(fp, sizeof(frame))) break; @@ -2940,35 +3003,57 @@ static unsigned long code_segment_base(struct pt_regs *regs) return 0; } -unsigned long perf_instruction_pointer(struct pt_regs *regs) +unsigned long perf_arch_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_state()) - return perf_guest_get_ip(); - return regs->ip + code_segment_base(regs); } -unsigned long perf_misc_flags(struct pt_regs *regs) +static unsigned long common_misc_flags(struct pt_regs *regs) { - unsigned int guest_state = perf_guest_state(); - int misc = 0; + if (regs->flags & PERF_EFLAGS_EXACT) + return PERF_RECORD_MISC_EXACT_IP; - if (guest_state) { - if (guest_state & PERF_GUEST_USER) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; - } else { - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - } + return 0; +} - if (regs->flags & PERF_EFLAGS_EXACT) - misc |= PERF_RECORD_MISC_EXACT_IP; +static unsigned long guest_misc_flags(struct pt_regs *regs) +{ + unsigned long guest_state = perf_guest_state(); + + if (!(guest_state & PERF_GUEST_ACTIVE)) + return 0; + + if (guest_state & PERF_GUEST_USER) + return PERF_RECORD_MISC_GUEST_USER; + else + return PERF_RECORD_MISC_GUEST_KERNEL; + +} + +static unsigned long host_misc_flags(struct pt_regs *regs) +{ + if (user_mode(regs)) + return PERF_RECORD_MISC_USER; + else + return PERF_RECORD_MISC_KERNEL; +} + +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) +{ + unsigned long flags = common_misc_flags(regs); + + flags |= guest_misc_flags(regs); + + return flags; +} + +unsigned long perf_arch_misc_flags(struct pt_regs *regs) +{ + unsigned long flags = common_misc_flags(regs); + + flags |= host_misc_flags(regs); - return misc; + return flags; } void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 974e917e65b2..8f78b0c900ef 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -557,9 +557,6 @@ static int bts_event_init(struct perf_event *event) * disabled, so disallow intel_bts driver for unprivileged * users on paranoid systems since it provides trace data * to the user in a zero-copy fashion. - * - * Note that the default paranoia setting permits unprivileged - * users to profile the kernel. */ if (event->attr.exclude_kernel) { ret = perf_allow_kernel(&event->attr); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0c9c2706d4ec..bb284aff7bfd 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3962,8 +3962,8 @@ static int intel_pmu_hw_config(struct perf_event *event) if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; - if (!(event->attr.sample_type & - ~intel_pmu_large_pebs_flags(event))) { + if (!(event->attr.sample_type & ~intel_pmu_large_pebs_flags(event)) && + !has_aux_action(event)) { event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; event->attach_state |= PERF_ATTACH_SCHED_CB; } @@ -3972,8 +3972,12 @@ static int intel_pmu_hw_config(struct perf_event *event) x86_pmu.pebs_aliases(event); } - if (needs_branch_stack(event) && is_sampling_event(event)) - event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK; + if (needs_branch_stack(event)) { + /* Avoid branch stack setup for counting events in SAMPLE READ */ + if (is_sampling_event(event) || + !(event->attr.sample_type & PERF_SAMPLE_READ)) + event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK; + } if (branch_sample_counters(event)) { struct perf_event *leader, *sibling; @@ -4589,6 +4593,47 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) return HYBRID_INTEL_CORE; } +static inline bool erratum_hsw11(struct perf_event *event) +{ + return (event->hw.config & INTEL_ARCH_EVENT_MASK) == + X86_CONFIG(.event=0xc0, .umask=0x01); +} + +static struct event_constraint * +arl_h_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->pmu_type == hybrid_tiny) + return cmt_get_event_constraints(cpuc, idx, event); + + return mtl_get_event_constraints(cpuc, idx, event); +} + +static int arl_h_hw_config(struct perf_event *event) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->pmu_type == hybrid_tiny) + return intel_pmu_hw_config(event); + + return adl_hw_config(event); +} + +/* + * The HSW11 requires a period larger than 100 which is the same as the BDM11. + * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL. + * + * The message 'interrupt took too long' can be observed on any counter which + * was armed with a period < 32 and two events expired in the same NMI. + * A minimum period of 32 is enforced for the rest of the events. + */ +static void hsw_limit_period(struct perf_event *event, s64 *left) +{ + *left = max(*left, erratum_hsw11(event) ? 128 : 32); +} + /* * Broadwell: * @@ -4606,8 +4651,7 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) */ static void bdw_limit_period(struct perf_event *event, s64 *left) { - if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == - X86_CONFIG(.event=0xc0, .umask=0x01)) { + if (erratum_hsw11(event)) { if (*left < 128) *left = 128; *left &= ~0x3fULL; @@ -4902,17 +4946,26 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) /* * This essentially just maps between the 'hybrid_cpu_type' - * and 'hybrid_pmu_type' enums: + * and 'hybrid_pmu_type' enums except for ARL-H processor + * which needs to compare atom uarch native id since ARL-H + * contains two different atom uarchs. */ for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; + u32 native_id; - if (cpu_type == HYBRID_INTEL_CORE && - pmu_type == hybrid_big) - return &x86_pmu.hybrid_pmu[i]; - if (cpu_type == HYBRID_INTEL_ATOM && - pmu_type == hybrid_small) + if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big) return &x86_pmu.hybrid_pmu[i]; + if (cpu_type == HYBRID_INTEL_ATOM) { + if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small) + return &x86_pmu.hybrid_pmu[i]; + + native_id = get_this_hybrid_cpu_native_id(); + if (native_id == skt_native_id && pmu_type == hybrid_small) + return &x86_pmu.hybrid_pmu[i]; + if (native_id == cmt_native_id && pmu_type == hybrid_tiny) + return &x86_pmu.hybrid_pmu[i]; + } } return NULL; @@ -5943,6 +5996,37 @@ static struct attribute *lnl_hybrid_events_attrs[] = { NULL }; +/* The event string must be in PMU IDX order. */ +EVENT_ATTR_STR_HYBRID(topdown-retiring, + td_retiring_arl_h, + "event=0xc2,umask=0x02;event=0x00,umask=0x80;event=0xc2,umask=0x0", + hybrid_big_small_tiny); +EVENT_ATTR_STR_HYBRID(topdown-bad-spec, + td_bad_spec_arl_h, + "event=0x73,umask=0x0;event=0x00,umask=0x81;event=0x73,umask=0x0", + hybrid_big_small_tiny); +EVENT_ATTR_STR_HYBRID(topdown-fe-bound, + td_fe_bound_arl_h, + "event=0x9c,umask=0x01;event=0x00,umask=0x82;event=0x71,umask=0x0", + hybrid_big_small_tiny); +EVENT_ATTR_STR_HYBRID(topdown-be-bound, + td_be_bound_arl_h, + "event=0xa4,umask=0x02;event=0x00,umask=0x83;event=0x74,umask=0x0", + hybrid_big_small_tiny); + +static struct attribute *arl_h_hybrid_events_attrs[] = { + EVENT_PTR(slots_adl), + EVENT_PTR(td_retiring_arl_h), + EVENT_PTR(td_bad_spec_arl_h), + EVENT_PTR(td_fe_bound_arl_h), + EVENT_PTR(td_be_bound_arl_h), + EVENT_PTR(td_heavy_ops_adl), + EVENT_PTR(td_br_mis_adl), + EVENT_PTR(td_fetch_lat_adl), + EVENT_PTR(td_mem_bound_adl), + NULL, +}; + /* Must be in IDX order */ EVENT_ATTR_STR_HYBRID(mem-loads, mem_ld_adl, "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3", hybrid_big_small); EVENT_ATTR_STR_HYBRID(mem-stores, mem_st_adl, "event=0xd0,umask=0x6;event=0xcd,umask=0x2", hybrid_big_small); @@ -5961,6 +6045,21 @@ static struct attribute *mtl_hybrid_mem_attrs[] = { NULL }; +EVENT_ATTR_STR_HYBRID(mem-loads, + mem_ld_arl_h, + "event=0xd0,umask=0x5,ldlat=3;event=0xcd,umask=0x1,ldlat=3;event=0xd0,umask=0x5,ldlat=3", + hybrid_big_small_tiny); +EVENT_ATTR_STR_HYBRID(mem-stores, + mem_st_arl_h, + "event=0xd0,umask=0x6;event=0xcd,umask=0x2;event=0xd0,umask=0x6", + hybrid_big_small_tiny); + +static struct attribute *arl_h_hybrid_mem_attrs[] = { + EVENT_PTR(mem_ld_arl_h), + EVENT_PTR(mem_st_arl_h), + NULL, +}; + EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big); EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big); @@ -5984,8 +6083,8 @@ static struct attribute *adl_hybrid_tsx_attrs[] = { FORMAT_ATTR_HYBRID(in_tx, hybrid_big); FORMAT_ATTR_HYBRID(in_tx_cp, hybrid_big); -FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small); -FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small); +FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small_tiny); +FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small_tiny); FORMAT_ATTR_HYBRID(frontend, hybrid_big); #define ADL_HYBRID_RTM_FORMAT_ATTR \ @@ -6008,7 +6107,7 @@ static struct attribute *adl_hybrid_extra_attr[] = { NULL }; -FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small); +FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small_tiny); static struct attribute *mtl_hybrid_extra_attr_rtm[] = { ADL_HYBRID_RTM_FORMAT_ATTR, @@ -6216,8 +6315,9 @@ static inline int intel_pmu_v6_addr_offset(int index, bool eventsel) } static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { - { hybrid_small, "cpu_atom" }, - { hybrid_big, "cpu_core" }, + { hybrid_small, "cpu_atom" }, + { hybrid_big, "cpu_core" }, + { hybrid_tiny, "cpu_lowpower" }, }; static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) @@ -6250,7 +6350,7 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) 0, x86_pmu_num_counters(&pmu->pmu), 0, 0); pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - if (pmu->pmu_type & hybrid_small) { + if (pmu->pmu_type & hybrid_small_tiny) { pmu->intel_cap.perf_metrics = 0; pmu->intel_cap.pebs_output_pt_available = 1; pmu->mid_ack = true; @@ -6766,6 +6866,7 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.limit_period = hsw_limit_period; x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; @@ -7088,6 +7189,37 @@ __init int intel_pmu_init(void) name = "lunarlake_hybrid"; break; + case INTEL_ARROWLAKE_H: + intel_pmu_init_hybrid(hybrid_big_small_tiny); + + x86_pmu.pebs_latency_data = arl_h_latency_data; + x86_pmu.get_event_constraints = arl_h_get_event_constraints; + x86_pmu.hw_config = arl_h_hw_config; + + td_attr = arl_h_hybrid_events_attrs; + mem_attr = arl_h_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities. */ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_lnc(&pmu->pmu); + + /* Initialize Atom core specific PerfMon capabilities. */ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + intel_pmu_init_skt(&pmu->pmu); + + /* Initialize Lower Power Atom specific PerfMon capabilities. */ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX]; + intel_pmu_init_grt(&pmu->pmu); + pmu->extra_regs = intel_cmt_extra_regs; + + intel_pmu_pebs_data_source_arl_h(); + pr_cont("ArrowLake-H Hybrid events, "); + name = "arrowlake_h_hybrid"; + break; + default: switch (x86_pmu.version) { case 1: diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9f116dfc4728..ae4ec16156bb 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -128,10 +128,6 @@ static ssize_t __cstate_##_var##_show(struct device *dev, \ static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) -static ssize_t cstate_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, - char *buf); - /* Model -> events mapping */ struct cstate_model { unsigned long core_events; @@ -206,22 +202,9 @@ static struct attribute_group cstate_format_attr_group = { .attrs = cstate_format_attrs, }; -static cpumask_t cstate_core_cpu_mask; -static DEVICE_ATTR(cpumask, S_IRUGO, cstate_get_attr_cpumask, NULL); - -static struct attribute *cstate_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static struct attribute_group cpumask_attr_group = { - .attrs = cstate_cpumask_attrs, -}; - static const struct attribute_group *cstate_attr_groups[] = { &cstate_events_attr_group, &cstate_format_attr_group, - &cpumask_attr_group, NULL, }; @@ -269,8 +252,6 @@ static struct perf_msr pkg_msr[] = { [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr }, }; -static cpumask_t cstate_pkg_cpu_mask; - /* cstate_module PMU */ static struct pmu cstate_module_pmu; static bool has_cstate_module; @@ -291,28 +272,9 @@ static struct perf_msr module_msr[] = { [PERF_CSTATE_MODULE_C6_RES] = { MSR_MODULE_C6_RES_MS, &group_cstate_module_c6, test_msr }, }; -static cpumask_t cstate_module_cpu_mask; - -static ssize_t cstate_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct pmu *pmu = dev_get_drvdata(dev); - - if (pmu == &cstate_core_pmu) - return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask); - else if (pmu == &cstate_pkg_pmu) - return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask); - else if (pmu == &cstate_module_pmu) - return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask); - else - return 0; -} - static int cstate_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config; - int cpu; if (event->attr.type != event->pmu->type) return -ENOENT; @@ -331,20 +293,13 @@ static int cstate_pmu_event_init(struct perf_event *event) if (!(core_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = core_msr[cfg].msr; - cpu = cpumask_any_and(&cstate_core_cpu_mask, - topology_sibling_cpumask(event->cpu)); } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!(pkg_msr_mask & (1 << cfg))) return -EINVAL; - - event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - event->hw.event_base = pkg_msr[cfg].msr; - cpu = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_die_cpumask(event->cpu)); } else if (event->pmu == &cstate_module_pmu) { if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX) return -EINVAL; @@ -352,16 +307,10 @@ static int cstate_pmu_event_init(struct perf_event *event) if (!(module_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = module_msr[cfg].msr; - cpu = cpumask_any_and(&cstate_module_cpu_mask, - topology_cluster_cpumask(event->cpu)); } else { return -ENOENT; } - if (cpu >= nr_cpu_ids) - return -ENODEV; - - event->cpu = cpu; event->hw.config = cfg; event->hw.idx = -1; return 0; @@ -412,84 +361,6 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode) return 0; } -/* - * Check if exiting cpu is the designated reader. If so migrate the - * events when there is a valid target available - */ -static int cstate_cpu_exit(unsigned int cpu) -{ - unsigned int target; - - if (has_cstate_core && - cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) { - - target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &cstate_core_cpu_mask); - perf_pmu_migrate_context(&cstate_core_pmu, cpu, target); - } - } - - if (has_cstate_pkg && - cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) { - - target = cpumask_any_but(topology_die_cpumask(cpu), cpu); - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &cstate_pkg_cpu_mask); - perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); - } - } - - if (has_cstate_module && - cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) { - - target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu); - /* Migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &cstate_module_cpu_mask); - perf_pmu_migrate_context(&cstate_module_pmu, cpu, target); - } - } - return 0; -} - -static int cstate_cpu_init(unsigned int cpu) -{ - unsigned int target; - - /* - * If this is the first online thread of that core, set it in - * the core cpu mask as the designated reader. - */ - target = cpumask_any_and(&cstate_core_cpu_mask, - topology_sibling_cpumask(cpu)); - - if (has_cstate_core && target >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_core_cpu_mask); - - /* - * If this is the first online thread of that package, set it - * in the package cpu mask as the designated reader. - */ - target = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_die_cpumask(cpu)); - if (has_cstate_pkg && target >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); - - /* - * If this is the first online thread of that cluster, set it - * in the cluster cpu mask as the designated reader. - */ - target = cpumask_any_and(&cstate_module_cpu_mask, - topology_cluster_cpumask(cpu)); - if (has_cstate_module && target >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_module_cpu_mask); - - return 0; -} - static const struct attribute_group *core_attr_update[] = { &group_cstate_core_c1, &group_cstate_core_c3, @@ -526,6 +397,7 @@ static struct pmu cstate_core_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .scope = PERF_PMU_SCOPE_CORE, .module = THIS_MODULE, }; @@ -541,6 +413,7 @@ static struct pmu cstate_pkg_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .scope = PERF_PMU_SCOPE_PKG, .module = THIS_MODULE, }; @@ -556,6 +429,7 @@ static struct pmu cstate_module_pmu = { .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .scope = PERF_PMU_SCOPE_CLUSTER, .module = THIS_MODULE, }; @@ -810,9 +684,6 @@ static int __init cstate_probe(const struct cstate_model *cm) static inline void cstate_cleanup(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_CSTATE_STARTING); - if (has_cstate_core) perf_pmu_unregister(&cstate_core_pmu); @@ -827,11 +698,6 @@ static int __init cstate_init(void) { int err; - cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_STARTING, - "perf/x86/cstate:starting", cstate_cpu_init, NULL); - cpuhp_setup_state(CPUHP_AP_PERF_X86_CSTATE_ONLINE, - "perf/x86/cstate:online", NULL, cstate_cpu_exit); - if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); if (err) { @@ -844,6 +710,8 @@ static int __init cstate_init(void) if (has_cstate_pkg) { if (topology_max_dies_per_package() > 1) { + /* CLX-AP is multi-die and the cstate is die-scope */ + cstate_pkg_pmu.scope = PERF_PMU_SCOPE_DIE; err = perf_pmu_register(&cstate_pkg_pmu, "cstate_die", -1); } else { diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index fa5ea65de0d0..8afc4ad3cd16 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -177,6 +177,17 @@ void __init intel_pmu_pebs_data_source_mtl(void) __intel_pmu_pebs_data_source_cmt(data_source); } +void __init intel_pmu_pebs_data_source_arl_h(void) +{ + u64 *data_source; + + intel_pmu_pebs_data_source_lnl(); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_TINY_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_cmt(data_source); +} + void __init intel_pmu_pebs_data_source_cmt(void) { __intel_pmu_pebs_data_source_cmt(pebs_data_source); @@ -388,6 +399,16 @@ u64 lnl_latency_data(struct perf_event *event, u64 status) return lnc_latency_data(event, status); } +u64 arl_h_latency_data(struct perf_event *event, u64 status) +{ + struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); + + if (pmu->pmu_type == hybrid_tiny) + return cmt_latency_data(event, status); + + return lnl_latency_data(event, status); +} + static u64 load_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index b4aa8daa4773..4b0373bc8ab4 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -416,7 +416,10 @@ static bool pt_event_valid(struct perf_event *event) static void pt_config_start(struct perf_event *event) { struct pt *pt = this_cpu_ptr(&pt_ctx); - u64 ctl = event->hw.config; + u64 ctl = event->hw.aux_config; + + if (READ_ONCE(event->hw.aux_paused)) + return; ctl |= RTIT_CTL_TRACEEN; if (READ_ONCE(pt->vmx_on)) @@ -424,7 +427,7 @@ static void pt_config_start(struct perf_event *event) else wrmsrl(MSR_IA32_RTIT_CTL, ctl); - WRITE_ONCE(event->hw.config, ctl); + WRITE_ONCE(event->hw.aux_config, ctl); } /* Address ranges and their corresponding msr configuration registers */ @@ -503,7 +506,7 @@ static void pt_config(struct perf_event *event) u64 reg; /* First round: clear STATUS, in particular the PSB byte counter. */ - if (!event->hw.config) { + if (!event->hw.aux_config) { perf_event_itrace_started(event); wrmsrl(MSR_IA32_RTIT_STATUS, 0); } @@ -533,14 +536,31 @@ static void pt_config(struct perf_event *event) reg |= (event->attr.config & PT_CONFIG_MASK); - event->hw.config = reg; + event->hw.aux_config = reg; + + /* + * Allow resume before starting so as not to overwrite a value set by a + * PMI. + */ + barrier(); + WRITE_ONCE(pt->resume_allowed, 1); + /* Configuration is complete, it is now OK to handle an NMI */ + barrier(); + WRITE_ONCE(pt->handle_nmi, 1); + barrier(); pt_config_start(event); + barrier(); + /* + * Allow pause after starting so its pt_config_stop() doesn't race with + * pt_config_start(). + */ + WRITE_ONCE(pt->pause_allowed, 1); } static void pt_config_stop(struct perf_event *event) { struct pt *pt = this_cpu_ptr(&pt_ctx); - u64 ctl = READ_ONCE(event->hw.config); + u64 ctl = READ_ONCE(event->hw.aux_config); /* may be already stopped by a PMI */ if (!(ctl & RTIT_CTL_TRACEEN)) @@ -550,7 +570,7 @@ static void pt_config_stop(struct perf_event *event) if (!READ_ONCE(pt->vmx_on)) wrmsrl(MSR_IA32_RTIT_CTL, ctl); - WRITE_ONCE(event->hw.config, ctl); + WRITE_ONCE(event->hw.aux_config, ctl); /* * A wrmsr that disables trace generation serializes other PT @@ -828,11 +848,13 @@ static void pt_buffer_advance(struct pt_buffer *buf) buf->cur_idx++; if (buf->cur_idx == buf->cur->last) { - if (buf->cur == buf->last) + if (buf->cur == buf->last) { buf->cur = buf->first; - else + buf->wrapped = true; + } else { buf->cur = list_entry(buf->cur->list.next, struct topa, list); + } buf->cur_idx = 0; } } @@ -846,8 +868,11 @@ static void pt_buffer_advance(struct pt_buffer *buf) static void pt_update_head(struct pt *pt) { struct pt_buffer *buf = perf_get_aux(&pt->handle); + bool wrapped = buf->wrapped; u64 topa_idx, base, old; + buf->wrapped = false; + if (buf->single) { local_set(&buf->data_size, buf->output_off); return; @@ -865,7 +890,7 @@ static void pt_update_head(struct pt *pt) } else { old = (local64_xchg(&buf->head, base) & ((buf->nr_pages << PAGE_SHIFT) - 1)); - if (base < old) + if (base < old || (base == old && wrapped)) base += buf->nr_pages << PAGE_SHIFT; local_add(base - old, &buf->data_size); @@ -1511,6 +1536,7 @@ void intel_pt_interrupt(void) buf = perf_aux_output_begin(&pt->handle, event); if (!buf) { event->hw.state = PERF_HES_STOPPED; + WRITE_ONCE(pt->resume_allowed, 0); return; } @@ -1519,6 +1545,7 @@ void intel_pt_interrupt(void) ret = pt_buffer_reset_markers(buf, &pt->handle); if (ret) { perf_aux_output_end(&pt->handle, 0); + WRITE_ONCE(pt->resume_allowed, 0); return; } @@ -1557,7 +1584,7 @@ void intel_pt_handle_vmx(int on) /* Turn PTs back on */ if (!on && event) - wrmsrl(MSR_IA32_RTIT_CTL, event->hw.config); + wrmsrl(MSR_IA32_RTIT_CTL, event->hw.aux_config); local_irq_restore(flags); } @@ -1573,6 +1600,26 @@ static void pt_event_start(struct perf_event *event, int mode) struct pt *pt = this_cpu_ptr(&pt_ctx); struct pt_buffer *buf; + if (mode & PERF_EF_RESUME) { + if (READ_ONCE(pt->resume_allowed)) { + u64 status; + + /* + * Only if the trace is not active and the error and + * stopped bits are clear, is it safe to start, but a + * PMI might have just cleared these, so resume_allowed + * must be checked again also. + */ + rdmsrl(MSR_IA32_RTIT_STATUS, status); + if (!(status & (RTIT_STATUS_TRIGGEREN | + RTIT_STATUS_ERROR | + RTIT_STATUS_STOPPED)) && + READ_ONCE(pt->resume_allowed)) + pt_config_start(event); + } + return; + } + buf = perf_aux_output_begin(&pt->handle, event); if (!buf) goto fail_stop; @@ -1583,7 +1630,6 @@ static void pt_event_start(struct perf_event *event, int mode) goto fail_end_stop; } - WRITE_ONCE(pt->handle_nmi, 1); hwc->state = 0; pt_config_buffer(buf); @@ -1601,11 +1647,27 @@ static void pt_event_stop(struct perf_event *event, int mode) { struct pt *pt = this_cpu_ptr(&pt_ctx); + if (mode & PERF_EF_PAUSE) { + if (READ_ONCE(pt->pause_allowed)) + pt_config_stop(event); + return; + } + /* * Protect against the PMI racing with disabling wrmsr, * see comment in intel_pt_interrupt(). */ WRITE_ONCE(pt->handle_nmi, 0); + barrier(); + + /* + * Prevent a resume from attempting to restart tracing, or a pause + * during a subsequent start. Do this after clearing handle_nmi so that + * pt_event_snapshot_aux() will not re-allow them. + */ + WRITE_ONCE(pt->pause_allowed, 0); + WRITE_ONCE(pt->resume_allowed, 0); + barrier(); pt_config_stop(event); @@ -1656,12 +1718,15 @@ static long pt_event_snapshot_aux(struct perf_event *event, if (WARN_ON_ONCE(!buf->snapshot)) return 0; + /* Prevent pause/resume from attempting to start/stop tracing */ + WRITE_ONCE(pt->pause_allowed, 0); + WRITE_ONCE(pt->resume_allowed, 0); + barrier(); /* - * Here, handle_nmi tells us if the tracing is on + * There is no PT interrupt in this mode, so stop the trace and it will + * remain stopped while the buffer is copied. */ - if (READ_ONCE(pt->handle_nmi)) - pt_config_stop(event); - + pt_config_stop(event); pt_read_offset(buf); pt_update_head(pt); @@ -1673,12 +1738,16 @@ static long pt_event_snapshot_aux(struct perf_event *event, ret = perf_output_copy_aux(&pt->handle, handle, from, to); /* - * If the tracing was on when we turned up, restart it. - * Compiler barrier not needed as we couldn't have been - * preempted by anything that touches pt->handle_nmi. + * Here, handle_nmi tells us if the tracing was on. + * If the tracing was on, restart it. */ - if (pt->handle_nmi) + if (READ_ONCE(pt->handle_nmi)) { + WRITE_ONCE(pt->resume_allowed, 1); + barrier(); pt_config_start(event); + barrier(); + WRITE_ONCE(pt->pause_allowed, 1); + } return ret; } @@ -1794,7 +1863,9 @@ static __init int pt_init(void) if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG; - pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE; + pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | + PERF_PMU_CAP_ITRACE | + PERF_PMU_CAP_AUX_PAUSE; pt_pmu.pmu.attr_groups = pt_attr_groups; pt_pmu.pmu.task_ctx_nr = perf_sw_context; pt_pmu.pmu.event_init = pt_event_init; diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h index f5e46c04c145..7ee94fc6d7cb 100644 --- a/arch/x86/events/intel/pt.h +++ b/arch/x86/events/intel/pt.h @@ -65,6 +65,7 @@ struct pt_pmu { * @head: logical write offset inside the buffer * @snapshot: if this is for a snapshot/overwrite counter * @single: use Single Range Output instead of ToPA + * @wrapped: buffer advance wrapped back to the first topa table * @stop_pos: STOP topa entry index * @intr_pos: INT topa entry index * @stop_te: STOP topa entry pointer @@ -82,6 +83,7 @@ struct pt_buffer { local64_t head; bool snapshot; bool single; + bool wrapped; long stop_pos, intr_pos; struct topa_entry *stop_te, *intr_te; void **data_pages; @@ -117,6 +119,8 @@ struct pt_filters { * @filters: last configured filters * @handle_nmi: do handle PT PMI on this cpu, there's an active event * @vmx_on: 1 if VMX is ON on this cpu + * @pause_allowed: PERF_EF_PAUSE is allowed to stop tracing + * @resume_allowed: PERF_EF_RESUME is allowed to start tracing * @output_base: cached RTIT_OUTPUT_BASE MSR value * @output_mask: cached RTIT_OUTPUT_MASK MSR value */ @@ -125,6 +129,8 @@ struct pt { struct pt_filters filters; int handle_nmi; int vmx_on; + int pause_allowed; + int resume_allowed; u64 output_base; u64 output_mask; }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 64ca8625eb58..d98fac567684 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1816,6 +1816,11 @@ static const struct intel_uncore_init_fun mtl_uncore_init __initconst = { .mmio_init = adl_uncore_mmio_init, }; +static const struct intel_uncore_init_fun lnl_uncore_init __initconst = { + .cpu_init = lnl_uncore_cpu_init, + .mmio_init = lnl_uncore_mmio_init, +}; + static const struct intel_uncore_init_fun icx_uncore_init __initconst = { .cpu_init = icx_uncore_cpu_init, .pci_init = icx_uncore_pci_init, @@ -1893,6 +1898,10 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &adl_uncore_init), X86_MATCH_VFM(INTEL_METEORLAKE, &mtl_uncore_init), X86_MATCH_VFM(INTEL_METEORLAKE_L, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_ARROWLAKE, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init), + X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 027ef292c602..79ff32e13dcc 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -611,10 +611,12 @@ void skl_uncore_cpu_init(void); void icl_uncore_cpu_init(void); void tgl_uncore_cpu_init(void); void adl_uncore_cpu_init(void); +void lnl_uncore_cpu_init(void); void mtl_uncore_cpu_init(void); void tgl_uncore_mmio_init(void); void tgl_l_uncore_mmio_init(void); void adl_uncore_mmio_init(void); +void lnl_uncore_mmio_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 9462fd9f3b7a..3934e1e4e3b1 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -252,6 +252,7 @@ DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); DEFINE_UNCORE_FORMAT_ATTR(threshold, threshold, "config:24-29"); +DEFINE_UNCORE_FORMAT_ATTR(threshold2, threshold, "config:24-31"); /* Sandy Bridge uncore support */ static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -746,6 +747,34 @@ void mtl_uncore_cpu_init(void) uncore_msr_uncores = mtl_msr_uncores; } +static struct intel_uncore_type *lnl_msr_uncores[] = { + &mtl_uncore_cbox, + &mtl_uncore_arb, + NULL +}; + +#define LNL_UNC_MSR_GLOBAL_CTL 0x240e + +static void lnl_uncore_msr_init_box(struct intel_uncore_box *box) +{ + if (box->pmu->pmu_idx == 0) + wrmsrl(LNL_UNC_MSR_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN); +} + +static struct intel_uncore_ops lnl_uncore_msr_ops = { + .init_box = lnl_uncore_msr_init_box, + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + +void lnl_uncore_cpu_init(void) +{ + mtl_uncore_cbox.num_boxes = 4; + mtl_uncore_cbox.ops = &lnl_uncore_msr_ops; + uncore_msr_uncores = lnl_msr_uncores; +} + enum { SNB_PCI_UNCORE_IMC, }; @@ -1475,39 +1504,45 @@ static struct pci_dev *tgl_uncore_get_mc_dev(void) ids++; } + /* Just try to grab 00:00.0 device */ + if (!mc_dev) + mc_dev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); + return mc_dev; } #define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 #define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000 -static void __uncore_imc_init_box(struct intel_uncore_box *box, - unsigned int base_offset) +static void +uncore_get_box_mmio_addr(struct intel_uncore_box *box, + unsigned int base_offset, + int bar_offset, int step) { struct pci_dev *pdev = tgl_uncore_get_mc_dev(); struct intel_uncore_pmu *pmu = box->pmu; struct intel_uncore_type *type = pmu->type; resource_size_t addr; - u32 mch_bar; + u32 bar; if (!pdev) { pr_warn("perf uncore: Cannot find matched IMC device.\n"); return; } - pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar); - /* MCHBAR is disabled */ - if (!(mch_bar & BIT(0))) { - pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n"); + pci_read_config_dword(pdev, bar_offset, &bar); + if (!(bar & BIT(0))) { + pr_warn("perf uncore: BAR 0x%x is disabled. Failed to map %s counters.\n", + bar_offset, type->name); pci_dev_put(pdev); return; } - mch_bar &= ~BIT(0); - addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx); + bar &= ~BIT(0); + addr = (resource_size_t)(bar + step * pmu->pmu_idx); #ifdef CONFIG_PHYS_ADDR_T_64BIT - pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar); - addr |= ((resource_size_t)mch_bar << 32); + pci_read_config_dword(pdev, bar_offset + 4, &bar); + addr |= ((resource_size_t)bar << 32); #endif addr += base_offset; @@ -1518,6 +1553,14 @@ static void __uncore_imc_init_box(struct intel_uncore_box *box, pci_dev_put(pdev); } +static void __uncore_imc_init_box(struct intel_uncore_box *box, + unsigned int base_offset) +{ + uncore_get_box_mmio_addr(box, base_offset, + SNB_UNCORE_PCI_IMC_BAR_OFFSET, + TGL_UNCORE_MMIO_IMC_MEM_OFFSET); +} + static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) { __uncore_imc_init_box(box, 0); @@ -1612,14 +1655,17 @@ static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box) writel(0, box->io_addr + uncore_mmio_box_ctl(box)); } +#define MMIO_UNCORE_COMMON_OPS() \ + .exit_box = uncore_mmio_exit_box, \ + .disable_box = adl_uncore_mmio_disable_box, \ + .enable_box = adl_uncore_mmio_enable_box, \ + .disable_event = intel_generic_uncore_mmio_disable_event, \ + .enable_event = intel_generic_uncore_mmio_enable_event, \ + .read_counter = uncore_mmio_read_counter, + static struct intel_uncore_ops adl_uncore_mmio_ops = { .init_box = adl_uncore_imc_init_box, - .exit_box = uncore_mmio_exit_box, - .disable_box = adl_uncore_mmio_disable_box, - .enable_box = adl_uncore_mmio_enable_box, - .disable_event = intel_generic_uncore_mmio_disable_event, - .enable_event = intel_generic_uncore_mmio_enable_event, - .read_counter = uncore_mmio_read_counter, + MMIO_UNCORE_COMMON_OPS() }; #define ADL_UNC_CTL_CHMASK_MASK 0x00000f00 @@ -1703,3 +1749,108 @@ void adl_uncore_mmio_init(void) } /* end of Alder Lake MMIO uncore support */ + +/* Lunar Lake MMIO uncore support */ +#define LNL_UNCORE_PCI_SAFBAR_OFFSET 0x68 +#define LNL_UNCORE_MAP_SIZE 0x1000 +#define LNL_UNCORE_SNCU_BASE 0xE4B000 +#define LNL_UNCORE_SNCU_CTR 0x390 +#define LNL_UNCORE_SNCU_CTRL 0x398 +#define LNL_UNCORE_SNCU_BOX_CTL 0x380 +#define LNL_UNCORE_GLOBAL_CTL 0x700 +#define LNL_UNCORE_HBO_BASE 0xE54000 +#define LNL_UNCORE_HBO_OFFSET -4096 +#define LNL_UNCORE_HBO_CTR 0x570 +#define LNL_UNCORE_HBO_CTRL 0x550 +#define LNL_UNCORE_HBO_BOX_CTL 0x548 + +#define LNL_UNC_CTL_THRESHOLD 0xff000000 +#define LNL_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + SNB_UNC_CTL_UMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET | \ + SNB_UNC_CTL_INVERT | \ + LNL_UNC_CTL_THRESHOLD) + +static struct attribute *lnl_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_threshold2.attr, + NULL +}; + +static const struct attribute_group lnl_uncore_format_group = { + .name = "format", + .attrs = lnl_uncore_formats_attr, +}; + +static void lnl_uncore_hbo_init_box(struct intel_uncore_box *box) +{ + uncore_get_box_mmio_addr(box, LNL_UNCORE_HBO_BASE, + LNL_UNCORE_PCI_SAFBAR_OFFSET, + LNL_UNCORE_HBO_OFFSET); +} + +static struct intel_uncore_ops lnl_uncore_hbo_ops = { + .init_box = lnl_uncore_hbo_init_box, + MMIO_UNCORE_COMMON_OPS() +}; + +static struct intel_uncore_type lnl_uncore_hbo = { + .name = "hbo", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 64, + .perf_ctr = LNL_UNCORE_HBO_CTR, + .event_ctl = LNL_UNCORE_HBO_CTRL, + .event_mask = LNL_UNC_RAW_EVENT_MASK, + .box_ctl = LNL_UNCORE_HBO_BOX_CTL, + .mmio_map_size = LNL_UNCORE_MAP_SIZE, + .ops = &lnl_uncore_hbo_ops, + .format_group = &lnl_uncore_format_group, +}; + +static void lnl_uncore_sncu_init_box(struct intel_uncore_box *box) +{ + uncore_get_box_mmio_addr(box, LNL_UNCORE_SNCU_BASE, + LNL_UNCORE_PCI_SAFBAR_OFFSET, + 0); + + if (box->io_addr) + writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + LNL_UNCORE_GLOBAL_CTL); +} + +static struct intel_uncore_ops lnl_uncore_sncu_ops = { + .init_box = lnl_uncore_sncu_init_box, + MMIO_UNCORE_COMMON_OPS() +}; + +static struct intel_uncore_type lnl_uncore_sncu = { + .name = "sncu", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 64, + .perf_ctr = LNL_UNCORE_SNCU_CTR, + .event_ctl = LNL_UNCORE_SNCU_CTRL, + .event_mask = LNL_UNC_RAW_EVENT_MASK, + .box_ctl = LNL_UNCORE_SNCU_BOX_CTL, + .mmio_map_size = LNL_UNCORE_MAP_SIZE, + .ops = &lnl_uncore_sncu_ops, + .format_group = &lnl_uncore_format_group, +}; + +static struct intel_uncore_type *lnl_mmio_uncores[] = { + &adl_uncore_imc, + &lnl_uncore_hbo, + &lnl_uncore_sncu, + &adl_uncore_imc_free_running, + NULL +}; + +void lnl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = lnl_mmio_uncores; +} + +/* end of Lunar Lake MMIO uncore support */ diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ac1182141bf6..82c6f45ce975 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -668,24 +668,38 @@ enum { #define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) +/* + * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture + * of the core. Bits 31-24 indicates its core type (Core or Atom) + * and Bits [23:0] indicates the native model ID of the core. + * Core type and native model ID are defined in below enumerations. + */ enum hybrid_cpu_type { HYBRID_INTEL_NONE, HYBRID_INTEL_ATOM = 0x20, HYBRID_INTEL_CORE = 0x40, }; +#define X86_HYBRID_PMU_ATOM_IDX 0 +#define X86_HYBRID_PMU_CORE_IDX 1 +#define X86_HYBRID_PMU_TINY_IDX 2 + enum hybrid_pmu_type { not_hybrid, - hybrid_small = BIT(0), - hybrid_big = BIT(1), - - hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */ + hybrid_small = BIT(X86_HYBRID_PMU_ATOM_IDX), + hybrid_big = BIT(X86_HYBRID_PMU_CORE_IDX), + hybrid_tiny = BIT(X86_HYBRID_PMU_TINY_IDX), + + /* The belows are only used for matching */ + hybrid_big_small = hybrid_big | hybrid_small, + hybrid_small_tiny = hybrid_small | hybrid_tiny, + hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny, }; -#define X86_HYBRID_PMU_ATOM_IDX 0 -#define X86_HYBRID_PMU_CORE_IDX 1 - -#define X86_HYBRID_NUM_PMUS 2 +enum atom_native_id { + cmt_native_id = 0x2, /* Crestmont */ + skt_native_id = 0x3, /* Skymont */ +}; struct x86_hybrid_pmu { struct pmu pmu; @@ -1578,6 +1592,8 @@ u64 cmt_latency_data(struct perf_event *event, u64 status); u64 lnl_latency_data(struct perf_event *event, u64 status); +u64 arl_h_latency_data(struct perf_event *event, u64 status); + extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[]; @@ -1697,6 +1713,8 @@ void intel_pmu_pebs_data_source_grt(void); void intel_pmu_pebs_data_source_mtl(void); +void intel_pmu_pebs_data_source_arl_h(void); + void intel_pmu_pebs_data_source_cmt(void); void intel_pmu_pebs_data_source_lnl(void); diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index b985ca79cf97..a8defc813c36 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -103,6 +103,19 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +/* + * RAPL Package energy counter scope: + * 1. AMD/HYGON platforms have a per-PKG package energy counter + * 2. For Intel platforms + * 2.1. CLX-AP is multi-die and its RAPL MSRs are die-scope + * 2.2. Other Intel platforms are single die systems so the scope can be + * considered as either pkg-scope or die-scope, and we are considering + * them as die-scope. + */ +#define rapl_pmu_is_pkg_scope() \ + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \ + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + struct rapl_pmu { raw_spinlock_t lock; int n_active; @@ -135,14 +148,29 @@ struct rapl_model { /* 1/2^hw_unit Joule */ static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; static struct rapl_pmus *rapl_pmus; -static cpumask_t rapl_cpu_mask; static unsigned int rapl_cntr_mask; static u64 rapl_timer_ms; static struct perf_msr *rapl_msrs; +/* + * Helper functions to get the correct topology macros according to the + * RAPL PMU scope. + */ +static inline unsigned int get_rapl_pmu_idx(int cpu) +{ + return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) : + topology_logical_die_id(cpu); +} + +static inline const struct cpumask *get_rapl_pmu_cpumask(int cpu) +{ + return rapl_pmu_is_pkg_scope() ? topology_core_cpumask(cpu) : + topology_die_cpumask(cpu); +} + static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) { - unsigned int rapl_pmu_idx = topology_logical_die_id(cpu); + unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu); /* * The unsigned check also catches the '-1' return value for non @@ -340,8 +368,6 @@ static int rapl_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; - event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) return -EINVAL; @@ -360,7 +386,6 @@ static int rapl_pmu_event_init(struct perf_event *event) pmu = cpu_to_rapl_pmu(event->cpu); if (!pmu) return -EINVAL; - event->cpu = pmu->cpu; event->pmu_private = pmu; event->hw.event_base = rapl_msrs[bit].msr; event->hw.config = cfg; @@ -374,23 +399,6 @@ static void rapl_pmu_event_read(struct perf_event *event) rapl_event_update(event); } -static ssize_t rapl_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask); -} - -static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); - -static struct attribute *rapl_pmu_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static struct attribute_group rapl_pmu_attr_group = { - .attrs = rapl_pmu_attrs, -}; - RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); @@ -438,7 +446,6 @@ static struct attribute_group rapl_pmu_format_group = { }; static const struct attribute_group *rapl_attr_groups[] = { - &rapl_pmu_attr_group, &rapl_pmu_format_group, &rapl_pmu_events_group, NULL, @@ -541,60 +548,6 @@ static struct perf_msr amd_rapl_msrs[] = { [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, }; -static int rapl_cpu_offline(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - int target; - - /* Check if exiting cpu is used for collecting rapl events */ - if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) - return 0; - - pmu->cpu = -1; - /* Find a new cpu to collect rapl events */ - target = cpumask_any_but(topology_die_cpumask(cpu), cpu); - - /* Migrate rapl events to the new target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &rapl_cpu_mask); - pmu->cpu = target; - perf_pmu_migrate_context(pmu->pmu, cpu, target); - } - return 0; -} - -static int rapl_cpu_online(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - int target; - - if (!pmu) { - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); - if (!pmu) - return -ENOMEM; - - raw_spin_lock_init(&pmu->lock); - INIT_LIST_HEAD(&pmu->active_list); - pmu->pmu = &rapl_pmus->pmu; - pmu->timer_interval = ms_to_ktime(rapl_timer_ms); - rapl_hrtimer_init(pmu); - - rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu; - } - - /* - * Check if there is an online cpu in the package which collects rapl - * events already. - */ - target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu)); - if (target < nr_cpu_ids) - return 0; - - cpumask_set_cpu(cpu, &rapl_cpu_mask); - pmu->cpu = cpu; - return 0; -} - static int rapl_check_hw_unit(struct rapl_model *rm) { u64 msr_rapl_power_unit_bits; @@ -673,9 +626,41 @@ static const struct attribute_group *rapl_attr_update[] = { NULL, }; +static int __init init_rapl_pmu(void) +{ + struct rapl_pmu *pmu; + int idx; + + for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) { + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + if (!pmu) + goto free; + + raw_spin_lock_init(&pmu->lock); + INIT_LIST_HEAD(&pmu->active_list); + pmu->pmu = &rapl_pmus->pmu; + pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + rapl_hrtimer_init(pmu); + + rapl_pmus->pmus[idx] = pmu; + } + + return 0; +free: + for (; idx > 0; idx--) + kfree(rapl_pmus->pmus[idx - 1]); + return -ENOMEM; +} + static int __init init_rapl_pmus(void) { - int nr_rapl_pmu = topology_max_packages() * topology_max_dies_per_package(); + int nr_rapl_pmu = topology_max_packages(); + int rapl_pmu_scope = PERF_PMU_SCOPE_PKG; + + if (!rapl_pmu_is_pkg_scope()) { + nr_rapl_pmu *= topology_max_dies_per_package(); + rapl_pmu_scope = PERF_PMU_SCOPE_DIE; + } rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL); if (!rapl_pmus) @@ -691,9 +676,11 @@ static int __init init_rapl_pmus(void) rapl_pmus->pmu.start = rapl_pmu_event_start; rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.read = rapl_pmu_event_read; + rapl_pmus->pmu.scope = rapl_pmu_scope; rapl_pmus->pmu.module = THIS_MODULE; rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; - return 0; + + return init_rapl_pmu(); } static struct rapl_model model_snb = { @@ -839,24 +826,13 @@ static int __init rapl_pmu_init(void) if (ret) return ret; - /* - * Install callbacks. Core will call them for each online cpu. - */ - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, - "perf/x86/rapl:online", - rapl_cpu_online, rapl_cpu_offline); - if (ret) - goto out; - ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); if (ret) - goto out1; + goto out; rapl_advertise(); return 0; -out1: - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); cleanup_rapl_pmus(); @@ -866,7 +842,6 @@ module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); perf_pmu_unregister(&rapl_pmus->pmu); cleanup_rapl_pmus(); } |