summaryrefslogtreecommitdiff
path: root/arch/x86/events/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events/core.c')
-rw-r--r--arch/x86/events/core.c224
1 files changed, 112 insertions, 112 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index c75c482d4c52..0c38a31d5fc7 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -20,6 +20,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/kdebug.h>
+#include <linux/kvm_types.h>
#include <linux/sched/mm.h>
#include <linux/sched/clock.h>
#include <linux/uaccess.h>
@@ -32,6 +33,7 @@
#include <asm/apic.h>
#include <asm/stacktrace.h>
+#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/alternative.h>
@@ -87,13 +89,19 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling);
DEFINE_STATIC_CALL_NULL(x86_pmu_stop_scheduling, *x86_pmu.stop_scheduling);
DEFINE_STATIC_CALL_NULL(x86_pmu_sched_task, *x86_pmu.sched_task);
-DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
+DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup);
+
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all);
+
/*
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
@@ -133,7 +141,7 @@ u64 x86_perf_event_update(struct perf_event *event)
*/
prev_raw_count = local64_read(&hwc->prev_count);
do {
- rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+ new_raw_count = rdpmc(hwc->event_base_rdpmc);
} while (!local64_try_cmpxchg(&hwc->prev_count,
&prev_raw_count, new_raw_count));
@@ -268,7 +276,7 @@ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
*/
for_each_set_bit(i, cntr_mask, X86_PMC_IDX_MAX) {
reg = x86_pmu_config_addr(i);
- ret = rdmsrl_safe(reg, &val);
+ ret = rdmsrq_safe(reg, &val);
if (ret)
goto msr_fail;
if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
@@ -282,7 +290,7 @@ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
if (*(u64 *)fixed_cntr_mask) {
reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- ret = rdmsrl_safe(reg, &val);
+ ret = rdmsrq_safe(reg, &val);
if (ret)
goto msr_fail;
for_each_set_bit(i, fixed_cntr_mask, X86_PMC_IDX_MAX) {
@@ -313,11 +321,11 @@ bool check_hw_exists(struct pmu *pmu, unsigned long *cntr_mask,
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
reg = x86_pmu_event_addr(reg_safe);
- if (rdmsrl_safe(reg, &val))
+ if (rdmsrq_safe(reg, &val))
goto msr_fail;
val ^= 0xffffUL;
- ret = wrmsrl_safe(reg, val);
- ret |= rdmsrl_safe(reg, &val_new);
+ ret = wrmsrq_safe(reg, val);
+ ret |= rdmsrq_safe(reg, &val_new);
if (ret || val != val_new)
goto msr_fail;
@@ -547,14 +555,22 @@ static inline int precise_br_compat(struct perf_event *event)
return m == b;
}
-int x86_pmu_max_precise(void)
+int x86_pmu_max_precise(struct pmu *pmu)
{
int precise = 0;
- /* Support for constant skid */
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
- precise++;
+ /* arch PEBS */
+ if (x86_pmu.arch_pebs) {
+ precise = 2;
+ if (hybrid(pmu, arch_pebs_cap).pdists)
+ precise++;
+
+ return precise;
+ }
+ /* legacy PEBS - support for constant skid */
+ precise++;
/* Support for IP fixup */
if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
precise++;
@@ -562,13 +578,14 @@ int x86_pmu_max_precise(void)
if (x86_pmu.pebs_prec_dist)
precise++;
}
+
return precise;
}
int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
- int precise = x86_pmu_max_precise();
+ int precise = x86_pmu_max_precise(event->pmu);
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
@@ -628,7 +645,7 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.type == event->pmu->type)
event->hw.config |= x86_pmu_get_event_config(event);
- if (event->attr.sample_period && x86_pmu.limit_period) {
+ if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) {
s64 left = event->attr.sample_period;
x86_pmu.limit_period(event, &left);
if (left > event->attr.sample_period)
@@ -673,6 +690,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
event->hw.idx = -1;
event->hw.last_cpu = -1;
event->hw.last_tag = ~0ULL;
+ event->hw.dyn_constraint = ~0ULL;
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
@@ -692,13 +710,13 @@ void x86_pmu_disable_all(void)
if (!test_bit(idx, cpuc->active_mask))
continue;
- rdmsrl(x86_pmu_config_addr(idx), val);
+ rdmsrq(x86_pmu_config_addr(idx), val);
if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu_config_addr(idx), val);
+ wrmsrq(x86_pmu_config_addr(idx), val);
if (is_counter_pair(hwc))
- wrmsrl(x86_pmu_config_addr(idx + 1), 0);
+ wrmsrq(x86_pmu_config_addr(idx + 1), 0);
}
}
@@ -706,7 +724,7 @@ struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data)
{
return static_call(x86_pmu_guest_get_msrs)(nr, data);
}
-EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
+EXPORT_SYMBOL_FOR_KVM(perf_guest_get_msrs);
/*
* There may be PMI landing after enabled=0. The PMI hitting could be before or
@@ -753,17 +771,18 @@ void x86_pmu_enable_all(int added)
}
}
-static inline int is_x86_event(struct perf_event *event)
+int is_x86_event(struct perf_event *event)
{
- int i;
-
- if (!is_hybrid())
- return event->pmu == &pmu;
-
- for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
- if (event->pmu == &x86_pmu.hybrid_pmu[i].pmu)
- return true;
- }
+ /*
+ * For a non-hybrid platforms, the type of X86 pmu is
+ * always PERF_TYPE_RAW.
+ * For a hybrid platform, the PERF_PMU_CAP_EXTENDED_HW_TYPE
+ * is a unique capability for the X86 PMU.
+ * Use them to detect a X86 event.
+ */
+ if (event->pmu->type == PERF_TYPE_RAW ||
+ event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)
+ return true;
return false;
}
@@ -1298,6 +1317,15 @@ static void x86_pmu_enable(struct pmu *pmu)
if (cpuc->n_added) {
int n_running = cpuc->n_events - cpuc->n_added;
+
+ /*
+ * The late setup (after counters are scheduled)
+ * is required for some cases, e.g., PEBS counters
+ * snapshotting. Because an accurate counter index
+ * is needed.
+ */
+ static_call_cond(x86_pmu_late_setup)();
+
/*
* apply assignment obtained either from
* hw_perf_group_sched_in() or x86_pmu_enable()
@@ -1326,6 +1354,7 @@ static void x86_pmu_enable(struct pmu *pmu)
hwc->state |= PERF_HES_ARCH;
x86_pmu_stop(event, PERF_EF_UPDATE);
+ cpuc->events[hwc->idx] = NULL;
}
/*
@@ -1347,6 +1376,7 @@ static void x86_pmu_enable(struct pmu *pmu)
* if cpuc->enabled = 0, then no wrmsr as
* per x86_pmu_enable_event()
*/
+ cpuc->events[hwc->idx] = event;
x86_pmu_start(event, PERF_EF_RELOAD);
}
cpuc->n_added = 0;
@@ -1410,14 +1440,14 @@ int x86_perf_event_set_period(struct perf_event *event)
*/
local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrq(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
* Sign extend the Merge event counter's upper 16 bits since
* we currently declare a 48-bit counter width
*/
if (is_counter_pair(hwc))
- wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
+ wrmsrq(x86_pmu_event_addr(idx + 1), 0xffff);
perf_event_update_userpage(event);
@@ -1513,7 +1543,6 @@ static void x86_pmu_start(struct perf_event *event, int flags)
event->hw.state = 0;
- cpuc->events[idx] = event;
__set_bit(idx, cpuc->active_mask);
static_call(x86_pmu_enable)(event);
perf_event_update_userpage(event);
@@ -1540,10 +1569,10 @@ void perf_event_print_debug(void)
return;
if (x86_pmu.version >= 2) {
- rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
- rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
- rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
+ rdmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
pr_info("\n");
pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
@@ -1551,19 +1580,19 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
if (pebs_constraints) {
- rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
+ rdmsrq(MSR_IA32_PEBS_ENABLE, pebs);
pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
}
if (x86_pmu.lbr_nr) {
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ rdmsrq(MSR_IA32_DEBUGCTLMSR, debugctl);
pr_info("CPU#%d: debugctl: %016llx\n", cpu, debugctl);
}
}
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
for_each_set_bit(idx, cntr_mask, X86_PMC_IDX_MAX) {
- rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
- rdmsrl(x86_pmu_event_addr(idx), pmc_count);
+ rdmsrq(x86_pmu_config_addr(idx), pmc_ctrl);
+ rdmsrq(x86_pmu_event_addr(idx), pmc_count);
prev_left = per_cpu(pmc_prev_left[idx], cpu);
@@ -1577,7 +1606,7 @@ void perf_event_print_debug(void)
for_each_set_bit(idx, fixed_cntr_mask, X86_PMC_IDX_MAX) {
if (fixed_counter_disabled(idx, cpuc->pmu))
continue;
- rdmsrl(x86_pmu_fixed_ctr_addr(idx), pmc_count);
+ rdmsrq(x86_pmu_fixed_ctr_addr(idx), pmc_count);
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
cpu, idx, pmc_count);
@@ -1592,7 +1621,6 @@ void x86_pmu_stop(struct perf_event *event, int flags)
if (test_bit(hwc->idx, cpuc->active_mask)) {
static_call(x86_pmu_disable)(event);
__clear_bit(hwc->idx, cpuc->active_mask);
- cpuc->events[hwc->idx] = NULL;
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
}
@@ -1630,6 +1658,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
* Not a TXN, therefore cleanup properly.
*/
x86_pmu_stop(event, PERF_EF_UPDATE);
+ cpuc->events[event->hw.idx] = NULL;
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event_list[i])
@@ -1673,6 +1702,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
struct cpu_hw_events *cpuc;
struct perf_event *event;
int idx, handled = 0;
+ u64 last_period;
u64 val;
cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1692,6 +1722,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
continue;
event = cpuc->events[idx];
+ last_period = event->hw.last_period;
val = static_call(x86_pmu_update)(event);
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1705,13 +1736,11 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
if (!static_call(x86_pmu_set_period)(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
- if (has_branch_stack(event))
- perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
if (handled)
@@ -2029,13 +2058,19 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
- static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx);
static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
static_call_update(x86_pmu_filter, x86_pmu.filter);
+
+ static_call_update(x86_pmu_late_setup, x86_pmu.late_setup);
+
+ static_call_update(x86_pmu_pebs_enable, x86_pmu.pebs_enable);
+ static_call_update(x86_pmu_pebs_disable, x86_pmu.pebs_disable);
+ static_call_update(x86_pmu_pebs_enable_all, x86_pmu.pebs_enable_all);
+ static_call_update(x86_pmu_pebs_disable_all, x86_pmu.pebs_disable_all);
}
static void _x86_pmu_read(struct perf_event *event)
@@ -2045,13 +2080,15 @@ static void _x86_pmu_read(struct perf_event *event)
void x86_pmu_show_pmu_cap(struct pmu *pmu)
{
- pr_info("... version: %d\n", x86_pmu.version);
- pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
- pr_info("... generic registers: %d\n", x86_pmu_num_counters(pmu));
- pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
- pr_info("... max period: %016Lx\n", x86_pmu.max_period);
- pr_info("... fixed-purpose events: %d\n", x86_pmu_num_counters_fixed(pmu));
- pr_info("... event mask: %016Lx\n", hybrid(pmu, intel_ctrl));
+ pr_info("... version: %d\n", x86_pmu.version);
+ pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
+ pr_info("... generic counters: %d\n", x86_pmu_num_counters(pmu));
+ pr_info("... generic bitmap: %016llx\n", hybrid(pmu, cntr_mask64));
+ pr_info("... fixed-purpose counters: %d\n", x86_pmu_num_counters_fixed(pmu));
+ pr_info("... fixed-purpose bitmap: %016llx\n", hybrid(pmu, fixed_cntr_mask64));
+ pr_info("... value mask: %016llx\n", x86_pmu.cntval_mask);
+ pr_info("... max period: %016llx\n", x86_pmu.max_period);
+ pr_info("... global_ctrl mask: %016llx\n", hybrid(pmu, intel_ctrl));
}
static int __init init_hw_perf_events(void)
@@ -2486,9 +2523,9 @@ void perf_clear_dirty_counters(void)
if (!test_bit(i - INTEL_PMC_IDX_FIXED, hybrid(cpuc->pmu, fixed_cntr_mask)))
continue;
- wrmsrl(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0);
+ wrmsrq(x86_pmu_fixed_ctr_addr(i - INTEL_PMC_IDX_FIXED), 0);
} else {
- wrmsrl(x86_pmu_event_addr(i), 0);
+ wrmsrq(x86_pmu_event_addr(i), 0);
}
}
@@ -2603,7 +2640,9 @@ static ssize_t max_precise_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise());
+ struct pmu *pmu = dev_get_drvdata(cdev);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise(pmu));
}
static DEVICE_ATTR_RO(max_precise);
@@ -2626,15 +2665,10 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
};
-static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
-{
- static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in);
-}
-
-static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
- struct perf_event_pmu_context *next_epc)
+static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
- static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc);
+ static_call_cond(x86_pmu_sched_task)(pmu_ctx, task, sched_in);
}
void perf_check_microcode(void)
@@ -2701,7 +2735,6 @@ static struct pmu pmu = {
.event_idx = x86_pmu_event_idx,
.sched_task = x86_pmu_sched_task,
- .swap_task_ctx = x86_pmu_swap_task_ctx,
.check_period = x86_pmu_check_period,
.aux_output_match = x86_pmu_aux_output_match,
@@ -2769,13 +2802,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
return;
}
- if (perf_callchain_store(entry, regs->ip))
- return;
-
- if (perf_hw_regs(regs))
+ if (perf_hw_regs(regs)) {
+ if (perf_callchain_store(entry, regs->ip))
+ return;
unwind_start(&state, current, regs, NULL);
- else
+ } else {
unwind_start(&state, current, NULL, (void *)regs->sp);
+ }
for (; !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
@@ -2799,8 +2832,15 @@ static unsigned long get_segment_base(unsigned int segment)
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
+ /*
+ * If we're not in a valid context with a real (not just lazy)
+ * user mm, then don't even try.
+ */
+ if (!nmi_uaccess_okay())
+ return 0;
+
/* IRQs are off, so this synchronizes with smp_store_release */
- ldt = READ_ONCE(current->active_mm->context.ldt);
+ ldt = smp_load_acquire(&current->mm->context.ldt);
if (!ldt || idx >= ldt->nr_entries)
return 0;
@@ -2818,46 +2858,6 @@ static unsigned long get_segment_base(unsigned int segment)
return get_desc_base(desc);
}
-#ifdef CONFIG_UPROBES
-/*
- * Heuristic-based check if uprobe is installed at the function entry.
- *
- * Under assumption of user code being compiled with frame pointers,
- * `push %rbp/%ebp` is a good indicator that we indeed are.
- *
- * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
- * If we get this wrong, captured stack trace might have one extra bogus
- * entry, but the rest of stack trace will still be meaningful.
- */
-static bool is_uprobe_at_func_entry(struct pt_regs *regs)
-{
- struct arch_uprobe *auprobe;
-
- if (!current->utask)
- return false;
-
- auprobe = current->utask->auprobe;
- if (!auprobe)
- return false;
-
- /* push %rbp/%ebp */
- if (auprobe->insn[0] == 0x55)
- return true;
-
- /* endbr64 (64-bit only) */
- if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn))
- return true;
-
- return false;
-}
-
-#else
-static bool is_uprobe_at_func_entry(struct pt_regs *regs)
-{
- return false;
-}
-#endif /* CONFIG_UPROBES */
-
#ifdef CONFIG_IA32_EMULATION
#include <linux/compat.h>
@@ -3079,7 +3079,7 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
cap->events_mask_len = x86_pmu.events_mask_len;
cap->pebs_ept = x86_pmu.pebs_ept;
}
-EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
+EXPORT_SYMBOL_FOR_KVM(perf_get_x86_pmu_capability);
u64 perf_get_hw_event_config(int hw_event)
{
@@ -3090,4 +3090,4 @@ u64 perf_get_hw_event_config(int hw_event)
return 0;
}
-EXPORT_SYMBOL_GPL(perf_get_hw_event_config);
+EXPORT_SYMBOL_FOR_KVM(perf_get_hw_event_config);