summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-04-20 10:05:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-04-20 10:05:02 -0700
commitb25c69b9d5e41159b54ad7cb33f7d9ead8523d33 (patch)
treef50ab38c00630fb06bf19ee170b796594afd59e5 /arch/x86
parent1fd91d719eb1ae83ef500eb4148d11db9db39a41 (diff)
parent7579dfc42d2e3bef901991803efc81dc1ac65f2b (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar: "Misc fixes: - various tooling fixes - kretprobe fixes - kprobes annotation fixes - kprobes error checking fix - fix the default events for AMD Family 17h CPUs - PEBS fix - AUX record fix - address filtering fix" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/kprobes: Avoid kretprobe recursion bug kprobes: Mark ftrace mcount handler functions nokprobe x86/kprobes: Verify stack frame on kretprobe perf/x86/amd: Add event map for AMD Family 17h perf bpf: Return NULL when RB tree lookup fails in perf_env__find_btf() perf tools: Fix map reference counting perf evlist: Fix side band thread draining perf tools: Check maps for bpf programs perf bpf: Return NULL when RB tree lookup fails in perf_env__find_bpf_prog_info() tools include uapi: Sync sound/asound.h copy perf top: Always sample time to satisfy needs of use of ordered queuing perf evsel: Use hweight64() instead of hweight_long(attr.sample_regs_user) tools lib traceevent: Fix missing equality check for strcmp perf stat: Disable DIR_FORMAT feature for 'perf stat record' perf scripts python: export-to-sqlite.py: Fix use of parent_id in calls_view perf header: Fix lock/unlock imbalances when processing BPF/BTF info perf/x86: Fix incorrect PEBS_REGS perf/ring_buffer: Fix AUX record suppression perf/core: Fix the address filtering fix kprobes: Fix error check when reusing optimized probes
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/events/amd/core.c35
-rw-r--r--arch/x86/events/intel/core.c2
-rw-r--r--arch/x86/events/perf_event.h38
-rw-r--r--arch/x86/kernel/kprobes/core.c48
4 files changed, 92 insertions, 31 deletions
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 0ecfac84ba91..d45f3fbd232e 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -117,22 +117,39 @@ static __initconst const u64 amd_hw_cache_event_ids
};
/*
- * AMD Performance Monitor K7 and later.
+ * AMD Performance Monitor K7 and later, up to and including Family 16h:
*/
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x077e,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
+};
+
+/*
+ * AMD Performance Monitor Family 17h and later:
+ */
+static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187,
};
static u64 amd_pmu_event_map(int hw_event)
{
+ if (boot_cpu_data.x86 >= 0x17)
+ return amd_f17h_perfmon_event_map[hw_event];
+
return amd_perfmon_event_map[hw_event];
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f61dcbef20ff..f9451566cd9b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3131,7 +3131,7 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
flags &= ~PERF_SAMPLE_TIME;
if (!event->attr.exclude_kernel)
flags &= ~PERF_SAMPLE_REGS_USER;
- if (event->attr.sample_regs_user & ~PEBS_REGS)
+ if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
return flags;
}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a75955741c50..1e98a42b560a 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -96,25 +96,25 @@ struct amd_nb {
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \
PERF_SAMPLE_PERIOD)
-#define PEBS_REGS \
- (PERF_REG_X86_AX | \
- PERF_REG_X86_BX | \
- PERF_REG_X86_CX | \
- PERF_REG_X86_DX | \
- PERF_REG_X86_DI | \
- PERF_REG_X86_SI | \
- PERF_REG_X86_SP | \
- PERF_REG_X86_BP | \
- PERF_REG_X86_IP | \
- PERF_REG_X86_FLAGS | \
- PERF_REG_X86_R8 | \
- PERF_REG_X86_R9 | \
- PERF_REG_X86_R10 | \
- PERF_REG_X86_R11 | \
- PERF_REG_X86_R12 | \
- PERF_REG_X86_R13 | \
- PERF_REG_X86_R14 | \
- PERF_REG_X86_R15)
+#define PEBS_GP_REGS \
+ ((1ULL << PERF_REG_X86_AX) | \
+ (1ULL << PERF_REG_X86_BX) | \
+ (1ULL << PERF_REG_X86_CX) | \
+ (1ULL << PERF_REG_X86_DX) | \
+ (1ULL << PERF_REG_X86_DI) | \
+ (1ULL << PERF_REG_X86_SI) | \
+ (1ULL << PERF_REG_X86_SP) | \
+ (1ULL << PERF_REG_X86_BP) | \
+ (1ULL << PERF_REG_X86_IP) | \
+ (1ULL << PERF_REG_X86_FLAGS) | \
+ (1ULL << PERF_REG_X86_R8) | \
+ (1ULL << PERF_REG_X86_R9) | \
+ (1ULL << PERF_REG_X86_R10) | \
+ (1ULL << PERF_REG_X86_R11) | \
+ (1ULL << PERF_REG_X86_R12) | \
+ (1ULL << PERF_REG_X86_R13) | \
+ (1ULL << PERF_REG_X86_R14) | \
+ (1ULL << PERF_REG_X86_R15))
/*
* Per register state.
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index a034cb808e7e..fed46ddb1eef 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -569,6 +569,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
unsigned long *sara = stack_addr(regs);
ri->ret_addr = (kprobe_opcode_t *) *sara;
+ ri->fp = sara;
/* Replace the return addr with trampoline addr */
*sara = (unsigned long) &kretprobe_trampoline;
@@ -748,26 +749,48 @@ asm(
NOKPROBE_SYMBOL(kretprobe_trampoline);
STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
+static struct kprobe kretprobe_kprobe = {
+ .addr = (void *)kretprobe_trampoline,
+};
+
/*
* Called from kretprobe_trampoline
*/
static __used void *trampoline_handler(struct pt_regs *regs)
{
+ struct kprobe_ctlblk *kcb;
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
struct hlist_node *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
kprobe_opcode_t *correct_ret_addr = NULL;
+ void *frame_pointer;
+ bool skipped = false;
+
+ preempt_disable();
+
+ /*
+ * Set a dummy kprobe for avoiding kretprobe recursion.
+ * Since kretprobe never run in kprobe handler, kprobe must not
+ * be running at this point.
+ */
+ kcb = get_kprobe_ctlblk();
+ __this_cpu_write(current_kprobe, &kretprobe_kprobe);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
/* fixup registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
+ /* On x86-64, we use pt_regs->sp for return address holder. */
+ frame_pointer = &regs->sp;
#else
regs->cs = __KERNEL_CS | get_kernel_rpl();
regs->gs = 0;
+ /* On x86-32, we use pt_regs->flags for return address holder. */
+ frame_pointer = &regs->flags;
#endif
regs->ip = trampoline_address;
regs->orig_ax = ~0UL;
@@ -789,8 +812,25 @@ static __used void *trampoline_handler(struct pt_regs *regs)
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
+ /*
+ * Return probes must be pushed on this hash list correct
+ * order (same as return order) so that it can be poped
+ * correctly. However, if we find it is pushed it incorrect
+ * order, this means we find a function which should not be
+ * probed, because the wrong order entry is pushed on the
+ * path of processing other kretprobe itself.
+ */
+ if (ri->fp != frame_pointer) {
+ if (!skipped)
+ pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n");
+ skipped = true;
+ continue;
+ }
orig_ret_address = (unsigned long)ri->ret_addr;
+ if (skipped)
+ pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n",
+ ri->rp->kp.addr);
if (orig_ret_address != trampoline_address)
/*
@@ -808,14 +848,15 @@ static __used void *trampoline_handler(struct pt_regs *regs)
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
+ if (ri->fp != frame_pointer)
+ continue;
orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
__this_cpu_write(current_kprobe, &ri->rp->kp);
- get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
- __this_cpu_write(current_kprobe, NULL);
+ __this_cpu_write(current_kprobe, &kretprobe_kprobe);
}
recycle_rp_inst(ri, &empty_rp);
@@ -831,6 +872,9 @@ static __used void *trampoline_handler(struct pt_regs *regs)
kretprobe_hash_unlock(current, &flags);
+ __this_cpu_write(current_kprobe, NULL);
+ preempt_enable();
+
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);