summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-13 13:05:08 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-13 13:05:08 -0800
commit31486372a1e9a66ec2e9e2903b8792bba7e503e1 (patch)
tree84f61e0758695e6fbee8d2b7d7b9bc4a5ce6e03b /arch/x86
parent8e9a2dba8686187d8c8179e5b86640e653963889 (diff)
parentfcdfafcb73be8fa45909327bbddca46fb362a675 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "The main changes in this cycle were: Kernel: - kprobes updates: use better W^X patterns for code modifications, improve optprobes, remove jprobes. (Masami Hiramatsu, Kees Cook) - core fixes: event timekeeping (enabled/running times statistics) fixes, perf_event_read() locking fixes and cleanups, etc. (Peter Zijlstra) - Extend x86 Intel free-running PEBS support and support x86 user-register sampling in perf record and perf script. (Andi Kleen) Tooling: - Completely rework the way inline frames are handled. Instead of querying for the inline nodes on-demand in the individual tools, we now create proper callchain nodes for inlined frames. (Milian Wolff) - 'perf trace' updates (Arnaldo Carvalho de Melo) - Implement a way to print formatted output to per-event files in 'perf script' to facilitate generate flamegraphs, elliminating the need to write scripts to do that separation (yuzhoujian, Arnaldo Carvalho de Melo) - Update vendor events JSON metrics for Intel's Broadwell, Broadwell Server, Haswell, Haswell Server, IvyBridge, IvyTown, JakeTown, Sandy Bridge, Skylake, SkyLake Server - and Goldmont Plus V1 (Andi Kleen, Kan Liang) - Multithread the synthesizing of PERF_RECORD_ events for pre-existing threads in 'perf top', speeding up that phase, greatly improving the user experience in systems such as Intel's Knights Mill (Kan Liang) - Introduce the concept of weak groups in 'perf stat': try to set up a group, but if it's not schedulable fallback to not using a group. That gives us the best of both worlds: groups if they work, but still a usable fallback if they don't. E.g: (Andi Kleen) - perf sched timehist enhancements (David Ahern) - ... various other enhancements, updates, cleanups and fixes" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (139 commits) kprobes: Don't spam the build log with deprecation warnings arm/kprobes: Remove jprobe test case arm/kprobes: Fix kretprobe test to check correct counter perf srcline: Show correct function name for srcline of callchains perf srcline: Fix memory leak in addr2inlines() perf trace beauty kcmp: Beautify arguments perf trace beauty: Implement pid_fd beautifier tools include uapi: Grab a copy of linux/kcmp.h perf callchain: Fix double mapping al->addr for children without self period perf stat: Make --per-thread update shadow stats to show metrics perf stat: Move the shadow stats scale computation in perf_stat__update_shadow_stats perf tools: Add perf_data_file__write function perf tools: Add struct perf_data_file perf tools: Rename struct perf_data_file to perf_data perf script: Print information about per-event-dump files perf trace beauty prctl: Generate 'option' string table from kernel headers tools include uapi: Grab a copy of linux/prctl.h perf script: Allow creating per-event dump files perf evsel: Restore evsel->priv as a tool private area perf script: Use event_format__fprintf() ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/events/intel/core.c4
-rw-r--r--arch/x86/events/perf_event.h24
-rw-r--r--arch/x86/include/asm/kprobes.h4
-rw-r--r--arch/x86/kernel/kprobes/common.h6
-rw-r--r--arch/x86/kernel/kprobes/core.c61
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c32
-rw-r--r--arch/x86/kernel/kprobes/opt.c79
7 files changed, 131 insertions, 79 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9fb9a1f1e47b..43445da30cea 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
if (event->attr.use_clockid)
flags &= ~PERF_SAMPLE_TIME;
+ if (!event->attr.exclude_kernel)
+ flags &= ~PERF_SAMPLE_REGS_USER;
+ if (event->attr.sample_regs_user & ~PEBS_REGS)
+ flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
return flags;
}
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 4196f81ec0e1..f7aaadf9331f 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -85,13 +85,15 @@ struct amd_nb {
* Flags PEBS can handle without an PMI.
*
* TID can only be handled by flushing at context switch.
+ * REGS_USER can be handled for events limited to ring 3.
*
*/
#define PEBS_FREERUNNING_FLAGS \
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
- PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
+ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
+ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
/*
* A debug store configuration.
@@ -110,6 +112,26 @@ struct debug_store {
u64 pebs_event_reset[MAX_PEBS_EVENTS];
};
+#define PEBS_REGS \
+ (PERF_REG_X86_AX | \
+ PERF_REG_X86_BX | \
+ PERF_REG_X86_CX | \
+ PERF_REG_X86_DX | \
+ PERF_REG_X86_DI | \
+ PERF_REG_X86_SI | \
+ PERF_REG_X86_SP | \
+ PERF_REG_X86_BP | \
+ PERF_REG_X86_IP | \
+ PERF_REG_X86_FLAGS | \
+ PERF_REG_X86_R8 | \
+ PERF_REG_X86_R9 | \
+ PERF_REG_X86_R10 | \
+ PERF_REG_X86_R11 | \
+ PERF_REG_X86_R12 | \
+ PERF_REG_X86_R13 | \
+ PERF_REG_X86_R14 | \
+ PERF_REG_X86_R15)
+
/*
* Per register state.
*/
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 6cf65437b5e5..9f2e3102e0bb 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -58,8 +58,8 @@ extern __visible kprobe_opcode_t optprobe_template_call[];
extern __visible kprobe_opcode_t optprobe_template_end[];
#define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
#define MAX_OPTINSN_SIZE \
- (((unsigned long)&optprobe_template_end - \
- (unsigned long)&optprobe_template_entry) + \
+ (((unsigned long)optprobe_template_end - \
+ (unsigned long)optprobe_template_entry) + \
MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
extern const int kretprobe_blacklist_size;
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 615105cf7d58..ae38dccf0c8f 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -85,11 +85,11 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
* Copy an instruction and adjust the displacement if the instruction
* uses the %rip-relative addressing mode.
*/
-extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn);
+extern int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn);
/* Generate a relative-jump/call instruction */
-extern void synthesize_reljump(void *from, void *to);
-extern void synthesize_relcall(void *from, void *to);
+extern void synthesize_reljump(void *dest, void *from, void *to);
+extern void synthesize_relcall(void *dest, void *from, void *to);
#ifdef CONFIG_OPTPROBES
extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 0742491cbb73..bd36f3c33cd0 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -119,29 +119,29 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
static nokprobe_inline void
-__synthesize_relative_insn(void *from, void *to, u8 op)
+__synthesize_relative_insn(void *dest, void *from, void *to, u8 op)
{
struct __arch_relative_insn {
u8 op;
s32 raddr;
} __packed *insn;
- insn = (struct __arch_relative_insn *)from;
+ insn = (struct __arch_relative_insn *)dest;
insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
insn->op = op;
}
/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-void synthesize_reljump(void *from, void *to)
+void synthesize_reljump(void *dest, void *from, void *to)
{
- __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
+ __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_reljump);
/* Insert a call instruction at address 'from', which calls address 'to'.*/
-void synthesize_relcall(void *from, void *to)
+void synthesize_relcall(void *dest, void *from, void *to)
{
- __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+ __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_relcall);
@@ -346,10 +346,11 @@ static int is_IF_modifier(kprobe_opcode_t *insn)
/*
* Copy an instruction with recovering modified instruction by kprobes
* and adjust the displacement if the instruction uses the %rip-relative
- * addressing mode.
+ * addressing mode. Note that since @real will be the final place of copied
+ * instruction, displacement must be adjust by @real, not @dest.
* This returns the length of copied instruction, or 0 if it has an error.
*/
-int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
+int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
{
kprobe_opcode_t buf[MAX_INSN_SIZE];
unsigned long recovered_insn =
@@ -387,11 +388,11 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
* have given.
*/
newdisp = (u8 *) src + (s64) insn->displacement.value
- - (u8 *) dest;
+ - (u8 *) real;
if ((s64) (s32) newdisp != newdisp) {
pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
- src, dest, insn->displacement.value);
+ src, real, insn->displacement.value);
return 0;
}
disp = (u8 *) dest + insn_offset_displacement(insn);
@@ -402,20 +403,38 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
}
/* Prepare reljump right after instruction to boost */
-static void prepare_boost(struct kprobe *p, struct insn *insn)
+static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
+ struct insn *insn)
{
+ int len = insn->length;
+
if (can_boost(insn, p->addr) &&
- MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) {
+ MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) {
/*
* These instructions can be executed directly if it
* jumps back to correct address.
*/
- synthesize_reljump(p->ainsn.insn + insn->length,
+ synthesize_reljump(buf + len, p->ainsn.insn + len,
p->addr + insn->length);
+ len += RELATIVEJUMP_SIZE;
p->ainsn.boostable = true;
} else {
p->ainsn.boostable = false;
}
+
+ return len;
+}
+
+/* Make page to RO mode when allocate it */
+void *alloc_insn_page(void)
+{
+ void *page;
+
+ page = module_alloc(PAGE_SIZE);
+ if (page)
+ set_memory_ro((unsigned long)page & PAGE_MASK, 1);
+
+ return page;
}
/* Recover page to RW mode before releasing it */
@@ -429,12 +448,11 @@ void free_insn_page(void *page)
static int arch_copy_kprobe(struct kprobe *p)
{
struct insn insn;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
int len;
- set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
-
/* Copy an instruction with recovering if other optprobe modifies it.*/
- len = __copy_instruction(p->ainsn.insn, p->addr, &insn);
+ len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
if (!len)
return -EINVAL;
@@ -442,15 +460,16 @@ static int arch_copy_kprobe(struct kprobe *p)
* __copy_instruction can modify the displacement of the instruction,
* but it doesn't affect boostable check.
*/
- prepare_boost(p, &insn);
-
- set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
+ len = prepare_boost(buf, p, &insn);
/* Check whether the instruction modifies Interrupt Flag or not */
- p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
+ p->ainsn.if_modifier = is_IF_modifier(buf);
/* Also, displacement change doesn't affect the first byte */
- p->opcode = p->ainsn.insn[0];
+ p->opcode = buf[0];
+
+ /* OK, write back the instruction(s) into ROX insn buffer */
+ text_poke(p->ainsn.insn, buf, len);
return 0;
}
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 041f7b6dfa0f..8dc0161cec8f 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -26,7 +26,7 @@
#include "common.h"
static nokprobe_inline
-int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+void __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, unsigned long orig_ip)
{
/*
@@ -41,33 +41,31 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
__this_cpu_write(current_kprobe, NULL);
if (orig_ip)
regs->ip = orig_ip;
- return 1;
}
int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
- if (kprobe_ftrace(p))
- return __skip_singlestep(p, regs, kcb, 0);
- else
- return 0;
+ if (kprobe_ftrace(p)) {
+ __skip_singlestep(p, regs, kcb, 0);
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
}
NOKPROBE_SYMBOL(skip_singlestep);
-/* Ftrace callback handler for kprobes */
+/* Ftrace callback handler for kprobes -- called under preepmt disabed */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
- unsigned long flags;
-
- /* Disable irq for emulating a breakpoint and avoiding preempt */
- local_irq_save(flags);
+ /* Preempt is disabled by ftrace */
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
- goto end;
+ return;
kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
@@ -77,17 +75,19 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
+ /* To emulate trap based kprobes, preempt_disable here */
+ preempt_disable();
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs))
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
__skip_singlestep(p, regs, kcb, orig_ip);
+ preempt_enable_no_resched();
+ }
/*
* If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe.
+ * resets current kprobe, and keep preempt count +1.
*/
}
-end:
- local_irq_restore(flags);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 4f98aad38237..e941136e24d8 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -142,11 +142,11 @@ void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
#define TMPL_MOVE_IDX \
- ((long)&optprobe_template_val - (long)&optprobe_template_entry)
+ ((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
- ((long)&optprobe_template_call - (long)&optprobe_template_entry)
+ ((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
- ((long)&optprobe_template_end - (long)&optprobe_template_entry)
+ ((long)optprobe_template_end - (long)optprobe_template_entry)
#define INT3_SIZE sizeof(kprobe_opcode_t)
@@ -154,17 +154,15 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func);
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long flags;
-
/* This is possible if op is under delayed unoptimizing */
if (kprobe_disabled(&op->kp))
return;
- local_irq_save(flags);
+ preempt_disable();
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
} else {
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* Save skipped registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
@@ -180,17 +178,17 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
opt_pre_handler(&op->kp, regs);
__this_cpu_write(current_kprobe, NULL);
}
- local_irq_restore(flags);
+ preempt_enable_no_resched();
}
NOKPROBE_SYMBOL(optimized_callback);
-static int copy_optimized_instructions(u8 *dest, u8 *src)
+static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
struct insn insn;
int len = 0, ret;
while (len < RELATIVEJUMP_SIZE) {
- ret = __copy_instruction(dest + len, src + len, &insn);
+ ret = __copy_instruction(dest + len, src + len, real, &insn);
if (!ret || !can_boost(&insn, src + len))
return -EINVAL;
len += ret;
@@ -343,57 +341,66 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
struct kprobe *__unused)
{
- u8 *buf;
- int ret;
+ u8 *buf = NULL, *slot;
+ int ret, len;
long rel;
if (!can_optimize((unsigned long)op->kp.addr))
return -EILSEQ;
- op->optinsn.insn = get_optinsn_slot();
- if (!op->optinsn.insn)
+ buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
+ op->optinsn.insn = slot = get_optinsn_slot();
+ if (!slot) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
/*
* Verify if the address gap is in 2GB range, because this uses
* a relative jump.
*/
- rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+ rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
if (abs(rel) > 0x7fffffff) {
- __arch_remove_optimized_kprobe(op, 0);
- return -ERANGE;
+ ret = -ERANGE;
+ goto err;
}
- buf = (u8 *)op->optinsn.insn;
- set_memory_rw((unsigned long)buf & PAGE_MASK, 1);
+ /* Copy arch-dep-instance from template */
+ memcpy(buf, optprobe_template_entry, TMPL_END_IDX);
/* Copy instructions into the out-of-line buffer */
- ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
- if (ret < 0) {
- __arch_remove_optimized_kprobe(op, 0);
- return ret;
- }
+ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
+ slot + TMPL_END_IDX);
+ if (ret < 0)
+ goto err;
op->optinsn.size = ret;
-
- /* Copy arch-dep-instance from template */
- memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+ len = TMPL_END_IDX + op->optinsn.size;
/* Set probe information */
synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
/* Set probe function call */
- synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+ synthesize_relcall(buf + TMPL_CALL_IDX,
+ slot + TMPL_CALL_IDX, optimized_callback);
/* Set returning jmp instruction at the tail of out-of-line buffer */
- synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+ synthesize_reljump(buf + len, slot + len,
(u8 *)op->kp.addr + op->optinsn.size);
-
- set_memory_ro((unsigned long)buf & PAGE_MASK, 1);
-
- flush_icache_range((unsigned long) buf,
- (unsigned long) buf + TMPL_END_IDX +
- op->optinsn.size + RELATIVEJUMP_SIZE);
- return 0;
+ len += RELATIVEJUMP_SIZE;
+
+ /* We have to use text_poke for instuction buffer because it is RO */
+ text_poke(slot, buf, len);
+ ret = 0;
+out:
+ kfree(buf);
+ return ret;
+
+err:
+ __arch_remove_optimized_kprobe(op, 0);
+ goto out;
}
/*