From b332828c39326b1dca617f387dd15d12e81cd5f0 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:10 +0530 Subject: hw-breakpoints: prepare the code for Hardware Breakpoint interfaces The generic hardware breakpoint interface provides an abstraction of hardware breakpoints in front of specific arch implementations for both kernel and user side breakpoints. This includes execution breakpoints and read/write breakpoints, also known as "watchpoints". This patch introduces header files containing constants, structure definitions and declaration of functions used by the hardware breakpoint core and x86 specific code. It also introduces an array based storage for the debug-register values in 'struct thread_struct', while modifying all users of debugreg member in the structure. [ Impact: add headers for new hardware breakpoint interface ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ptrace.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/ptrace.c') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c13..313be40be55a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -471,10 +471,10 @@ static int genregs_set(struct task_struct *target, static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) { switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; + case 0: return child->thread.debugreg[0]; + case 1: return child->thread.debugreg[1]; + case 2: return child->thread.debugreg[2]; + case 3: return child->thread.debugreg[3]; case 6: return child->thread.debugreg6; case 7: return child->thread.debugreg7; } @@ -493,10 +493,10 @@ static int ptrace_set_debugreg(struct task_struct *child, return -EIO; switch (n) { - case 0: 
child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; + case 0: child->thread.debugreg[0] = data; break; + case 1: child->thread.debugreg[1] = data; break; + case 2: child->thread.debugreg[2] = data; break; + case 3: child->thread.debugreg[3] = data; break; case 6: if ((data & ~0xffffffffUL) != 0) -- cgit From 72f674d203cd230426437cdcf7dd6f681dad8b0d Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:45:48 +0530 Subject: hw-breakpoints: modify Ptrace routines to access breakpoint registers This patch modifies the ptrace code to use the new wrapper routines around the debug/breakpoint registers. [ Impact: adapt x86 ptrace to the new breakpoint Api ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Signed-off-by: Maneesh Soni Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ptrace.c | 231 +++++++++++++++++++++++++++++------------------ 1 file changed, 141 insertions(+), 90 deletions(-) (limited to 'arch/x86/kernel/ptrace.c') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 313be40be55a..b457f78b7dbf 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -136,11 +137,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -265,15 +261,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE_MAX - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) 
@@ -464,95 +451,159 @@ static int genregs_set(struct task_struct *target, } /* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, + unsigned *type) { - switch (n) { - case 0: return child->thread.debugreg[0]; - case 1: return child->thread.debugreg[1]; - case 2: return child->thread.debugreg[2]; - case 3: return child->thread.debugreg[3]; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; - } - return 0; + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); + + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) { + struct thread_struct *thread = &(current->thread); int i; - if (unlikely(n == 4 || n == 5)) - return -EIO; + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < hbp_kernel_pos; i++) + /* + * We will check bp->info.address against the address stored in + * thread's hbp structure and not debugreg[i]. 
This is to ensure + * that the corresponding bit for 'i' in DR7 register is enabled + */ + if (bp->info.address == thread->hbp[i]->info.address) + break; - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; + thread->debugreg6 |= (DR_TRAP0 << i); +} - switch (n) { - case 0: child->thread.debugreg[0] = data; break; - case 1: child->thread.debugreg[1] = data; break; - case 2: child->thread.debugreg[2] = data; break; - case 3: child->thread.debugreg[3] = data; break; +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long old_dr7 = thread->debugreg7; + int i, orig_ret = 0, rc = 0; + int enabled, second_pass = 0; + unsigned len, type; + struct hw_breakpoint *bp; + + data &= ~DR_CONTROL_RESERVED; +restore: + /* + * Loop through all the hardware breakpoints, making the + * appropriate changes to each. + */ + for (i = 0; i < HBP_NUM; i++) { + enabled = decode_dr7(data, i, &len, &type); + bp = thread->hbp[i]; + + if (!enabled) { + if (bp) { + /* Don't unregister the breakpoints right-away, + * unless all register_user_hw_breakpoint() + * requests have succeeded. This prevents + * any window of opportunity for debug + * register grabbing by other users. + */ + if (!second_pass) + continue; + unregister_user_hw_breakpoint(tsk, bp); + kfree(bp); + } + continue; + } + if (!bp) { + rc = -ENOMEM; + bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); + if (bp) { + bp->info.address = thread->debugreg[i]; + bp->triggered = ptrace_triggered; + bp->info.len = len; + bp->info.type = type; + rc = register_user_hw_breakpoint(tsk, bp); + if (rc) + kfree(bp); + } + } else + rc = modify_user_hw_breakpoint(tsk, bp); + if (rc) + break; + } + /* + * Make a second pass to free the remaining unused breakpoints + * or to restore the original breakpoints if an error occurred. 
+ */ + if (!second_pass) { + second_pass = 1; + if (rc < 0) { + orig_ret = rc; + data = old_dr7; + } + goto restore; + } + return ((orig_ret < 0) ? orig_ret : rc); +} - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long val = 0; + + if (n < HBP_NUM) + val = thread->debugreg[n]; + else if (n == 6) + val = thread->debugreg6; + else if (n == 7) + val = thread->debugreg7; + return val; +} - case 7: - /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 
24593 (AMD64 System Programming) - */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; +/* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +{ + struct thread_struct *thread = &(tsk->thread); + int rc = 0; + + /* There are no DR4 or DR5 registers */ + if (n == 4 || n == 5) + return -EIO; + + if (n == 6) { + tsk->thread.debugreg6 = val; + goto ret_path; } + if (n < HBP_NUM) { + if (thread->hbp[n]) { + if (arch_check_va_in_userspace(val, + thread->hbp[n]->info.len) == 0) { + rc = -EIO; + goto ret_path; + } + thread->hbp[n]->info.address = val; + } + thread->debugreg[n] = val; + } + /* All that's left is DR7 */ + if (n == 7) + rc = ptrace_write_dr7(tsk, val); - return 0; +ret_path: + return rc; } /* -- cgit From 9d22b536609abf0d64648f99518676ea58245e3b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 19:52:30 +0530 Subject: x86: Mark ptrace_get_debugreg() as static This sparse warning: arch/x86/kernel/ptrace.c:560:15: warning: symbol 'ptrace_get_debugreg' was not declared. Should it be static? triggers because ptrace_get_debugreg() is global but is only used in a single .c file. change ptrace_get_debugreg() to static to fix that - this also addresses the sparse warning. 
Signed-off-by: Jaswinder Singh Rajput Cc: Steven Rostedt LKML-Reference: <1246458150.6940.19.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/ptrace.c') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b457f78b7dbf..cabdabce3cb2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -557,7 +557,7 @@ restore: /* * Handle PTRACE_PEEKUSR calls for the debug register area. */ -unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) { struct thread_struct *thread = &(tsk->thread); unsigned long val = 0; -- cgit From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2009 19:22:48 +0200 Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events This patch rebase the implementation of the breakpoints API on top of perf events instances. Each breakpoints are now perf events that handle the register scheduling, thread/cpu attachment, etc.. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons of this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. 
Todo (in order): - Support breakpoints perf counter events for perf tools (i.e. implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression has been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. - Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD no longer covers every case of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. 
- Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt --- arch/x86/kernel/ptrace.c | 182 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 125 insertions(+), 57 deletions(-) (limited to 'arch/x86/kernel/ptrace.c') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 267cb85b479c..e79610d95971 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target, return ret; } -/* - * Decode the length and type bits for a particular breakpoint as - * stored in debug register 7. Return the "enabled" status. - */ -static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, - unsigned *type) -{ - int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - - *len = (bp_info & 0xc) | 0x40; - *type = (bp_info & 0x3) | 0x80; - return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; -} - -static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) +static void ptrace_triggered(struct perf_event *bp, void *data) { - struct thread_struct *thread = &(current->thread); int i; + struct thread_struct *thread = &(current->thread); /* * Store in the virtual DR6 register the fact that the breakpoint * was hit so the thread's debugger will see it. */ - for (i = 0; i < hbp_kernel_pos; i++) - /* - * We will check bp->info.address against the address stored in - * thread's hbp structure and not debugreg[i]. 
This is to ensure - * that the corresponding bit for 'i' in DR7 register is enabled - */ - if (bp->info.address == thread->hbp[i]->info.address) + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) break; + } thread->debugreg6 |= (DR_TRAP0 << i); } +/* + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * + */ +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) +{ + int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; + + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } + } + + return dr7; +} + /* * Handle ptrace writes to debug register 7. */ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { struct thread_struct *thread = &(tsk->thread); - unsigned long old_dr7 = thread->debugreg7; + unsigned long old_dr7; int i, orig_ret = 0, rc = 0; int enabled, second_pass = 0; unsigned len, type; - struct hw_breakpoint *bp; + int gen_len, gen_type; + struct perf_event *bp; data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); restore: /* * Loop through all the hardware breakpoints, making the @@ -496,11 +503,12 @@ restore: */ for (i = 0; i < HBP_NUM; i++) { enabled = decode_dr7(data, i, &len, &type); - bp = thread->hbp[i]; + bp = thread->ptrace_bps[i]; if (!enabled) { if (bp) { - /* Don't unregister the breakpoints right-away, + /* + * Don't unregister the breakpoints right-away, * unless all register_user_hw_breakpoint() * requests have succeeded. This prevents * any window of opportunity for debug @@ -508,27 +516,45 @@ restore: */ if (!second_pass) continue; - unregister_user_hw_breakpoint(tsk, bp); - kfree(bp); + thread->ptrace_bps[i] = NULL; + unregister_hw_breakpoint(bp); } continue; } + + /* + * We shoud have at least an inactive breakpoint at this + * slot. 
It means the user is writing dr7 without having + * written the address register first + */ if (!bp) { - rc = -ENOMEM; - bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (bp) { - bp->info.address = thread->debugreg[i]; - bp->triggered = ptrace_triggered; - bp->info.len = len; - bp->info.type = type; - rc = register_user_hw_breakpoint(tsk, bp); - if (rc) - kfree(bp); - } - } else - rc = modify_user_hw_breakpoint(tsk, bp); + rc = -EINVAL; + break; + } + + rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); if (rc) break; + + /* + * This is a temporary thing as bp is unregistered/registered + * to simulate modification + */ + bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, + gen_type, bp->callback, + tsk, true); + thread->ptrace_bps[i] = NULL; + + if (!bp) { /* incorrect bp, or we have a bug in bp API */ + rc = -EINVAL; + break; + } + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + bp = NULL; + break; + } + thread->ptrace_bps[i] = bp; } /* * Make a second pass to free the remaining unused breakpoints @@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) struct thread_struct *thread = &(tsk->thread); unsigned long val = 0; - if (n < HBP_NUM) - val = thread->debugreg[n]; - else if (n == 6) + if (n < HBP_NUM) { + struct perf_event *bp; + bp = thread->ptrace_bps[n]; + if (!bp) + return 0; + val = bp->hw.info.address; + } else if (n == 6) { val = thread->debugreg6; - else if (n == 7) - val = thread->debugreg7; + } else if (n == 7) { + val = ptrace_get_dr7(thread->ptrace_bps); + } return val; } +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) +{ + struct perf_event *bp; + struct thread_struct *t = &tsk->thread; + + if (!t->ptrace_bps[nr]) { + /* + * Put stub len and type to register (reserve) an inactive but + * correct bp + */ + bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, + HW_BREAKPOINT_W, + ptrace_triggered, tsk, + false); + } else { + bp = 
t->ptrace_bps[nr]; + t->ptrace_bps[nr] = NULL; + bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, + bp->attr.bp_type, + bp->callback, + tsk, + bp->attr.disabled); + } + + if (!bp) + return -EIO; + /* + * CHECKME: the previous code returned -EIO if the addr wasn't a + * valid task virtual addr. The new one will return -EINVAL in this + * case. + * -EINVAL may be what we want for in-kernel breakpoints users, but + * -EIO looks better for ptrace, since we refuse a register writing + * for the user. And anyway this is the previous behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; + + return 0; +} + /* * Handle PTRACE_POKEUSR calls for the debug register area. */ @@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) return -EIO; if (n == 6) { - tsk->thread.debugreg6 = val; + thread->debugreg6 = val; goto ret_path; } if (n < HBP_NUM) { - if (thread->hbp[n]) { - if (arch_check_va_in_userspace(val, - thread->hbp[n]->info.len) == 0) { - rc = -EIO; - goto ret_path; - } - thread->hbp[n]->info.address = val; - } - thread->debugreg[n] = val; + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; } /* All that's left is DR7 */ if (n == 7) -- cgit