summaryrefslogtreecommitdiff
path: root/arch/x86/entry
diff options
context:
space:
mode:
authorJosh Poimboeuf <jpoimboe@redhat.com>2016-10-20 11:34:40 -0500
committerIngo Molnar <mingo@kernel.org>2016-10-21 09:26:03 +0200
commit946c191161cef10c667b5ee3179db1714fa5b7c0 (patch)
tree0b939091e3aa93f5699c256418fdec5d3a7c1283 /arch/x86/entry
parent29a6d7964d6853f5bcd84dfb92c074fb41d00563 (diff)
x86/entry/unwind: Create stack frames for saved interrupt registers
With frame pointers, when a task is interrupted, its stack is no longer completely reliable because the function could have been interrupted before it had a chance to save the previous frame pointer on the stack. So the caller of the interrupted function could get skipped by a stack trace. This is problematic for live patching, which needs to know whether a stack trace of a sleeping task can be relied upon. There's currently no way to detect if a sleeping task was interrupted by a page fault exception or preemption before it went to sleep. Another issue is that when dumping the stack of an interrupted task, the unwinder has no way of knowing where the saved pt_regs registers are, so it can't print them. This solves those issues by encoding the pt_regs pointer in the frame pointer on entry from an interrupt or an exception. This patch also updates the unwinder to be able to decode it, because otherwise the unwinder would be broken by this change. Note that this causes a change in the behavior of the unwinder: each instance of a pt_regs on the stack is now considered a "frame". So callers of unwind_get_return_address() will now get an occasional 'regs->ip' address that would have previously been skipped over. Suggested-by: Andy Lutomirski <luto@amacapital.net> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/8b9f84a21e39d249049e0547b559ff8da0df0988.1476973742.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/entry')
-rw-r--r--arch/x86/entry/calling.h20
-rw-r--r--arch/x86/entry/entry_32.S33
-rw-r--r--arch/x86/entry/entry_64.S10
3 files changed, 56 insertions, 7 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 38dcdfa2be55..05ed3d393da7 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -192,6 +192,26 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1
.endm
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+#ifdef CONFIG_FRAME_POINTER
+ .if \ptregs_offset
+ leaq \ptregs_offset(%rsp), %rbp
+ .else
+ mov %rsp, %rbp
+ .endif
+ orq $0x1, %rbp
+#endif
+.endm
+
#endif /* CONFIG_X86_64 */
/*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 22251055c5b7..acc0c6f36f3f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -176,6 +176,22 @@
SET_KERNEL_GS %edx
.endm
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original rbp.
+ */
+.macro ENCODE_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
+ mov %esp, %ebp
+ orl $0x1, %ebp
+#endif
+.endm
+
.macro RESTORE_INT_REGS
popl %ebx
popl %ecx
@@ -641,6 +657,7 @@ common_interrupt:
ASM_CLAC
addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
SAVE_ALL
+ ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
movl %esp, %eax
call do_IRQ
@@ -652,6 +669,7 @@ ENTRY(name) \
ASM_CLAC; \
pushl $~(nr); \
SAVE_ALL; \
+ ENCODE_FRAME_POINTER; \
TRACE_IRQS_OFF \
movl %esp, %eax; \
call fn; \
@@ -786,6 +804,7 @@ END(spurious_interrupt_bug)
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
+ ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
/*
@@ -840,6 +859,7 @@ ENTRY(xen_failsafe_callback)
jmp iret_exc
5: pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
+ ENCODE_FRAME_POINTER
jmp ret_from_exception
.section .fixup, "ax"
@@ -1067,6 +1087,7 @@ common_exception:
pushl %edx
pushl %ecx
pushl %ebx
+ ENCODE_FRAME_POINTER
cld
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
@@ -1099,6 +1120,7 @@ ENTRY(debug)
ASM_CLAC
pushl $-1 # mark this as an int
SAVE_ALL
+ ENCODE_FRAME_POINTER
xorl %edx, %edx # error code 0
movl %esp, %eax # pt_regs pointer
@@ -1114,11 +1136,11 @@ ENTRY(debug)
.Ldebug_from_sysenter_stack:
/* We're on the SYSENTER stack. Switch off. */
- movl %esp, %ebp
+ movl %esp, %ebx
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
TRACE_IRQS_OFF
call do_debug
- movl %ebp, %esp
+ movl %ebx, %esp
jmp ret_from_exception
END(debug)
@@ -1141,6 +1163,7 @@ ENTRY(nmi)
pushl %eax # pt_regs->orig_ax
SAVE_ALL
+ ENCODE_FRAME_POINTER
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
@@ -1159,10 +1182,10 @@ ENTRY(nmi)
* We're on the SYSENTER stack. Switch off. No one (not even debug)
* is using the thread stack right now, so it's safe for us to use it.
*/
- movl %esp, %ebp
+ movl %esp, %ebx
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
call do_nmi
- movl %ebp, %esp
+ movl %ebx, %esp
jmp .Lrestore_all_notrace
#ifdef CONFIG_X86_ESPFIX32
@@ -1179,6 +1202,7 @@ ENTRY(nmi)
.endr
pushl %eax
SAVE_ALL
+ ENCODE_FRAME_POINTER
FIXUP_ESPFIX_STACK # %eax == %esp
xorl %edx, %edx # zero error code
call do_nmi
@@ -1192,6 +1216,7 @@ ENTRY(int3)
ASM_CLAC
pushl $-1 # mark this as an int
SAVE_ALL
+ ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ef766a358b37..65fad8a63cd0 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -469,6 +469,7 @@ END(irq_entries_start)
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
+ ENCODE_FRAME_POINTER
testb $3, CS(%rsp)
jz 1f
@@ -985,6 +986,7 @@ ENTRY(xen_failsafe_callback)
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
+ ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
@@ -1028,6 +1030,7 @@ ENTRY(paranoid_entry)
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
+ ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
rdmsr
@@ -1075,6 +1078,7 @@ ENTRY(error_entry)
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
+ ENCODE_FRAME_POINTER 8
xorl %ebx, %ebx
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1257,6 +1261,7 @@ ENTRY(nmi)
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
+ ENCODE_FRAME_POINTER
/*
* At this point we no longer need to worry about stack damage
@@ -1270,11 +1275,10 @@ ENTRY(nmi)
/*
* Return back to user mode. We must *not* do the normal exit
- * work, because we don't want to enable interrupts. Fortunately,
- * do_nmi doesn't modify pt_regs.
+ * work, because we don't want to enable interrupts.
*/
SWAPGS
- jmp restore_c_regs_and_iret
+ jmp restore_regs_and_iret
.Lnmi_from_kernel:
/*