summary | refs | log | tree | commit | diff
path: root/arch/x86/entry
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/entry')
-rw-r--r-- arch/x86/entry/calling.h 121
-rw-r--r-- arch/x86/entry/entry_32.S 3
-rw-r--r-- arch/x86/entry/entry_64.S 223
-rw-r--r-- arch/x86/entry/entry_64_compat.S 65
-rw-r--r-- arch/x86/entry/syscalls/syscall_32.tbl 38
-rw-r--r-- arch/x86/entry/vdso/vdso32/vclock_gettime.c 2
-rw-r--r-- arch/x86/entry/vsyscall/vsyscall_64.c 18
7 files changed, 228 insertions, 242 deletions
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 3f48f695d5e6..be63330c5511 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,80 +97,78 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
- .macro ALLOC_PT_GPREGS_ON_STACK
- addq $-(15*8), %rsp
- .endm
-
- .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
- .if \r11
- movq %r11, 6*8+\offset(%rsp)
- .endif
- .if \r8910
- movq %r10, 7*8+\offset(%rsp)
- movq %r9, 8*8+\offset(%rsp)
- movq %r8, 9*8+\offset(%rsp)
- .endif
- .if \rax
- movq %rax, 10*8+\offset(%rsp)
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
+ /*
+ * Push registers and sanitize registers of values that a
+ * speculation attack might otherwise want to exploit. The
+ * lower registers are likely clobbered well before they
+ * could be put to use in a speculative execution gadget.
+ * Interleave XOR with PUSH for better uop scheduling:
+ */
+ .if \save_ret
+ pushq %rsi /* pt_regs->si */
+ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
+ movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */
+ .else
+ pushq %rdi /* pt_regs->di */
+ pushq %rsi /* pt_regs->si */
.endif
- .if \rcx
- movq %rcx, 11*8+\offset(%rsp)
+ pushq \rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq \rax /* pt_regs->ax */
+ pushq %r8 /* pt_regs->r8 */
+ xorl %r8d, %r8d /* nospec r8 */
+ pushq %r9 /* pt_regs->r9 */
+ xorl %r9d, %r9d /* nospec r9 */
+ pushq %r10 /* pt_regs->r10 */
+ xorl %r10d, %r10d /* nospec r10 */
+ pushq %r11 /* pt_regs->r11 */
+ xorl %r11d, %r11d /* nospec r11*/
+ pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx*/
+ pushq %rbp /* pt_regs->rbp */
+ xorl %ebp, %ebp /* nospec rbp*/
+ pushq %r12 /* pt_regs->r12 */
+ xorl %r12d, %r12d /* nospec r12*/
+ pushq %r13 /* pt_regs->r13 */
+ xorl %r13d, %r13d /* nospec r13*/
+ pushq %r14 /* pt_regs->r14 */
+ xorl %r14d, %r14d /* nospec r14*/
+ pushq %r15 /* pt_regs->r15 */
+ xorl %r15d, %r15d /* nospec r15*/
+ UNWIND_HINT_REGS
+ .if \save_ret
+ pushq %rsi /* return address on top of stack */
.endif
- movq %rdx, 12*8+\offset(%rsp)
- movq %rsi, 13*8+\offset(%rsp)
- movq %rdi, 14*8+\offset(%rsp)
- UNWIND_HINT_REGS offset=\offset extra=0
- .endm
- .macro SAVE_C_REGS offset=0
- SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
- .endm
- .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
- SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
- .endm
- .macro SAVE_C_REGS_EXCEPT_R891011
- SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
- .endm
- .macro SAVE_C_REGS_EXCEPT_RCX_R891011
- SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
- .endm
- .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
- SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
- .endm
-
- .macro SAVE_EXTRA_REGS offset=0
- movq %r15, 0*8+\offset(%rsp)
- movq %r14, 1*8+\offset(%rsp)
- movq %r13, 2*8+\offset(%rsp)
- movq %r12, 3*8+\offset(%rsp)
- movq %rbp, 4*8+\offset(%rsp)
- movq %rbx, 5*8+\offset(%rsp)
- UNWIND_HINT_REGS offset=\offset
- .endm
-
- .macro POP_EXTRA_REGS
+.endm
+
+.macro POP_REGS pop_rdi=1 skip_r11rcx=0
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
- .endm
-
- .macro POP_C_REGS
+ .if \skip_r11rcx
+ popq %rsi
+ .else
popq %r11
+ .endif
popq %r10
popq %r9
popq %r8
popq %rax
+ .if \skip_r11rcx
+ popq %rsi
+ .else
popq %rcx
+ .endif
popq %rdx
popq %rsi
+ .if \pop_rdi
popq %rdi
- .endm
-
- .macro icebp
- .byte 0xf1
- .endm
+ .endif
+.endm
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
@@ -178,17 +176,12 @@ For 32-bit we have the following conventions - kernel is built with
* is just setting the LSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
- * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
* the original rbp.
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0
#ifdef CONFIG_FRAME_POINTER
- .if \ptregs_offset
- leaq \ptregs_offset(%rsp), %rbp
- .else
- mov %rsp, %rbp
- .endif
- orq $0x1, %rbp
+ leaq 1+\ptregs_offset(%rsp), %rbp
#endif
.endm
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c022540d..6ad064c8cf35 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %ebx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 30c8c5344c4a..18ed349b4f83 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -55,7 +55,7 @@ END(native_usergs_sysret64)
.macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, \flags /* interrupts off? */
+ btl $9, \flags /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
@@ -213,7 +213,7 @@ ENTRY(entry_SYSCALL_64)
swapgs
/*
- * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+ * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
* is not required to switch CR3.
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
@@ -227,22 +227,8 @@ ENTRY(entry_SYSCALL_64)
pushq %rcx /* pt_regs->ip */
GLOBAL(entry_SYSCALL_64_after_hwframe)
pushq %rax /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq %rdx /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq $-ENOSYS /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- pushq %r9 /* pt_regs->r9 */
- pushq %r10 /* pt_regs->r10 */
- pushq %r11 /* pt_regs->r11 */
- pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
- pushq %r12 /* pt_regs->r12 */
- pushq %r13 /* pt_regs->r13 */
- pushq %r14 /* pt_regs->r14 */
- pushq %r15 /* pt_regs->r15 */
- UNWIND_HINT_REGS
+
+ PUSH_AND_CLEAR_REGS rax=$-ENOSYS
TRACE_IRQS_OFF
@@ -321,15 +307,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
- POP_EXTRA_REGS
- popq %rsi /* skip r11 */
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rsi /* skip rcx */
- popq %rdx
- popq %rsi
+ POP_REGS pop_rdi=0 skip_r11rcx=1
/*
* Now all regs are restored except RSP and RDI.
@@ -386,8 +364,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %rbx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
@@ -471,9 +448,19 @@ END(irq_entries_start)
*
* The invariant is that, if irq_count != -1, then the IRQ stack is in use.
*/
-.macro ENTER_IRQ_STACK regs=1 old_rsp
+.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
DEBUG_ENTRY_ASSERT_IRQS_OFF
+
+ .if \save_ret
+ /*
+ * If save_ret is set, the original stack contains one additional
+ * entry -- the return address. Therefore, move the address one
+ * entry below %rsp to \old_rsp.
+ */
+ leaq 8(%rsp), \old_rsp
+ .else
movq %rsp, \old_rsp
+ .endif
.if \regs
UNWIND_HINT_REGS base=\old_rsp
@@ -519,6 +506,15 @@ END(irq_entries_start)
.if \regs
UNWIND_HINT_REGS indirect=1
.endif
+
+ .if \save_ret
+ /*
+ * Push the return address to the stack. This return address can
+ * be found at the "real" original RSP, which was offset by 8 at
+ * the beginning of this macro.
+ */
+ pushq -8(\old_rsp)
+ .endif
.endm
/*
@@ -542,29 +538,65 @@ END(irq_entries_start)
.endm
/*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers in fast path.
+ * Interrupt entry helper function.
*
- * Entry runs with interrupts off.
+ * Entry runs with interrupts off. Stack layout at entry:
+ * +----------------------------------------------------+
+ * | regs->ss |
+ * | regs->rsp |
+ * | regs->eflags |
+ * | regs->cs |
+ * | regs->ip |
+ * +----------------------------------------------------+
+ * | regs->orig_ax = ~(interrupt number) |
+ * +----------------------------------------------------+
+ * | return address |
+ * +----------------------------------------------------+
*/
-
-/* 0(%rsp): ~(interrupt number) */
- .macro interrupt func
+ENTRY(interrupt_entry)
+ UNWIND_HINT_FUNC
+ ASM_CLAC
cld
- testb $3, CS-ORIG_RAX(%rsp)
+ testb $3, CS-ORIG_RAX+8(%rsp)
jz 1f
SWAPGS
- call switch_to_thread_stack
+
+ /*
+ * Switch to the thread stack. The IRET frame and orig_ax are
+ * on the stack, as well as the return address. RDI..R12 are
+ * not (yet) on the stack and space has not (yet) been
+ * allocated for them.
+ */
+ pushq %rdi
+
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ /*
+ * We have RDI, return address, and orig_ax on the stack on
+ * top of the IRET frame. That means offset=24
+ */
+ UNWIND_HINT_IRET_REGS base=%rdi offset=24
+
+ pushq 7*8(%rdi) /* regs->ss */
+ pushq 6*8(%rdi) /* regs->rsp */
+ pushq 5*8(%rdi) /* regs->eflags */
+ pushq 4*8(%rdi) /* regs->cs */
+ pushq 3*8(%rdi) /* regs->ip */
+ pushq 2*8(%rdi) /* regs->orig_ax */
+ pushq 8(%rdi) /* return address */
+ UNWIND_HINT_FUNC
+
+ movq (%rdi), %rdi
1:
- ALLOC_PT_GPREGS_ON_STACK
- SAVE_C_REGS
- SAVE_EXTRA_REGS
- ENCODE_FRAME_POINTER
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
- testb $3, CS(%rsp)
+ testb $3, CS+8(%rsp)
jz 1f
/*
@@ -572,7 +604,7 @@ END(irq_entries_start)
*
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks). Since TRACE_IRQS_OFF idempotent,
+ * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
* the simplest way to handle it is to just call it twice if
* we enter from user mode. There's no reason to optimize this since
* TRACE_IRQS_OFF is a no-op if lockdep is off.
@@ -582,12 +614,15 @@ END(irq_entries_start)
CALL_enter_from_user_mode
1:
- ENTER_IRQ_STACK old_rsp=%rdi
+ ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
- call \func /* rdi points to pt_regs */
- .endm
+ ret
+END(interrupt_entry)
+
+
+/* Interrupt entry/exit. */
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
@@ -595,9 +630,10 @@ END(irq_entries_start)
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
- ASM_CLAC
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
- interrupt do_IRQ
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call do_IRQ /* rdi points to pt_regs */
/* 0(%rsp): old RSP */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -622,15 +658,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
ud2
1:
#endif
- POP_EXTRA_REGS
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rax
- popq %rcx
- popq %rdx
- popq %rsi
+ POP_REGS pop_rdi=0
/*
* The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
@@ -688,8 +716,7 @@ GLOBAL(restore_regs_and_return_to_kernel)
ud2
1:
#endif
- POP_EXTRA_REGS
- POP_C_REGS
+ POP_REGS
addq $8, %rsp /* skip regs->orig_ax */
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -799,10 +826,11 @@ END(common_interrupt)
.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
UNWIND_HINT_IRET_REGS
- ASM_CLAC
pushq $~(\num)
.Lcommon_\sym:
- interrupt \do_sym
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call \do_sym /* rdi points to pt_regs */
jmp ret_from_intr
END(\sym)
.endm
@@ -865,34 +893,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
-/*
- * Switch to the thread stack. This is called with the IRET frame and
- * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
- * space has not been allocated for them.)
- */
-ENTRY(switch_to_thread_stack)
- UNWIND_HINT_FUNC
-
- pushq %rdi
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
- movq %rsp, %rdi
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-
- pushq 7*8(%rdi) /* regs->ss */
- pushq 6*8(%rdi) /* regs->rsp */
- pushq 5*8(%rdi) /* regs->eflags */
- pushq 4*8(%rdi) /* regs->cs */
- pushq 3*8(%rdi) /* regs->ip */
- pushq 2*8(%rdi) /* regs->orig_ax */
- pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
-
- movq (%rdi), %rdi
- ret
-END(switch_to_thread_stack)
-
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -908,10 +908,8 @@ ENTRY(\sym)
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
- ALLOC_PT_GPREGS_ON_STACK
-
.if \paranoid < 2
- testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
+ testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
jnz .Lfrom_usermode_switch_stack_\@
.endif
@@ -1121,9 +1119,7 @@ ENTRY(xen_failsafe_callback)
addq $0x30, %rsp
UNWIND_HINT_IRET_REGS
pushq $-1 /* orig_ax = -1 => not a system call */
- ALLOC_PT_GPREGS_ON_STACK
- SAVE_C_REGS
- SAVE_EXTRA_REGS
+ PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
@@ -1142,7 +1138,7 @@ apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
#endif /* CONFIG_HYPERV */
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry int3 do_int3 has_error_code=0
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
@@ -1170,8 +1166,7 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
ENTRY(paranoid_entry)
UNWIND_HINT_FUNC
cld
- SAVE_C_REGS 8
- SAVE_EXTRA_REGS 8
+ PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
@@ -1211,21 +1206,20 @@ ENTRY(paranoid_exit)
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
+ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
END(paranoid_exit)
/*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Save all registers in pt_regs, and switch GS if needed.
* Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
UNWIND_HINT_FUNC
cld
- SAVE_C_REGS 8
- SAVE_EXTRA_REGS 8
+ PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
- xorl %ebx, %ebx
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1406,22 +1400,7 @@ ENTRY(nmi)
pushq 1*8(%rdx) /* pt_regs->rip */
UNWIND_HINT_IRET_REGS
pushq $-1 /* pt_regs->orig_ax */
- pushq %rdi /* pt_regs->di */
- pushq %rsi /* pt_regs->si */
- pushq (%rdx) /* pt_regs->dx */
- pushq %rcx /* pt_regs->cx */
- pushq %rax /* pt_regs->ax */
- pushq %r8 /* pt_regs->r8 */
- pushq %r9 /* pt_regs->r9 */
- pushq %r10 /* pt_regs->r10 */
- pushq %r11 /* pt_regs->r11 */
- pushq %rbx /* pt_regs->rbx */
- pushq %rbp /* pt_regs->rbp */
- pushq %r12 /* pt_regs->r12 */
- pushq %r13 /* pt_regs->r13 */
- pushq %r14 /* pt_regs->r14 */
- pushq %r15 /* pt_regs->r15 */
- UNWIND_HINT_REGS
+ PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
/*
@@ -1631,7 +1610,6 @@ end_repeat_nmi:
* frame to point back to repeat_nmi.
*/
pushq $-1 /* ORIG_RAX: no syscall to restart */
- ALLOC_PT_GPREGS_ON_STACK
/*
* Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
@@ -1655,8 +1633,7 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- POP_EXTRA_REGS
- POP_C_REGS
+ POP_REGS
/*
* Skip orig_ax and the "outermost" frame to point RSP at the "iret"
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 98d5358e4041..08425c42f8b7 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
+ xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
@@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
+ xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
+ xorl %r15d, %r15d /* nospec r15 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -278,9 +298,9 @@ sysret32_from_system_call:
*/
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r10
+ xorl %r8d, %r8d
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
swapgs
sysretl
END(entry_SYSCALL_compat)
@@ -327,26 +347,47 @@ ENTRY(entry_INT80_compat)
*/
movl %eax, %eax
+ /* switch to thread stack expects orig_ax and rdi to be pushed */
pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
- /* switch to thread stack expects orig_ax to be pushed */
- call switch_to_thread_stack
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- pushq %rdi /* pt_regs->di */
+ pushq 6*8(%rdi) /* regs->ss */
+ pushq 5*8(%rdi) /* regs->rsp */
+ pushq 4*8(%rdi) /* regs->eflags */
+ pushq 3*8(%rdi) /* regs->cs */
+ pushq 2*8(%rdi) /* regs->ip */
+ pushq 1*8(%rdi) /* regs->orig_ax */
+
+ pushq (%rdi) /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
+ xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp */
+ xorl %ebp, %ebp /* nospec rbp */
pushq %r12 /* pt_regs->r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq %r13 /* pt_regs->r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq %r14 /* pt_regs->r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
@@ -363,15 +404,3 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_ON
jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)
-
-ENTRY(stub32_clone)
- /*
- * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
- * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
- *
- * The native 64-bit kernel's sys_clone() implements the latter,
- * so we need to swap arguments here before calling it:
- */
- xchg %r8, %rcx
- jmp sys_clone
-ENDPROC(stub32_clone)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 448ac2161112..2a5e99cff859 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -8,12 +8,12 @@
#
0 i386 restart_syscall sys_restart_syscall
1 i386 exit sys_exit
-2 i386 fork sys_fork sys_fork
+2 i386 fork sys_fork
3 i386 read sys_read
4 i386 write sys_write
5 i386 open sys_open compat_sys_open
6 i386 close sys_close
-7 i386 waitpid sys_waitpid sys32_waitpid
+7 i386 waitpid sys_waitpid compat_sys_x86_waitpid
8 i386 creat sys_creat
9 i386 link sys_link
10 i386 unlink sys_unlink
@@ -78,7 +78,7 @@
69 i386 ssetmask sys_ssetmask
70 i386 setreuid sys_setreuid16
71 i386 setregid sys_setregid16
-72 i386 sigsuspend sys_sigsuspend sys_sigsuspend
+72 i386 sigsuspend sys_sigsuspend
73 i386 sigpending sys_sigpending compat_sys_sigpending
74 i386 sethostname sys_sethostname
75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
@@ -96,7 +96,7 @@
87 i386 swapon sys_swapon
88 i386 reboot sys_reboot
89 i386 readdir sys_old_readdir compat_sys_old_readdir
-90 i386 mmap sys_old_mmap sys32_mmap
+90 i386 mmap sys_old_mmap compat_sys_x86_mmap
91 i386 munmap sys_munmap
92 i386 truncate sys_truncate compat_sys_truncate
93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
@@ -126,7 +126,7 @@
117 i386 ipc sys_ipc compat_sys_ipc
118 i386 fsync sys_fsync
119 i386 sigreturn sys_sigreturn sys32_sigreturn
-120 i386 clone sys_clone stub32_clone
+120 i386 clone sys_clone compat_sys_x86_clone
121 i386 setdomainname sys_setdomainname
122 i386 uname sys_newuname
123 i386 modify_ldt sys_modify_ldt
@@ -186,8 +186,8 @@
177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
179 i386 rt_sigsuspend sys_rt_sigsuspend
-180 i386 pread64 sys_pread64 sys32_pread
-181 i386 pwrite64 sys_pwrite64 sys32_pwrite
+180 i386 pread64 sys_pread64 compat_sys_x86_pread
+181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite
182 i386 chown sys_chown16
183 i386 getcwd sys_getcwd
184 i386 capget sys_capget
@@ -196,14 +196,14 @@
187 i386 sendfile sys_sendfile compat_sys_sendfile
188 i386 getpmsg
189 i386 putpmsg
-190 i386 vfork sys_vfork sys_vfork
+190 i386 vfork sys_vfork
191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
192 i386 mmap2 sys_mmap_pgoff
-193 i386 truncate64 sys_truncate64 sys32_truncate64
-194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
-195 i386 stat64 sys_stat64 sys32_stat64
-196 i386 lstat64 sys_lstat64 sys32_lstat64
-197 i386 fstat64 sys_fstat64 sys32_fstat64
+193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64
+194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64
+195 i386 stat64 sys_stat64 compat_sys_x86_stat64
+196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64
+197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64
198 i386 lchown32 sys_lchown
199 i386 getuid32 sys_getuid
200 i386 getgid32 sys_getgid
@@ -231,7 +231,7 @@
# 222 is unused
# 223 is unused
224 i386 gettid sys_gettid
-225 i386 readahead sys_readahead sys32_readahead
+225 i386 readahead sys_readahead compat_sys_x86_readahead
226 i386 setxattr sys_setxattr
227 i386 lsetxattr sys_lsetxattr
228 i386 fsetxattr sys_fsetxattr
@@ -256,7 +256,7 @@
247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
248 i386 io_submit sys_io_submit compat_sys_io_submit
249 i386 io_cancel sys_io_cancel
-250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
+250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
252 i386 exit_group sys_exit_group
253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
@@ -278,7 +278,7 @@
269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
270 i386 tgkill sys_tgkill
271 i386 utimes sys_utimes compat_sys_utimes
-272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
+272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64
273 i386 vserver
274 i386 mbind sys_mbind
275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
@@ -306,7 +306,7 @@
297 i386 mknodat sys_mknodat
298 i386 fchownat sys_fchownat
299 i386 futimesat sys_futimesat compat_sys_futimesat
-300 i386 fstatat64 sys_fstatat64 sys32_fstatat
+300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat
301 i386 unlinkat sys_unlinkat
302 i386 renameat sys_renameat
303 i386 linkat sys_linkat
@@ -320,7 +320,7 @@
311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
313 i386 splice sys_splice
-314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
+314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range
315 i386 tee sys_tee
316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
317 i386 move_pages sys_move_pages compat_sys_move_pages
@@ -330,7 +330,7 @@
321 i386 signalfd sys_signalfd compat_sys_signalfd
322 i386 timerfd_create sys_timerfd_create
323 i386 eventfd sys_eventfd
-324 i386 fallocate sys_fallocate sys32_fallocate
+324 i386 fallocate sys_fallocate compat_sys_x86_fallocate
325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 7780bbfb06ef..9242b28418d5 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -5,8 +5,6 @@
#undef CONFIG_OPTIMIZE_INLINING
#endif
-#undef CONFIG_X86_PPRO_FENCE
-
#ifdef CONFIG_X86_64
/*
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 577fa8adb785..317be365bce3 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,10 +42,8 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
-#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
- NATIVE;
-#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
+static enum { EMULATE, NONE } vsyscall_mode =
+#ifdef CONFIG_LEGACY_VSYSCALL_NONE
NONE;
#else
EMULATE;
@@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
if (str) {
if (!strcmp("emulate", str))
vsyscall_mode = EMULATE;
- else if (!strcmp("native", str))
- vsyscall_mode = NATIVE;
else if (!strcmp("none", str))
vsyscall_mode = NONE;
else
@@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
WARN_ON_ONCE(address != regs->ip);
- /* This should be unreachable in NATIVE mode. */
- if (WARN_ON(vsyscall_mode == NATIVE))
- return false;
-
if (vsyscall_mode == NONE) {
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall attempted with vsyscall=none");
@@ -355,7 +347,7 @@ void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
#if CONFIG_PGTABLE_LEVELS >= 5
- p4d->p4d |= _PAGE_USER;
+ set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER));
#endif
pud = pud_offset(p4d, VSYSCALL_ADDR);
set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
@@ -370,9 +362,7 @@ void __init map_vsyscall(void)
if (vsyscall_mode != NONE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
- vsyscall_mode == NATIVE
- ? PAGE_KERNEL_VSYSCALL
- : PAGE_KERNEL_VVAR);
+ PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}