-rw-r--r--  Documentation/x86/orc-unwinder.txt | 2
-rw-r--r--  Makefile | 4
-rw-r--r--  arch/x86/Kconfig | 2
-rw-r--r--  arch/x86/Kconfig.debug | 39
-rw-r--r--  arch/x86/configs/tiny.config | 4
-rw-r--r--  arch/x86/configs/x86_64_defconfig | 1
-rw-r--r--  arch/x86/crypto/chacha20-avx2-x86_64.S | 4
-rw-r--r--  arch/x86/crypto/chacha20-ssse3-x86_64.S | 4
-rw-r--r--  arch/x86/entry/calling.h | 69
-rw-r--r--  arch/x86/entry/entry_64.S | 141
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 3
-rw-r--r--  arch/x86/include/asm/archrandom.h | 8
-rw-r--r--  arch/x86/include/asm/bitops.h | 10
-rw-r--r--  arch/x86/include/asm/compat.h | 1
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 9
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 11
-rw-r--r--  arch/x86/include/asm/module.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt.h | 5
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 2
-rw-r--r--  arch/x86/include/asm/percpu.h | 2
-rw-r--r--  arch/x86/include/asm/processor.h | 52
-rw-r--r--  arch/x86/include/asm/ptrace.h | 6
-rw-r--r--  arch/x86/include/asm/rmwcc.h | 2
-rw-r--r--  arch/x86/include/asm/switch_to.h | 24
-rw-r--r--  arch/x86/include/asm/syscalls.h | 2
-rw-r--r--  arch/x86/include/asm/trace/fpu.h | 10
-rw-r--r--  arch/x86/include/asm/traps.h | 20
-rw-r--r--  arch/x86/include/asm/unwind.h | 8
-rw-r--r--  arch/x86/include/uapi/asm/processor-flags.h | 3
-rw-r--r--  arch/x86/kernel/Makefile | 7
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 1
-rw-r--r--  arch/x86/kernel/cpu/common.c | 29
-rw-r--r--  arch/x86/kernel/cpu/cpuid-deps.c | 121
-rw-r--r--  arch/x86/kernel/fpu/init.c | 11
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 43
-rw-r--r--  arch/x86/kernel/head_32.S | 5
-rw-r--r--  arch/x86/kernel/head_64.S | 34
-rw-r--r--  arch/x86/kernel/ldt.c | 16
-rw-r--r--  arch/x86/kernel/process.c | 8
-rw-r--r--  arch/x86/kernel/process_32.c | 6
-rw-r--r--  arch/x86/kernel/process_64.c | 5
-rw-r--r--  arch/x86/kernel/smpboot.c | 3
-rw-r--r--  arch/x86/kernel/traps.c | 3
-rw-r--r--  arch/x86/kernel/verify_cpu.S | 3
-rw-r--r--  arch/x86/kernel/vm86_32.c | 20
-rw-r--r--  arch/x86/mm/fault.c | 88
-rw-r--r--  arch/x86/um/ldt.c | 7
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 9
-rw-r--r--  arch/x86/xen/smp_pv.c | 17
-rw-r--r--  arch/x86/xen/xen-asm_64.S | 2
-rw-r--r--  arch/x86/xen/xen-head.S | 11
-rw-r--r--  drivers/infiniband/core/nldev.c | 4
-rw-r--r--  drivers/mmc/host/renesas_sdhi_internal_dmac.c | 17
-rw-r--r--  drivers/mmc/host/tmio_mmc_core.c | 13
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 2
-rw-r--r--  include/linux/pm_qos.h | 3
-rw-r--r--  lib/Kconfig.debug | 2
-rw-r--r--  lib/ioremap.c | 1
-rw-r--r--  scripts/Makefile.build | 2
-rw-r--r--  tools/objtool/check.c | 7
-rw-r--r--  tools/objtool/objtool.c | 6
61 files changed, 582 insertions(+), 374 deletions(-)
diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
index af0c9a4c65a6..cd4b29be29af 100644
--- a/Documentation/x86/orc-unwinder.txt
+++ b/Documentation/x86/orc-unwinder.txt
@@ -4,7 +4,7 @@ ORC unwinder
Overview
--------
-The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
+The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
similar in concept to a DWARF unwinder. The difference is that the
format of the ORC data is much simpler than DWARF, which in turn allows
the ORC unwinder to be much simpler and faster.
diff --git a/Makefile b/Makefile
index 5f91a28a3cea..9f6894531f9e 100644
--- a/Makefile
+++ b/Makefile
@@ -933,8 +933,8 @@ ifdef CONFIG_STACK_VALIDATION
ifeq ($(has_libelf),1)
objtool_target := tools/objtool FORCE
else
- ifdef CONFIG_ORC_UNWINDER
- $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+ ifdef CONFIG_UNWINDER_ORC
+ $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
else
$(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 32779beb56e2..9ab7629feac7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -170,7 +170,7 @@ config X86
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
select HAVE_STACK_VALIDATION if X86_64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 71a48a30fc84..a4ff214fb760 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -358,28 +358,14 @@ config PUNIT_ATOM_DEBUG
choice
prompt "Choose kernel unwinder"
- default FRAME_POINTER_UNWINDER
+ default UNWINDER_ORC if X86_64
+ default UNWINDER_FRAME_POINTER if X86_32
---help---
This determines which method will be used for unwinding kernel stack
traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
livepatch, lockdep, and more.
-config FRAME_POINTER_UNWINDER
- bool "Frame pointer unwinder"
- select FRAME_POINTER
- ---help---
- This option enables the frame pointer unwinder for unwinding kernel
- stack traces.
-
- The unwinder itself is fast and it uses less RAM than the ORC
- unwinder, but the kernel text size will grow by ~3% and the kernel's
- overall performance will degrade by roughly 5-10%.
-
- This option is recommended if you want to use the livepatch
- consistency model, as this is currently the only way to get a
- reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
-config ORC_UNWINDER
+config UNWINDER_ORC
bool "ORC unwinder"
depends on X86_64
select STACK_VALIDATION
@@ -395,7 +381,22 @@ config ORC_UNWINDER
Enabling this option will increase the kernel's runtime memory usage
by roughly 2-4MB, depending on your kernel config.
-config GUESS_UNWINDER
+config UNWINDER_FRAME_POINTER
+ bool "Frame pointer unwinder"
+ select FRAME_POINTER
+ ---help---
+ This option enables the frame pointer unwinder for unwinding kernel
+ stack traces.
+
+ The unwinder itself is fast and it uses less RAM than the ORC
+ unwinder, but the kernel text size will grow by ~3% and the kernel's
+ overall performance will degrade by roughly 5-10%.
+
+ This option is recommended if you want to use the livepatch
+ consistency model, as this is currently the only way to get a
+ reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
+config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
---help---
@@ -410,7 +411,7 @@ config GUESS_UNWINDER
endchoice
config FRAME_POINTER
- depends on !ORC_UNWINDER && !GUESS_UNWINDER
+ depends on !UNWINDER_ORC && !UNWINDER_GUESS
bool
endmenu
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 550cd5012b73..66c9e2aab16c 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,5 +1,5 @@
CONFIG_NOHIGHMEM=y
# CONFIG_HIGHMEM4G is not set
# CONFIG_HIGHMEM64G is not set
-CONFIG_GUESS_UNWINDER=y
-# CONFIG_FRAME_POINTER_UNWINDER is not set
+CONFIG_UNWINDER_GUESS=y
+# CONFIG_UNWINDER_FRAME_POINTER is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4a4b16e56d35..e32fc1f274d8 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_BOOT_PARAMS=y
CONFIG_OPTIMIZE_INLINING=y
+CONFIG_UNWINDER_ORC=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S
index 3a2dc3dc6cac..f3cd26f48332 100644
--- a/arch/x86/crypto/chacha20-avx2-x86_64.S
+++ b/arch/x86/crypto/chacha20-avx2-x86_64.S
@@ -45,7 +45,7 @@ ENTRY(chacha20_8block_xor_avx2)
vzeroupper
# 4 * 32 byte stack, 32-byte aligned
- mov %rsp, %r8
+ lea 8(%rsp),%r10
and $~31, %rsp
sub $0x80, %rsp
@@ -443,6 +443,6 @@ ENTRY(chacha20_8block_xor_avx2)
vmovdqu %ymm15,0x01e0(%rsi)
vzeroupper
- mov %r8,%rsp
+ lea -8(%r10),%rsp
ret
ENDPROC(chacha20_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
index 3f511a7d73b8..512a2b500fd1 100644
--- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha20-ssse3-x86_64.S
@@ -160,7 +160,7 @@ ENTRY(chacha20_4block_xor_ssse3)
# done with the slightly better performing SSSE3 byte shuffling,
# 7/12-bit word rotation uses traditional shift+OR.
- mov %rsp,%r11
+ lea 8(%rsp),%r10
sub $0x80,%rsp
and $~63,%rsp
@@ -625,6 +625,6 @@ ENTRY(chacha20_4block_xor_ssse3)
pxor %xmm1,%xmm15
movdqu %xmm15,0xf0(%rsi)
- mov %r11,%rsp
+ lea -8(%r10),%rsp
ret
ENDPROC(chacha20_4block_xor_ssse3)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 640aafebdc00..1895a685d3dd 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -141,56 +141,25 @@ For 32-bit we have the following conventions - kernel is built with
UNWIND_HINT_REGS offset=\offset
.endm
- .macro RESTORE_EXTRA_REGS offset=0
- movq 0*8+\offset(%rsp), %r15
- movq 1*8+\offset(%rsp), %r14
- movq 2*8+\offset(%rsp), %r13
- movq 3*8+\offset(%rsp), %r12
- movq 4*8+\offset(%rsp), %rbp
- movq 5*8+\offset(%rsp), %rbx
- UNWIND_HINT_REGS offset=\offset extra=0
- .endm
-
- .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
- .if \rstor_r11
- movq 6*8(%rsp), %r11
- .endif
- .if \rstor_r8910
- movq 7*8(%rsp), %r10
- movq 8*8(%rsp), %r9
- movq 9*8(%rsp), %r8
- .endif
- .if \rstor_rax
- movq 10*8(%rsp), %rax
- .endif
- .if \rstor_rcx
- movq 11*8(%rsp), %rcx
- .endif
- .if \rstor_rdx
- movq 12*8(%rsp), %rdx
- .endif
- movq 13*8(%rsp), %rsi
- movq 14*8(%rsp), %rdi
- UNWIND_HINT_IRET_REGS offset=16*8
- .endm
- .macro RESTORE_C_REGS
- RESTORE_C_REGS_HELPER 1,1,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RAX
- RESTORE_C_REGS_HELPER 0,1,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RCX
- RESTORE_C_REGS_HELPER 1,0,1,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_R11
- RESTORE_C_REGS_HELPER 1,1,0,1,1
- .endm
- .macro RESTORE_C_REGS_EXCEPT_RCX_R11
- RESTORE_C_REGS_HELPER 1,0,0,1,1
- .endm
-
- .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
- subq $-(15*8+\addskip), %rsp
+ .macro POP_EXTRA_REGS
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ .endm
+
+ .macro POP_C_REGS
+ popq %r11
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
.endm
.macro icebp
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f6cdb7a1455e..84263c79a119 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -220,10 +220,9 @@ entry_SYSCALL_64_fastpath:
TRACE_IRQS_ON /* user mode is traced as IRQs on */
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
+ addq $6*8, %rsp /* skip extra regs -- they were preserved */
UNWIND_HINT_EMPTY
- USERGS_SYSRET64
+ jmp .Lpop_c_regs_except_rcx_r11_and_sysret
1:
/*
@@ -245,17 +244,18 @@ entry_SYSCALL64_slow_path:
call do_syscall_64 /* returns with IRQs disabled */
return_from_SYSCALL_64:
- RESTORE_EXTRA_REGS
TRACE_IRQS_IRETQ /* we're about to change IF */
/*
* Try to use SYSRET instead of IRET if we're returning to
- * a completely clean 64-bit userspace context.
+ * a completely clean 64-bit userspace context. If we're not,
+ * go to the slow exit path.
*/
movq RCX(%rsp), %rcx
movq RIP(%rsp), %r11
- cmpq %rcx, %r11 /* RCX == RIP */
- jne opportunistic_sysret_failed
+
+ cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
+ jne swapgs_restore_regs_and_return_to_usermode
/*
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -273,14 +273,14 @@ return_from_SYSCALL_64:
/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
movq R11(%rsp), %r11
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
/*
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
@@ -301,12 +301,12 @@ return_from_SYSCALL_64:
* would never get past 'stuck_here'.
*/
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz opportunistic_sysret_failed
+ jnz swapgs_restore_regs_and_return_to_usermode
/* nothing to check for RSP */
cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
- jne opportunistic_sysret_failed
+ jne swapgs_restore_regs_and_return_to_usermode
/*
* We win! This label is here just for ease of understanding
@@ -314,14 +314,20 @@ return_from_SYSCALL_64:
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
- RESTORE_C_REGS_EXCEPT_RCX_R11
- movq RSP(%rsp), %rsp
UNWIND_HINT_EMPTY
+ POP_EXTRA_REGS
+.Lpop_c_regs_except_rcx_r11_and_sysret:
+ popq %rsi /* skip r11 */
+ popq %r10
+ popq %r9
+ popq %r8
+ popq %rax
+ popq %rsi /* skip rcx */
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ movq RSP-ORIG_RAX(%rsp), %rsp
USERGS_SYSRET64
-
-opportunistic_sysret_failed:
- SWAPGS
- jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
ENTRY(stub_ptregs_64)
@@ -422,8 +428,7 @@ ENTRY(ret_from_fork)
movq %rsp, %rdi
call syscall_return_slowpath /* returns with IRQs disabled */
TRACE_IRQS_ON /* user mode is traced as IRQS on */
- SWAPGS
- jmp restore_regs_and_iret
+ jmp swapgs_restore_regs_and_return_to_usermode
1:
/* kernel thread */
@@ -611,8 +616,21 @@ GLOBAL(retint_user)
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
+
+GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+#ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates user mode. */
+ testb $3, CS(%rsp)
+ jnz 1f
+ ud2
+1:
+#endif
SWAPGS
- jmp restore_regs_and_iret
+ POP_EXTRA_REGS
+ POP_C_REGS
+ addq $8, %rsp /* skip regs->orig_ax */
+ INTERRUPT_RETURN
+
/* Returning to kernel space */
retint_kernel:
@@ -632,15 +650,17 @@ retint_kernel:
*/
TRACE_IRQS_IRETQ
-/*
- * At this label, code paths which return to kernel and to user,
- * which come from interrupts/exception and from syscalls, merge.
- */
-GLOBAL(restore_regs_and_iret)
- RESTORE_EXTRA_REGS
-restore_c_regs_and_iret:
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
+GLOBAL(restore_regs_and_return_to_kernel)
+#ifdef CONFIG_DEBUG_ENTRY
+ /* Assert that pt_regs indicates kernel mode. */
+ testb $3, CS(%rsp)
+ jz 1f
+ ud2
+1:
+#endif
+ POP_EXTRA_REGS
+ POP_C_REGS
+ addq $8, %rsp /* skip regs->orig_ax */
INTERRUPT_RETURN
ENTRY(native_iret)
@@ -817,7 +837,7 @@ ENTRY(\sym)
ASM_CLAC
- .ifeq \has_error_code
+ .if \has_error_code == 0
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
@@ -1058,6 +1078,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
+idtentry xennmi do_nmi has_error_code=0
idtentry xendebug do_debug has_error_code=0
idtentry xenint3 do_int3 has_error_code=0
#endif
@@ -1111,17 +1132,14 @@ ENTRY(paranoid_exit)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF_DEBUG
testl %ebx, %ebx /* swapgs needed? */
- jnz paranoid_exit_no_swapgs
+ jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
SWAPGS_UNSAFE_STACK
- jmp paranoid_exit_restore
-paranoid_exit_no_swapgs:
+ jmp .Lparanoid_exit_restore
+.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
- REMOVE_PT_GPREGS_FROM_STACK 8
- INTERRUPT_RETURN
+.Lparanoid_exit_restore:
+ jmp restore_regs_and_return_to_kernel
END(paranoid_exit)
/*
@@ -1222,10 +1240,13 @@ ENTRY(error_exit)
jmp retint_user
END(error_exit)
-/* Runs on exception stack */
-/* XXX: broken on Xen PV */
+/*
+ * Runs on exception stack. Xen PV does not go through this path at all,
+ * so we can use real assembly here.
+ */
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
+
/*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
@@ -1283,7 +1304,7 @@ ENTRY(nmi)
* stacks lest we corrupt the "NMI executing" variable.
*/
- SWAPGS_UNSAFE_STACK
+ swapgs
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1327,8 +1348,7 @@ ENTRY(nmi)
* Return back to user mode. We must *not* do the normal exit
* work, because we don't want to enable interrupts.
*/
- SWAPGS
- jmp restore_regs_and_iret
+ jmp swapgs_restore_regs_and_return_to_usermode
.Lnmi_from_kernel:
/*
@@ -1449,7 +1469,7 @@ nested_nmi_out:
popq %rdx
/* We are returning to kernel mode, so this cannot result in a fault. */
- INTERRUPT_RETURN
+ iretq
first_nmi:
/* Restore rdx. */
@@ -1480,7 +1500,7 @@ first_nmi:
pushfq /* RFLAGS */
pushq $__KERNEL_CS /* CS */
pushq $1f /* RIP */
- INTERRUPT_RETURN /* continues at repeat_nmi below */
+ iretq /* continues at repeat_nmi below */
UNWIND_HINT_IRET_REGS
1:
#endif
@@ -1543,29 +1563,34 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- RESTORE_EXTRA_REGS
- RESTORE_C_REGS
+ POP_EXTRA_REGS
+ POP_C_REGS
- /* Point RSP at the "iret" frame. */
- REMOVE_PT_GPREGS_FROM_STACK 6*8
+ /*
+ * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
+ * frame.
+ */
+ addq $6*8, %rsp
/*
* Clear "NMI executing". Set DF first so that we can easily
* distinguish the remaining code between here and IRET from
- * the SYSCALL entry and exit paths. On a native kernel, we
- * could just inspect RIP, but, on paravirt kernels,
- * INTERRUPT_RETURN can translate into a jump into a
- * hypercall page.
+ * the SYSCALL entry and exit paths.
+ *
+ * We arguably should just inspect RIP instead, but I (Andy) wrote
+ * this code when I had the misapprehension that Xen PV supported
+ * NMIs, and Xen PV would break that approach.
*/
std
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
- * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
- * stack in a single instruction. We are returning to kernel
- * mode, so this cannot result in a fault.
+ * iretq reads the "iret" frame and exits the NMI stack in a
+ * single instruction. We are returning to kernel mode, so this
+ * cannot result in a fault. Similarly, we don't need to worry
+ * about espfix64 on the way back to kernel mode.
*/
- INTERRUPT_RETURN
+ iretq
END(nmi)
ENTRY(ignore_sysret)
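
Restated as a C-style sketch, the opportunistic SYSRET checks above reduce to the following (rip_is_canonical() is a hypothetical stand-in for the shift-based canonicalness test in the hunk):

	if (rcx == rip && rip_is_canonical(rcx) &&
	    cs == __USER_CS && r11 == rflags &&
	    !(r11 & (X86_EFLAGS_RF | X86_EFLAGS_TF)) &&
	    ss == __USER_DS)
		goto syscall_return_via_sysret;
	else
		goto swapgs_restore_regs_and_return_to_usermode;
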
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e26c25ca7756..932b96ce1b06 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -336,8 +336,7 @@ ENTRY(entry_INT80_compat)
/* Go back to user mode. */
TRACE_IRQS_ON
- SWAPGS
- jmp restore_regs_and_iret
+ jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)
ENTRY(stub32_clone)
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 5b0579abb398..3ac991d81e74 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
- asm volatile(RDRAND_LONG "\n\t"
+ asm volatile(RDRAND_LONG
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
if (ok)
@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
bool ok;
unsigned int retry = RDRAND_RETRY_LOOPS;
do {
- asm volatile(RDRAND_INT "\n\t"
+ asm volatile(RDRAND_INT
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
if (ok)
@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
static inline bool rdseed_long(unsigned long *v)
{
bool ok;
- asm volatile(RDSEED_LONG "\n\t"
+ asm volatile(RDSEED_LONG
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
return ok;
@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
static inline bool rdseed_int(unsigned int *v)
{
bool ok;
- asm volatile(RDSEED_INT "\n\t"
+ asm volatile(RDSEED_INT
CC_SET(c)
: CC_OUT(c) (ok), "=a" (*v));
return ok;
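
The "\n\t" removals are safe because CC_SET() itself begins with a newline in both of its expansions, so the explicit separator was redundant. A minimal standalone sketch of the flag-output pattern this relies on (GCC 6+ "=@ccc" constraint; illustrative, not this file's macros):

	static inline bool rdrand_once(unsigned long *v)
	{
		bool ok;

		asm volatile("rdrand %[val]"
			     : "=@ccc" (ok), [val] "=r" (*v)); /* CF -> ok */
		return ok;
	}
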
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 854022772c5b..8cee8db6dffb 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -142,7 +142,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
bool negative;
- asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
+ asm volatile(LOCK_PREFIX "andb %2,%1"
CC_SET(s)
: CC_OUT(s) (negative), ADDR
: "ir" ((char) ~(1 << nr)) : "memory");
@@ -245,7 +245,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
{
bool oldbit;
- asm("bts %2,%1\n\t"
+ asm("bts %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -285,7 +285,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
{
bool oldbit;
- asm volatile("btr %2,%1\n\t"
+ asm volatile("btr %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -297,7 +297,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
{
bool oldbit;
- asm volatile("btc %2,%1\n\t"
+ asm volatile("btc %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr) : "memory");
@@ -328,7 +328,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
{
bool oldbit;
- asm volatile("bt %2,%1\n\t"
+ asm volatile("bt %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 5343c19814b3..948b6d8ec46f 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -6,6 +6,7 @@
*/
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <asm/processor.h>
#include <asm/user32.h>
#include <asm/unistd.h>
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index d59c15c3defd..225fd8374fae 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -125,11 +125,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
-#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
-#define setup_clear_cpu_cap(bit) do { \
- clear_cpu_cap(&boot_cpu_data, bit); \
- set_bit(bit, (unsigned long *)cpu_caps_cleared); \
-} while (0)
+
+extern void setup_clear_cpu_cap(unsigned int bit);
+extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
+
#define setup_force_cpu_cap(bit) do { \
set_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_set); \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2519c6c801c9..b0556f882aa8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -21,6 +21,11 @@
* this feature bit is not displayed in /proc/cpuinfo at all.
*/
+/*
+ * When adding new features here that depend on other features,
+ * please update the table in kernel/cpu/cpuid-deps.c
+ */
+
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
@@ -294,6 +299,12 @@
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+ 10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
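
Each X86_FEATURE_* value encodes (word * 32 + bit) into x86_capability[]; a sketch of the arithmetic behind the new entries (illustrative helpers, not kernel API):

	#define FEATURE_WORD(f)	((f) / 32)	/* index into x86_capability[] */
	#define FEATURE_BIT(f)	((f) % 32)	/* bit within that word */
	/* e.g. X86_FEATURE_GFNI == 16*32 + 8: word 16 (CPUID.7:ECX), bit 8 */
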
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 9eb7c718aaf8..9f05a1002aa9 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,7 +5,7 @@
#include <asm/orc_types.h>
struct mod_arch_specific {
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 12deec722cf0..43d4f90edebc 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -15,10 +15,9 @@
#include <linux/cpumask.h>
#include <asm/frame.h>
-static inline void load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
{
- PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
+ PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
}
/* The paravirtualized CPUID instruction. */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 280d94c36dad..a916788ac478 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -133,7 +133,7 @@ struct pv_cpu_ops {
void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
- void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
+ void (*load_sp0)(unsigned long sp0);
void (*set_iopl_mask)(unsigned mask);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 9fa03604b2b3..b21a475fd7ed 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -525,7 +525,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
{
bool oldbit;
- asm volatile("bt "__percpu_arg(2)",%1\n\t"
+ asm volatile("bt "__percpu_arg(2)",%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b390ff76e58f..f10dae14f951 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -430,7 +430,9 @@ typedef struct {
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
+#ifdef CONFIG_X86_32
unsigned long sp0;
+#endif
unsigned long sp;
#ifdef CONFIG_X86_32
unsigned long sysenter_cs;
@@ -517,16 +519,9 @@ static inline void native_set_iopl_mask(unsigned mask)
}
static inline void
-native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+native_load_sp0(unsigned long sp0)
{
- tss->x86_tss.sp0 = thread->sp0;
-#ifdef CONFIG_X86_32
- /* Only happens when SEP is enabled, no need to test "SEP"arately: */
- if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
- tss->x86_tss.ss1 = thread->sysenter_cs;
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
- }
-#endif
+ this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}
static inline void native_swapgs(void)
@@ -546,15 +541,20 @@ static inline unsigned long current_top_of_stack(void)
#endif
}
+static inline bool on_thread_stack(void)
+{
+ return (unsigned long)(current_top_of_stack() -
+ current_stack_pointer) < THREAD_SIZE;
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
#define __cpuid native_cpuid
-static inline void load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
{
- native_load_sp0(tss, thread);
+ native_load_sp0(sp0);
}
#define set_iopl_mask native_set_iopl_mask
@@ -803,6 +803,15 @@ static inline void spin_lock_prefetch(const void *x)
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
+#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
+
+#define task_pt_regs(task) \
+({ \
+ unsigned long __ptr = (unsigned long)task_stack_page(task); \
+ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
+ ((struct pt_regs *)__ptr) - 1; \
+})
+
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
@@ -822,23 +831,6 @@ static inline void spin_lock_prefetch(const void *x)
.addr_limit = KERNEL_DS, \
}
-/*
- * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
- * This is necessary to guarantee that the entire "struct pt_regs"
- * is accessible even if the CPU hasn't stored the SS/ESP registers
- * on the stack (interrupt gate does not save these registers
- * when switching to the same priv ring).
- * Therefore beware: accessing the ss/esp fields of the
- * "struct pt_regs" is possible, but they may contain the
- * completely wrong values.
- */
-#define task_pt_regs(task) \
-({ \
- unsigned long __ptr = (unsigned long)task_stack_page(task); \
- __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
- ((struct pt_regs *)__ptr) - 1; \
-})
-
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else
@@ -872,11 +864,9 @@ static inline void spin_lock_prefetch(const void *x)
#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
- .sp0 = TOP_OF_INIT_STACK, \
.addr_limit = KERNEL_DS, \
}
-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
extern unsigned long KSTK_ESP(struct task_struct *task);
#endif /* CONFIG_X86_64 */
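
With task_pt_regs() now shared between 32-bit and 64-bit, the layout both macros assume can be stated as one identity (illustration only, true by construction of the macros above):

	static inline bool pt_regs_at_stack_top(struct task_struct *task)
	{
		/* pt_regs sits just below the padded top of the task stack. */
		return task_top_of_stack(task) ==
		       (unsigned long)(task_pt_regs(task) + 1);
	}

which is why x86-64 can drop thread_struct.sp0 and derive the entry stack from the task itself.
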
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 91c04c8e67fa..e2afbf689309 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -135,9 +135,9 @@ static inline int v8086_mode(struct pt_regs *regs)
#endif
}
-#ifdef CONFIG_X86_64
static inline bool user_64bit_mode(struct pt_regs *regs)
{
+#ifdef CONFIG_X86_64
#ifndef CONFIG_PARAVIRT
/*
* On non-paravirt systems, this is the only long mode CPL 3
@@ -148,8 +148,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
/* Headers are too twisted for this to go in paravirt.h. */
return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
#endif
+#else /* !CONFIG_X86_64 */
+ return false;
+#endif
}
+#ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 045f99211a99..0c411c8bbdbd 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -28,7 +28,7 @@ cc_label: \
#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
do { \
bool c; \
- asm volatile (fullop ";" CC_SET(cc) \
+ asm volatile (fullop CC_SET(cc) \
: [counter] "+m" (var), CC_OUT(cc) (c) \
: __VA_ARGS__ : clobbers); \
return c; \
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index fcc5cd387fd1..010cd6e4eafc 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_SWITCH_TO_H
#define _ASM_X86_SWITCH_TO_H
+#include <linux/sched/task_stack.h>
+
struct task_struct; /* one of the stranger aspects of C forward declarations */
struct task_struct *__switch_to_asm(struct task_struct *prev,
@@ -72,4 +74,26 @@ do { \
((last) = __switch_to_asm((prev), (next))); \
} while (0)
+#ifdef CONFIG_X86_32
+static inline void refresh_sysenter_cs(struct thread_struct *thread)
+{
+ /* Only happens when SEP is enabled, no need to test "SEP"arately: */
+ if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+ return;
+
+ this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+}
+#endif
+
+/* This is used when switching tasks or entering/exiting vm86 mode. */
+static inline void update_sp0(struct task_struct *task)
+{
+#ifdef CONFIG_X86_32
+ load_sp0(task->thread.sp0);
+#else
+ load_sp0(task_top_of_stack(task));
+#endif
+}
+
#endif /* _ASM_X86_SWITCH_TO_H */
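
A hedged sketch of how the two new helpers are meant to be paired on a context switch (cf. the process_32.c hunk later in this patch; example_refresh_for() is illustrative, not kernel code):

	static inline void example_refresh_for(struct task_struct *next_p)
	{
		update_sp0(next_p);			/* TSS sp0 <- next's entry stack */
	#ifdef CONFIG_X86_32
		refresh_sysenter_cs(&next_p->thread);	/* MSR rewritten only on change */
	#endif
	}
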
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91dfcafe27a6..bad25bb80679 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
asmlinkage long sys_iopl(unsigned int);
/* kernel/ldt.c */
-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
/* kernel/signal.c */
asmlinkage long sys_rt_sigreturn(void);
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 39f7a27bef13..6b086c39e6dc 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -33,11 +33,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_state,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
@@ -73,11 +68,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 5545f6459bf5..e76ce80ca18b 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -37,9 +37,9 @@ asmlinkage void simd_coprocessor_error(void);
#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
asmlinkage void xen_divide_error(void);
+asmlinkage void xen_xennmi(void);
asmlinkage void xen_xendebug(void);
asmlinkage void xen_xenint3(void);
-asmlinkage void xen_nmi(void);
asmlinkage void xen_overflow(void);
asmlinkage void xen_bounds(void);
asmlinkage void xen_invalid_op(void);
@@ -144,4 +144,22 @@ enum {
X86_TRAP_IRET = 32, /* 32, IRET Exception */
};
+/*
+ * Page fault error code bits:
+ *
+ * bit 0 == 0: no page found 1: protection fault
+ * bit 1 == 0: read access 1: write access
+ * bit 2 == 0: kernel-mode access 1: user-mode access
+ * bit 3 == 1: use of reserved bit detected
+ * bit 4 == 1: fault was an instruction fetch
+ * bit 5 == 1: protection keys block access
+ */
+enum x86_pf_error_code {
+ X86_PF_PROT = 1 << 0,
+ X86_PF_WRITE = 1 << 1,
+ X86_PF_USER = 1 << 2,
+ X86_PF_RSVD = 1 << 3,
+ X86_PF_INSTR = 1 << 4,
+ X86_PF_PK = 1 << 5,
+};
#endif /* _ASM_X86_TRAPS_H */
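
With the bits exported here, code outside mm/fault.c can decode a #PF error code; a minimal sketch (describe_pf() is hypothetical, assumes <linux/printk.h>):

	static void describe_pf(unsigned long error_code)
	{
		pr_info("#PF: %s %s in %s mode%s%s\n",
			error_code & X86_PF_PROT  ? "protection fault" : "no page found",
			error_code & X86_PF_WRITE ? "on write" : "on read",
			error_code & X86_PF_USER  ? "user" : "kernel",
			error_code & X86_PF_RSVD  ? ", reserved bit set" : "",
			error_code & X86_PF_INSTR ? ", instruction fetch" : "");
	}
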
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index e9f793e2df7a..35d67dc7b69f 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,11 +12,11 @@ struct unwind_state {
struct task_struct *task;
int graph_idx;
bool error;
-#if defined(CONFIG_ORC_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
unsigned long sp, bp, ip;
struct pt_regs *regs;
-#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
+#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
struct pt_regs *regs;
@@ -50,7 +50,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
__unwind_start(state, task, regs, first_frame);
}
-#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
{
if (unwind_done(state))
@@ -65,7 +65,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
}
#endif
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
void unwind_init(void);
void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
void *orc, size_t orc_size);
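
For orientation, the renamed options only select the backend; callers keep using the same unwind_* API. A typical walk, sketched from the declarations in this header plus unwind_get_return_address():

	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, current, NULL, NULL);
	     !unwind_done(&state); unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr)
			break;
		printk(KERN_DEBUG "%pS\n", (void *)addr);
	}
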
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 185f3d10c194..39946d0a1d41 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -151,5 +151,8 @@
#define CX86_ARR_BASE 0xc4
#define CX86_RCR_BASE 0xdc
+#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+ X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+ X86_CR0_PG)
#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a97a6b611531..d449c5a9db96 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -27,7 +27,6 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
KASAN_SANITIZE_stacktrace.o := n
KASAN_SANITIZE_paravirt.o := n
-OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
@@ -128,9 +127,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
-obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
-obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
-obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
+obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
+obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
+obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index e17942c131c8..de260fae1017 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -22,6 +22,7 @@ obj-y += rdrand.o
obj-y += match.o
obj-y += bugs.o
obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
+obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176bae7fd8..cdf79ab628c2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1301,18 +1301,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
pr_cont(")\n");
}
-static __init int setup_disablecpuid(char *arg)
+/*
+ * clearcpuid= was already parsed in fpu__init_parse_early_param.
+ * But we need to keep a dummy __setup around otherwise it would
+ * show up as an environment variable for init.
+ */
+static __init int setup_clearcpuid(char *arg)
{
- int bit;
-
- if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
- setup_clear_cpu_cap(bit);
- else
- return 0;
-
return 1;
}
-__setup("clearcpuid=", setup_disablecpuid);
+__setup("clearcpuid=", setup_clearcpuid);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1572,9 +1570,13 @@ void cpu_init(void)
initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, me);
- load_sp0(t, &current->thread);
+ /*
+ * Initialize the TSS. Don't bother initializing sp0, as the initial
+ * task never enters user mode.
+ */
set_tss_desc(cpu, t);
load_TR_desc();
+
load_mm_ldt(&init_mm);
clear_all_debug_regs();
@@ -1596,7 +1598,6 @@ void cpu_init(void)
int cpu = smp_processor_id();
struct task_struct *curr = current;
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
- struct thread_struct *thread = &curr->thread;
wait_for_master_cpu(cpu);
@@ -1627,9 +1628,13 @@ void cpu_init(void)
initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, curr);
- load_sp0(t, thread);
+ /*
+ * Initialize the TSS. Don't bother initializing sp0, as the initial
+ * task never enters user mode.
+ */
set_tss_desc(cpu, t);
load_TR_desc();
+
load_mm_ldt(&init_mm);
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 000000000000..904b0a3c4e53
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,121 @@
+/* Declare dependencies between CPUIDs */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cpufeature.h>
+
+struct cpuid_dep {
+ unsigned int feature;
+ unsigned int depends;
+};
+
+/*
+ * Table of CPUID features that depend on others.
+ *
+ * This only includes dependencies that can be usefully disabled, not
+ * features part of the base set (like FPU).
+ *
+ * Note this all is not __init / __initdata because it can be
+ * called from cpu hotplug. It shouldn't do anything in this case,
+ * but it's difficult to tell that to the init reference checker.
+ */
+static const struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
+ { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
+ { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM2, X86_FEATURE_XMM },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
+ { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
+ { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
+ { X86_FEATURE_AES, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ {}
+};
+
+static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ /*
+ * Note: This could use the non atomic __*_bit() variants, but the
+ * rest of the cpufeature code uses atomics as well, so keep it for
+ * consistency. Cleanup all of it separately.
+ */
+ if (!c) {
+ clear_cpu_cap(&boot_cpu_data, feature);
+ set_bit(feature, (unsigned long *)cpu_caps_cleared);
+ } else {
+ clear_bit(feature, (unsigned long *)c->x86_capability);
+ }
+}
+
+/* Take the capabilities and the BUG bits into account */
+#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
+
+static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
+ const struct cpuid_dep *d;
+ bool changed;
+
+ if (WARN_ON(feature >= MAX_FEATURE_BITS))
+ return;
+
+ clear_feature(c, feature);
+
+ /* Collect all features to disable, handling dependencies */
+ memset(disable, 0, sizeof(disable));
+ __set_bit(feature, disable);
+
+ /* Loop until we get a stable state. */
+ do {
+ changed = false;
+ for (d = cpuid_deps; d->feature; d++) {
+ if (!test_bit(d->depends, disable))
+ continue;
+ if (__test_and_set_bit(d->feature, disable))
+ continue;
+
+ changed = true;
+ clear_feature(c, d->feature);
+ }
+ } while (changed);
+}
+
+void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ do_clear_cpu_cap(c, feature);
+}
+
+void setup_clear_cpu_cap(unsigned int feature)
+{
+ do_clear_cpu_cap(NULL, feature);
+}
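
The do/while above computes a transitive closure over the table, so a single clear fans out; a hedged example of the effect:

	setup_clear_cpu_cap(X86_FEATURE_AVX);
	/* ...also clears FMA, AVX2 and AVX512F directly, and via AVX512F
	 * every AVX512 sub-feature listed, in one stable pass. */
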
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7affb7e3d9a5..6abd83572b01 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
+ char arg[32];
+ char *argptr = arg;
+ int bit;
+
if (cmdline_find_option_bool(boot_command_line, "no387"))
setup_clear_cpu_cap(X86_FEATURE_FPU);
@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+ if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
+ sizeof(arg)) &&
+ get_option(&argptr, &bit) &&
+ bit >= 0 &&
+ bit < NCAPINTS * 32)
+ setup_clear_cpu_cap(bit);
}
/*
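
Usage note (hedged arithmetic): clearcpuid= takes the raw bit number, word*32+bit, so for example:

	clearcpuid=154	# 4*32+26 == X86_FEATURE_XSAVE; dependent bits
			# then fall out via the cpuid-deps table
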
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f1d5476c9022..87a57b7642d3 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -15,6 +15,7 @@
#include <asm/fpu/xstate.h>
#include <asm/tlbflush.h>
+#include <asm/cpufeature.h>
/*
* Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
"unknown xstate feature" ,
};
+static short xsave_cpuid_features[] __initdata = {
+ X86_FEATURE_FPU,
+ X86_FEATURE_XMM,
+ X86_FEATURE_AVX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_INTEL_PT,
+ X86_FEATURE_PKU,
+};
+
/*
* Mask of xstate features supported by the CPU and the kernel:
*/
@@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size;
void fpu__xstate_clear_all_cpu_caps(void)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
- setup_clear_cpu_cap(X86_FEATURE_XSAVES);
- setup_clear_cpu_cap(X86_FEATURE_AVX);
- setup_clear_cpu_cap(X86_FEATURE_AVX2);
- setup_clear_cpu_cap(X86_FEATURE_AVX512F);
- setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
- setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
- setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
- setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
- setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
- setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
- setup_clear_cpu_cap(X86_FEATURE_MPX);
- setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
- setup_clear_cpu_cap(X86_FEATURE_PKU);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
}
/*
@@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void)
unsigned int eax, ebx, ecx, edx;
static int on_boot_cpu __initdata = 1;
int err;
+ int i;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void)
goto out_disable;
}
+ /*
+ * Clear XSAVE features that are disabled in the normal CPUID.
+ */
+ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
+ if (!boot_cpu_has(xsave_cpuid_features[i]))
+ xfeatures_mask &= ~BIT(i);
+ }
+
xfeatures_mask &= fpu__get_supported_xfeatures_mask();
/* Enable xstate instructions to be able to continue with initialization: */
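
The loop indexes xsave_cpuid_features[] by xfeature number, which is why MPX and AVX512F repeat. The assumed mapping (standard XFEATURE order, stated here for orientation):

	/*  0 FP      -> FPU      5 OPMASK    -> AVX512F
	 *  1 SSE     -> XMM      6 ZMM_Hi256 -> AVX512F
	 *  2 YMM     -> AVX      7 Hi16_ZMM  -> AVX512F
	 *  3 BNDREGS -> MPX      8 PT        -> INTEL_PT
	 *  4 BNDCSR  -> MPX      9 PKRU      -> PKU
	 * hence xfeatures_mask &= ~BIT(i) drops exactly the disabled state. */
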
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 9ed3074d0d27..62474df4e3e7 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -211,9 +211,6 @@ ENTRY(startup_32_smp)
#endif
.Ldefault_entry:
-#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
- X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
- X86_CR0_PG)
movl $(CR0_STATE & ~X86_CR0_PG),%eax
movl %eax,%cr0
@@ -401,7 +398,7 @@ ENTRY(early_idt_handler_array)
# 24(%esp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+ .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
pushl $0 # Dummy error code, to make stack frame uniform
.endif
pushl $i # 20(%esp) Vector number
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 2be7d1e7fcf1..c03e02a83c4e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -50,6 +50,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
.code64
.globl startup_64
startup_64:
+ UNWIND_HINT_EMPTY
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
* and someone has loaded an identity mapped page table
@@ -89,6 +90,7 @@ startup_64:
addq $(early_top_pgt - __START_KERNEL_map), %rax
jmp 1f
ENTRY(secondary_startup_64)
+ UNWIND_HINT_EMPTY
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
* and someone has loaded a mapped page table.
@@ -133,6 +135,7 @@ ENTRY(secondary_startup_64)
movq $1f, %rax
jmp *%rax
1:
+ UNWIND_HINT_EMPTY
/* Check if nx is implemented */
movl $0x80000001, %eax
@@ -150,9 +153,6 @@ ENTRY(secondary_startup_64)
1: wrmsr /* Make changes effective */
/* Setup cr0 */
-#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
- X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
- X86_CR0_PG)
movl $CR0_STATE, %eax
/* Make changes effective */
movq %rax, %cr0
@@ -235,7 +235,7 @@ ENTRY(secondary_startup_64)
pushq %rax # target address in negative space
lretq
.Lafter_lret:
-ENDPROC(secondary_startup_64)
+END(secondary_startup_64)
#include "verify_cpu.S"
@@ -247,6 +247,7 @@ ENDPROC(secondary_startup_64)
*/
ENTRY(start_cpu0)
movq initial_stack(%rip), %rsp
+ UNWIND_HINT_EMPTY
jmp .Ljump_to_C_code
ENDPROC(start_cpu0)
#endif
@@ -266,26 +267,24 @@ ENDPROC(start_cpu0)
.quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
__FINITDATA
-bad_address:
- jmp bad_address
-
__INIT
ENTRY(early_idt_handler_array)
- # 104(%rsp) %rflags
- # 96(%rsp) %cs
- # 88(%rsp) %rip
- # 80(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
- pushq $0 # Dummy error code, to make stack frame uniform
+ .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
+ UNWIND_HINT_IRET_REGS
+ pushq $0 # Dummy error code, to make stack frame uniform
+ .else
+ UNWIND_HINT_IRET_REGS offset=8
.endif
pushq $i # 72(%rsp) Vector number
jmp early_idt_handler_common
+ UNWIND_HINT_IRET_REGS
i = i + 1
.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
-ENDPROC(early_idt_handler_array)
+ UNWIND_HINT_IRET_REGS offset=16
+END(early_idt_handler_array)
early_idt_handler_common:
/*
@@ -313,6 +312,7 @@ early_idt_handler_common:
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
+ UNWIND_HINT_REGS
cmpq $14,%rsi /* Page fault? */
jnz 10f
@@ -327,8 +327,8 @@ early_idt_handler_common:
20:
decl early_recursion_flag(%rip)
- jmp restore_regs_and_iret
-ENDPROC(early_idt_handler_common)
+ jmp restore_regs_and_return_to_kernel
+END(early_idt_handler_common)
__INITDATA
@@ -435,7 +435,7 @@ ENTRY(phys_base)
EXPORT_SYMBOL(phys_base)
#include "../../x86/xen/xen-head.S"
-
+
__PAGE_ALIGNED_BSS
NEXT_PAGE(empty_zero_page)
.skip PAGE_SIZE
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index f0e64db18ac8..0402d44deb4d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
@@ -294,8 +295,8 @@ out:
return error;
}
-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
- unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int, func, void __user *, ptr,
+ unsigned long, bytecount)
{
int ret = -ENOSYS;
@@ -313,5 +314,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
ret = write_ldt(ptr, bytecount, 0);
break;
}
- return ret;
+ /*
+ * The SYSCALL_DEFINE() macros give us an 'unsigned long'
+ * return type, but the ABI for sys_modify_ldt() expects
+ * 'int'. This cast gives us an int-sized value in %rax
+ * for the return code. The 'unsigned' is necessary so
+ * the compiler does not try to sign-extend the negative
+ * return codes into the high half of the register when
+ * taking the value from int->long.
+ */
+ return (unsigned int)ret;
}
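
The cast's effect in isolation (hedged illustration of the sign-extension the comment warns about, using -EINVAL == -22):

	long as_long = (long)(int)-22;	  /* 0xffffffffffffffea: sign-extended */
	long as_uint = (unsigned int)-22; /* 0x00000000ffffffea: high half clear */
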
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index bd6b85fac666..ff8a9acbcf8b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -48,7 +48,13 @@
*/
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
.x86_tss = {
- .sp0 = TOP_OF_INIT_STACK,
+ /*
+ * .sp0 is only used when entering ring 0 from a lower
+ * privilege level. Since the init task never runs anything
+ * but ring 0 code, there is no need for a valid value here.
+ * Poison it.
+ */
+ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
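
Hedged arithmetic on the poison value:

	/* 64-bit: (1UL << 63) + 1 == 0x8000000000000001 (non-canonical, odd)
	 * 32-bit: (1UL << 31) + 1 == 0x80000001 (misaligned, not a kernel
	 * stack on typical configs) -- either way, a stray load through
	 * .sp0 faults loudly instead of landing on a plausible stack. */
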
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 11966251cd42..45bf0c5f93e1 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Reload esp0 and cpu_current_top_of_stack. This changes
- * current_thread_info().
+ * current_thread_info(). Refresh the SYSENTER configuration in
+ * case prev or next is vm86.
*/
- load_sp0(tss, next);
+ update_sp0(next_p);
+ refresh_sysenter_cs(next);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 302e7b2572d1..eeeb34f85c25 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
struct inactive_task_frame *frame;
struct task_struct *me = current;
- p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
fork_frame = container_of(childregs, struct fork_frame, regs);
frame = &fork_frame->frame;
@@ -464,8 +463,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
this_cpu_write(current_task, next_p);
- /* Reload esp0 and ss1. This changes current_thread_info(). */
- load_sp0(tss, next);
+ /* Reload sp0. */
+ update_sp0(next_p);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ad59edd84de7..06c18fe1c09e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -961,8 +961,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
- per_cpu(cpu_current_top_of_stack, cpu) =
- (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+ per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 67db4f43309e..42a9c4458f5d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
* will catch asm bugs and any attempt to use ist_preempt_enable
* from double_fault.
*/
- BUG_ON((unsigned long)(current_top_of_stack() -
- current_stack_pointer) >= THREAD_SIZE);
+ BUG_ON(!on_thread_stack());
preempt_enable_no_resched();
}
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 014ea59aa153..3d3c2f71f617 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -33,7 +33,7 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
-verify_cpu:
+ENTRY(verify_cpu)
pushf # Save caller passed flags
push $0 # Kill any dangerous flags
popf
@@ -139,3 +139,4 @@ verify_cpu:
popf # Restore caller passed flags
xorl %eax, %eax
ret
+ENDPROC(verify_cpu)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 7924a5356c8a..a7b44c75c642 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -54,6 +54,7 @@
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/vm86.h>
+#include <asm/switch_to.h>
/*
* Known problems:
@@ -93,7 +94,6 @@
void save_v86_state(struct kernel_vm86_regs *regs, int retval)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86plus_struct __user *user;
struct vm86 *vm86 = current->thread.vm86;
@@ -145,12 +145,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
do_exit(SIGSEGV);
}
- tss = &per_cpu(cpu_tss, get_cpu());
+ preempt_disable();
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
- load_sp0(tss, &tsk->thread);
+ update_sp0(tsk);
+ refresh_sysenter_cs(&tsk->thread);
vm86->saved_sp0 = 0;
- put_cpu();
+ preempt_enable();
memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
@@ -236,7 +237,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86 *vm86 = tsk->thread.vm86;
struct kernel_vm86_regs vm86regs;
@@ -364,15 +364,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
vm86->saved_sp0 = tsk->thread.sp0;
lazy_save_gs(vm86->regs32.gs);
- tss = &per_cpu(cpu_tss, get_cpu());
/* make room for real-mode segments */
+ preempt_disable();
tsk->thread.sp0 += 16;
- if (static_cpu_has(X86_FEATURE_SEP))
+ if (static_cpu_has(X86_FEATURE_SEP)) {
tsk->thread.sysenter_cs = 0;
+ refresh_sysenter_cs(&tsk->thread);
+ }
- load_sp0(tss, &tsk->thread);
- put_cpu();
+ update_sp0(tsk);
+ preempt_enable();
if (vm86->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
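
The dropped get_cpu()/put_cpu() pair existed only to pin the CPU while the per-CPU TSS was poked through a pointer; with that pointer gone, bare preemption control expresses the same invariant (sketch; new_sp0 is a stand-in value):

	preempt_disable();		/* sp0/sysenter_cs and the CPU they */
	tsk->thread.sp0 = new_sp0;	/* land on must not be separated    */
	update_sp0(tsk);		/* by preemption                    */
	preempt_enable();
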
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e2baeaa053a5..db71c73530bd 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -29,26 +29,6 @@
#include <asm/trace/exceptions.h>
/*
- * Page fault error code bits:
- *
- * bit 0 == 0: no page found 1: protection fault
- * bit 1 == 0: read access 1: write access
- * bit 2 == 0: kernel-mode access 1: user-mode access
- * bit 3 == 1: use of reserved bit detected
- * bit 4 == 1: fault was an instruction fetch
- * bit 5 == 1: protection keys block access
- */
-enum x86_pf_error_code {
-
- PF_PROT = 1 << 0,
- PF_WRITE = 1 << 1,
- PF_USER = 1 << 2,
- PF_RSVD = 1 << 3,
- PF_INSTR = 1 << 4,
- PF_PK = 1 << 5,
-};
-
-/*
* Returns 0 if mmiotrace is disabled, or if the fault is not
* handled by mmiotrace:
*/
@@ -149,7 +129,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* If it was a exec (instruction fetch) fault on NX page, then
* do not ignore the fault:
*/
- if (error_code & PF_INSTR)
+ if (error_code & X86_PF_INSTR)
return 0;
instr = (void *)convert_ip_to_linear(current, regs);
@@ -179,7 +159,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* siginfo so userspace can discover which protection key was set
* on the PTE.
*
- * If we get here, we know that the hardware signaled a PF_PK
+ * If we get here, we know that the hardware signaled an X86_PF_PK
* fault and that there was a VMA once we got in the fault
* handler. It does *not* guarantee that the VMA we find here
* was the one that we faulted on.
@@ -204,7 +184,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
/*
* force_sig_info_fault() is called from a number of
* contexts, some of which have a VMA and some of which
- * do not. The PF_PK handing happens after we have a
+ * do not. The X86_PF_PK handling happens after we have a
* valid VMA, so we should never reach this without a
* valid VMA.
*/
@@ -697,7 +677,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
if (!oops_may_print())
return;
- if (error_code & PF_INSTR) {
+ if (error_code & X86_PF_INSTR) {
unsigned int level;
pgd_t *pgd;
pte_t *pte;
@@ -779,7 +759,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
*/
if (current->thread.sig_on_uaccess_err && signal) {
tsk->thread.trap_nr = X86_TRAP_PF;
- tsk->thread.error_code = error_code | PF_USER;
+ tsk->thread.error_code = error_code | X86_PF_USER;
tsk->thread.cr2 = address;
/* XXX: hwpoison faults will set the wrong code. */
@@ -897,7 +877,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
struct task_struct *tsk = current;
/* User mode accesses just cause a SIGSEGV */
- if (error_code & PF_USER) {
+ if (error_code & X86_PF_USER) {
/*
* It's possible to have interrupts off here:
*/
@@ -918,7 +898,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
* Instruction fetch faults in the vsyscall page might need
* emulation.
*/
- if (unlikely((error_code & PF_INSTR) &&
+ if (unlikely((error_code & X86_PF_INSTR) &&
((address & ~0xfff) == VSYSCALL_ADDR))) {
if (emulate_vsyscall(regs, address))
return;
@@ -931,7 +911,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
* are always protection faults.
*/
if (address >= TASK_SIZE_MAX)
- error_code |= PF_PROT;
+ error_code |= X86_PF_PROT;
if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
@@ -992,11 +972,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return false;
- if (error_code & PF_PK)
+ if (error_code & X86_PF_PK)
return true;
/* this checks permission keys on the VMA: */
- if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
- (error_code & PF_INSTR), foreign))
+ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+ (error_code & X86_PF_INSTR), foreign))
return true;
return false;
}
@@ -1024,7 +1004,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
int code = BUS_ADRERR;
/* Kernel mode? Handle exceptions or die: */
- if (!(error_code & PF_USER)) {
+ if (!(error_code & X86_PF_USER)) {
no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
return;
}
@@ -1052,14 +1032,14 @@ static noinline void
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 *pkey, unsigned int fault)
{
- if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+ if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
no_context(regs, error_code, address, 0, 0);
return;
}
if (fault & VM_FAULT_OOM) {
/* Kernel mode? Handle exceptions or die: */
- if (!(error_code & PF_USER)) {
+ if (!(error_code & X86_PF_USER)) {
no_context(regs, error_code, address,
SIGSEGV, SEGV_MAPERR);
return;
@@ -1084,16 +1064,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
{
- if ((error_code & PF_WRITE) && !pte_write(*pte))
+ if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
return 0;
- if ((error_code & PF_INSTR) && !pte_exec(*pte))
+ if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
return 0;
/*
* Note: We do not do lazy flushing on protection key
- * changes, so no spurious fault will ever set PF_PK.
+ * changes, so no spurious fault will ever set X86_PF_PK.
*/
- if ((error_code & PF_PK))
+ if ((error_code & X86_PF_PK))
return 1;
return 1;
@@ -1139,8 +1119,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
* change, so user accesses are not expected to cause spurious
* faults.
*/
- if (error_code != (PF_WRITE | PF_PROT)
- && error_code != (PF_INSTR | PF_PROT))
+ if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+ error_code != (X86_PF_INSTR | X86_PF_PROT))
return 0;
pgd = init_mm.pgd + pgd_index(address);
@@ -1200,19 +1180,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
* always an unconditional error and can never result in
* a follow-up action to resolve the fault, like a COW.
*/
- if (error_code & PF_PK)
+ if (error_code & X86_PF_PK)
return 1;
/*
* Make sure to check the VMA so that we do not perform
- * faults just to hit a PF_PK as soon as we fill in a
+ * faults just to hit an X86_PF_PK as soon as we fill in a
* page.
*/
- if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
- (error_code & PF_INSTR), foreign))
+ if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+ (error_code & X86_PF_INSTR), foreign))
return 1;
- if (error_code & PF_WRITE) {
+ if (error_code & X86_PF_WRITE) {
/* write, present and write, not present: */
if (unlikely(!(vma->vm_flags & VM_WRITE)))
return 1;
@@ -1220,7 +1200,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
}
/* read, present: */
- if (unlikely(error_code & PF_PROT))
+ if (unlikely(error_code & X86_PF_PROT))
return 1;
/* read, not present: */
@@ -1243,7 +1223,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
if (!static_cpu_has(X86_FEATURE_SMAP))
return false;
- if (error_code & PF_USER)
+ if (error_code & X86_PF_USER)
return false;
if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
@@ -1296,7 +1276,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
* protection error (error_code & 9) == 0.
*/
if (unlikely(fault_in_kernel_space(address))) {
- if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+ if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
if (vmalloc_fault(address) >= 0)
return;
@@ -1324,7 +1304,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
if (unlikely(kprobes_fault(regs)))
return;
- if (unlikely(error_code & PF_RSVD))
+ if (unlikely(error_code & X86_PF_RSVD))
pgtable_bad(regs, error_code, address);
if (unlikely(smap_violation(error_code, regs))) {
@@ -1350,7 +1330,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
*/
if (user_mode(regs)) {
local_irq_enable();
- error_code |= PF_USER;
+ error_code |= X86_PF_USER;
flags |= FAULT_FLAG_USER;
} else {
if (regs->flags & X86_EFLAGS_IF)
@@ -1359,9 +1339,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- if (error_code & PF_WRITE)
+ if (error_code & X86_PF_WRITE)
flags |= FAULT_FLAG_WRITE;
- if (error_code & PF_INSTR)
+ if (error_code & X86_PF_INSTR)
flags |= FAULT_FLAG_INSTRUCTION;
/*
@@ -1381,7 +1361,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
* space check, thus avoiding the deadlock:
*/
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
- if ((error_code & PF_USER) == 0 &&
+ if (!(error_code & X86_PF_USER) &&
!search_exception_tables(regs->ip)) {
bad_area_nosemaphore(regs, error_code, address, NULL);
return;
@@ -1408,7 +1388,7 @@ retry:
bad_area(regs, error_code, address);
return;
}
- if (error_code & PF_USER) {
+ if (error_code & X86_PF_USER) {
/*
* Accessing the stack below %sp is always a bug.
* The large cushion allows instructions like enter
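
The enum removed at the top of this file moves to a shared header (presumably <asm/traps.h>, matching the X86_PF_* spellings used above) so fault-handling code outside mm/fault.c can use the same bits. The bit meanings are unchanged from the deleted comment; a sketch of the relocated definitions:

enum x86_pf_error_code {
	X86_PF_PROT	= 1 << 0,	/* 0: no page found   1: protection fault */
	X86_PF_WRITE	= 1 << 1,	/* 0: read access     1: write access */
	X86_PF_USER	= 1 << 2,	/* 0: kernel-mode     1: user-mode access */
	X86_PF_RSVD	= 1 << 3,	/* use of reserved bit detected */
	X86_PF_INSTR	= 1 << 4,	/* fault was an instruction fetch */
	X86_PF_PK	= 1 << 5,	/* protection keys block access */
};
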
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 836a1eb5df43..3ee234b6234d 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -6,6 +6,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <os.h>
@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
mm->arch.ldt.entry_count = 0;
}
-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int, func, void __user *, ptr,
+ unsigned long, bytecount)
{
- return do_modify_ldt_skas(func, ptr, bytecount);
+ /* See non-um modify_ldt() for why we do this cast */
+ return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
}
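
A note on the cast, since the comment defers to the native implementation: SYSCALL_DEFINE3() makes the stub return 'long', while the modify_ldt() ABI is a 32-bit 'int'. Casting through 'unsigned int' zero-extends the 32-bit result instead of letting the compiler sign-extend it. A minimal illustration (hypothetical values, not from this patch):

long a = (unsigned int)-22;	/* 0x00000000ffffffea: zero-extended */
long b = (int)-22;		/* 0xffffffffffffffea: sign-extended */
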
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 69b9deff7e5c..e7b213047724 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -600,7 +600,7 @@ static struct trap_array_entry trap_array[] = {
#ifdef CONFIG_X86_MCE
{ machine_check, xen_machine_check, true },
#endif
- { nmi, xen_nmi, true },
+ { nmi, xen_xennmi, true },
{ overflow, xen_overflow, false },
#ifdef CONFIG_IA32_EMULATION
{ entry_INT80_compat, xen_entry_INT80_compat, false },
@@ -810,15 +810,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
}
}
-static void xen_load_sp0(struct tss_struct *tss,
- struct thread_struct *thread)
+static void xen_load_sp0(unsigned long sp0)
{
struct multicall_space mcs;
mcs = xen_mc_entry(0);
- MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
+ MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
- tss->x86_tss.sp0 = thread->sp0;
+ this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}
void xen_set_iopl_mask(unsigned mask)
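
xen_load_sp0() tracks the new load_sp0() signature, which now receives the top-of-stack value directly instead of digging it out of thread_struct. For comparison, the native op this shadows is presumably just the per-CPU TSS write, with no multicall:

/* Sketch of the native counterpart after this series: */
static inline void native_load_sp0(unsigned long sp0)
{
	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
}
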
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 51471408fdd1..8c0e047d0b80 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -13,6 +13,7 @@
* single-threaded.
*/
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
@@ -293,12 +294,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
#endif
memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
+ /*
+ * Bring up the CPU in cpu_bringup_and_idle() with the stack
+ * pointing just below where pt_regs would be if it were a normal
+ * kernel entry.
+ */
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
ctxt->flags = VGCF_IN_KERNEL;
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
+ ctxt->user_regs.cs = __KERNEL_CS;
+ ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
xen_copy_trap_info(ctxt->trap_ctxt);
@@ -313,8 +321,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->gdt_frames[0] = gdt_mfn;
ctxt->gdt_ents = GDT_ENTRIES;
+ /*
+ * Set SS:SP that Xen will use when entering guest kernel mode
+ * from guest user mode. Subsequent calls to load_sp0() can
+ * change this value.
+ */
ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.sp0;
+ ctxt->kernel_sp = task_top_of_stack(idle);
#ifdef CONFIG_X86_32
ctxt->event_callback_cs = __KERNEL_CS;
@@ -326,10 +339,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
(unsigned long)xen_hypervisor_callback;
ctxt->failsafe_callback_eip =
(unsigned long)xen_failsafe_callback;
- ctxt->user_regs.cs = __KERNEL_CS;
per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
BUG();
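
Both new comments describe the same bring-up contract: the vCPU starts with its stack pointer just below an empty pt_regs, and Xen's kernel_sp (the analogue of TSS.sp0) points at the task's top of stack. The assumed layout, highest address first:

/*
 * task_top_of_stack(idle) --> +------------------+ <- ctxt->kernel_sp
 *                             |      pt_regs     |
 * task_pt_regs(idle) -------> +------------------+ <- ctxt->user_regs.esp
 *                             |  bring-up stack  |
 *                             |   (grows down)   |
 * task_stack_page(idle) ----> +------------------+
 */
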
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index dae2cc33afb5..286ecc198562 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -29,7 +29,7 @@ xen_pv_trap debug
xen_pv_trap xendebug
xen_pv_trap int3
xen_pv_trap xenint3
-xen_pv_trap nmi
+xen_pv_trap xennmi
xen_pv_trap overflow
xen_pv_trap bounds
xen_pv_trap invalid_op
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index a7525e95d53f..124941d09b2b 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -9,6 +9,7 @@
#include <asm/boot.h>
#include <asm/asm.h>
#include <asm/page_types.h>
+#include <asm/unwind_hints.h>
#include <xen/interface/elfnote.h>
#include <xen/interface/features.h>
@@ -19,6 +20,7 @@
#ifdef CONFIG_XEN_PV
__INIT
ENTRY(startup_xen)
+ UNWIND_HINT_EMPTY
cld
/* Clear .bss */
@@ -33,21 +35,24 @@ ENTRY(startup_xen)
mov $init_thread_union+THREAD_SIZE, %_ASM_SP
jmp xen_start_kernel
-
+END(startup_xen)
__FINIT
#endif
.pushsection .text
.balign PAGE_SIZE
ENTRY(hypercall_page)
- .skip PAGE_SIZE
+ .rept (PAGE_SIZE / 32)
+ UNWIND_HINT_EMPTY
+ .skip 32
+ .endr
#define HYPERCALL(n) \
.equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
#include <asm/xen-hypercalls.h>
#undef HYPERCALL
-
+END(hypercall_page)
.popsection
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 3ba24c428c3b..2fae850a3eff 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -214,7 +214,9 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, extack);
- if (err || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ if (err ||
+ !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index f905f2361d12..8bae88a150fd 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -146,11 +146,8 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
WARN_ON(host->sg_len > 1);
/* This DMAC cannot handle if buffer is not 8-bytes alignment */
- if (!IS_ALIGNED(sg->offset, 8)) {
- host->force_pio = true;
- renesas_sdhi_internal_dmac_enable_dma(host, false);
- return;
- }
+ if (!IS_ALIGNED(sg->offset, 8))
+ goto force_pio;
if (data->flags & MMC_DATA_READ) {
dtran_mode |= DTRAN_MODE_CH_NUM_CH1;
@@ -163,8 +160,8 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
}
ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, dir);
- if (ret < 0)
- return;
+ if (ret == 0)
+ goto force_pio;
renesas_sdhi_internal_dmac_enable_dma(host, true);
@@ -176,6 +173,12 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
dtran_mode);
renesas_sdhi_internal_dmac_dm_write(host, DM_DTRAN_ADDR,
sg->dma_address);
+
+ return;
+
+force_pio:
+ host->force_pio = true;
+ renesas_sdhi_internal_dmac_enable_dma(host, false);
}
static void renesas_sdhi_internal_dmac_issue_tasklet_fn(unsigned long arg)
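
The error test changes because dma_map_sg() returns the number of mapped segments, with 0 meaning failure; it never returns a negative value, so the old "ret < 0" check could not fire. A sketch of the intended pattern (hypothetical helper name):

static int map_for_dma(struct device *dev, struct scatterlist *sg,
		       int sg_len, enum dma_data_direction dir)
{
	int nents = dma_map_sg(dev, sg, sg_len, dir);

	if (nents == 0)		/* 0 == mapping failed; never negative */
		return -EIO;	/* caller falls back, e.g. to PIO */

	return nents;
}
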
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index a7293e186e03..9c4e6199b854 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -47,6 +47,7 @@
#include <linux/mmc/sdio.h>
#include <linux/scatterlist.h>
#include <linux/spinlock.h>
+#include <linux/swiotlb.h>
#include <linux/workqueue.h>
#include "tmio_mmc.h"
@@ -1215,6 +1216,18 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host,
mmc->max_blk_count = pdata->max_blk_count ? :
(PAGE_SIZE / mmc->max_blk_size) * mmc->max_segs;
mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
+ /*
+ * swiotlb limits how large a single mapping may be, but there is no
+ * API to query that limit yet. Compute the maximum segment size
+ * locally, compare it against the current max_req_size, and clamp
+ * max_req_size as a workaround when it exceeds the limit.
+ */
+ if (swiotlb_max_segment()) {
+ unsigned int max_size = (1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
+
+ if (mmc->max_req_size > max_size)
+ mmc->max_req_size = max_size;
+ }
mmc->max_seg_size = mmc->max_req_size;
_host->native_hotplug = !(pdata->flags & TMIO_MMC_USE_GPIO_CD ||
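
With the usual in-tree swiotlb constants (IO_TLB_SHIFT = 11, i.e. 2 KiB slabs, and IO_TLB_SEGSIZE = 128 contiguous slabs per segment), the cap computed above works out to 256 KiB:

unsigned int max_size = (1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
			/* = 2048 * 128 = 262144 bytes = 256 KiB */
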
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8acfc1e099e1..63e56f6c1877 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -687,7 +687,7 @@
#define BUG_TABLE
#endif
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
#define ORC_UNWIND_TABLE \
. = ALIGN(4); \
.orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) { \
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 6737a8c9e8c6..d68b0569a5eb 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -175,7 +175,8 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev)
static inline s32 dev_pm_qos_raw_read_value(struct device *dev)
{
return IS_ERR_OR_NULL(dev->power.qos) ?
- 0 : pm_qos_read_value(&dev->power.qos->resume_latency);
+ PM_QOS_RESUME_LATENCY_NO_CONSTRAINT :
+ pm_qos_read_value(&dev->power.qos->resume_latency);
}
#else
static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dfdad67d8f6c..ff21b4dbb392 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -376,7 +376,7 @@ config STACK_VALIDATION
that runtime stack traces are more reliable.
This is also a prerequisite for generation of ORC unwind data, which
- is needed for CONFIG_ORC_UNWINDER.
+ is needed for CONFIG_UNWINDER_ORC.
For more information, see
tools/objtool/Documentation/stack-validation.txt.
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 4bb30206b942..c835f9080c43 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -161,6 +161,7 @@ int ioremap_page_range(unsigned long addr,
unsigned long next;
int err;
+ might_sleep();
BUG_ON(addr >= end);
start = addr;
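
The new might_sleep() documents and enforces that ioremap_page_range() may block: populating the mapping can allocate page-table pages. With CONFIG_DEBUG_ATOMIC_SLEEP, a caller in atomic context now gets a clear warning at the call site. A hypothetical misuse it would flag:

spin_lock(&some_lock);				/* atomic context */
err = ioremap_page_range(addr, addr + size,	/* -> might_sleep() splat */
			 phys_addr, prot);
spin_unlock(&some_lock);
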
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 061d0c3a420a..f965f477832e 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -258,7 +258,7 @@ ifneq ($(SKIP_STACK_VALIDATION),1)
__objtool_obj := $(objtree)/tools/objtool/objtool
-objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
+objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
ifndef CONFIG_FRAME_POINTER
objtool_args += --no-fp
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index c0e26ad1fa7e..9b341584eb1b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
if (insn->dead_end)
return 0;
- insn = next_insn;
- if (!insn) {
+ if (!next_insn) {
+ if (state.cfa.base == CFI_UNDEFINED)
+ return 0;
WARN("%s: unexpected end of section", sec->name);
return 1;
}
+
+ insn = next_insn;
}
return 0;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 31e0f9143840..07f329919828 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -70,7 +70,7 @@ static void cmd_usage(void)
printf("\n");
- exit(1);
+ exit(129);
}
static void handle_options(int *argc, const char ***argv)
@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
break;
} else {
fprintf(stderr, "Unknown option: %s\n", cmd);
- fprintf(stderr, "\n Usage: %s\n",
- objtool_usage_string);
- exit(1);
+ cmd_usage();
}
(*argv)++;