summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.debug2
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/boot/boot.h2
-rw-r--r--arch/x86/boot/compressed/head_32.S4
-rw-r--r--arch/x86/boot/compressed/head_64.S18
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c13
-rw-r--r--arch/x86/boot/main.c1
-rw-r--r--arch/x86/boot/string.c8
-rw-r--r--arch/x86/entry/calling.h17
-rw-r--r--arch/x86/entry/common.c13
-rw-r--r--arch/x86/entry/entry_32.S19
-rw-r--r--arch/x86/entry/entry_64.S29
-rw-r--r--arch/x86/entry/syscall_64.c25
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl4
-rw-r--r--arch/x86/entry/syscalls/syscalltbl.sh35
-rw-r--r--arch/x86/entry/thunk_32.S2
-rw-r--r--arch/x86/entry/thunk_64.S4
-rw-r--r--arch/x86/events/amd/ibs.c13
-rw-r--r--arch/x86/events/core.c38
-rw-r--r--arch/x86/events/intel/core.c107
-rw-r--r--arch/x86/events/intel/cstate.c44
-rw-r--r--arch/x86/events/intel/ds.c53
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/events/intel/pt.c336
-rw-r--r--arch/x86/events/intel/pt.h12
-rw-r--r--arch/x86/events/intel/rapl.c30
-rw-r--r--arch/x86/events/intel/uncore.c28
-rw-r--r--arch/x86/events/msr.c28
-rw-r--r--arch/x86/events/perf_event.h17
-rw-r--r--arch/x86/hyperv/mmu.c8
-rw-r--r--arch/x86/include/asm/barrier.h3
-rw-r--r--arch/x86/include/asm/bitops.h7
-rw-r--r--arch/x86/include/asm/bootparam_utils.h64
-rw-r--r--arch/x86/include/asm/cpufeature.h7
-rw-r--r--arch/x86/include/asm/cpufeatures.h5
-rw-r--r--arch/x86/include/asm/div64.h13
-rw-r--r--arch/x86/include/asm/efi.h5
-rw-r--r--arch/x86/include/asm/error-injection.h13
-rw-r--r--arch/x86/include/asm/fixmap.h5
-rw-r--r--arch/x86/include/asm/ftrace.h1
-rw-r--r--arch/x86/include/asm/intel-family.h69
-rw-r--r--arch/x86/include/asm/intel_pt.h2
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h12
-rw-r--r--arch/x86/include/asm/mmu_context.h8
-rw-r--r--arch/x86/include/asm/msr-index.h16
-rw-r--r--arch/x86/include/asm/msr.h3
-rw-r--r--arch/x86/include/asm/nospec-branch.h2
-rw-r--r--arch/x86/include/asm/perf_event.h12
-rw-r--r--arch/x86/include/asm/preempt.h2
-rw-r--r--arch/x86/include/asm/qspinlock.h15
-rw-r--r--arch/x86/include/asm/realmode.h1
-rw-r--r--arch/x86/include/asm/set_memory.h8
-rw-r--r--arch/x86/include/asm/syscall.h4
-rw-r--r--arch/x86/include/asm/text-patching.h4
-rw-r--r--arch/x86/include/asm/tlbflush.h30
-rw-r--r--arch/x86/include/asm/uaccess.h4
-rw-r--r--arch/x86/include/asm/unistd.h6
-rw-r--r--arch/x86/include/asm/uv/uv.h4
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h36
-rw-r--r--arch/x86/include/uapi/asm/byteorder.h2
-rw-r--r--arch/x86/include/uapi/asm/errno.h1
-rw-r--r--arch/x86/include/uapi/asm/fcntl.h1
-rw-r--r--arch/x86/include/uapi/asm/hwcap2.h2
-rw-r--r--arch/x86/include/uapi/asm/ioctl.h1
-rw-r--r--arch/x86/include/uapi/asm/ioctls.h1
-rw-r--r--arch/x86/include/uapi/asm/ipcbuf.h1
-rw-r--r--arch/x86/include/uapi/asm/param.h1
-rw-r--r--arch/x86/include/uapi/asm/resource.h1
-rw-r--r--arch/x86/include/uapi/asm/sigcontext32.h2
-rw-r--r--arch/x86/include/uapi/asm/termbits.h1
-rw-r--r--arch/x86/include/uapi/asm/termios.h1
-rw-r--r--arch/x86/include/uapi/asm/types.h7
-rw-r--r--arch/x86/include/uapi/asm/unistd.h2
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S10
-rw-r--r--arch/x86/kernel/alternative.c6
-rw-r--r--arch/x86/kernel/amd_nb.c3
-rw-r--r--arch/x86/kernel/apic/apic.c92
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c24
-rw-r--r--arch/x86/kernel/apic/io_apic.c8
-rw-r--r--arch/x86/kernel/apic/probe_32.c3
-rw-r--r--arch/x86/kernel/asm-offsets_64.c20
-rw-r--r--arch/x86/kernel/cpu/amd.c92
-rw-r--r--arch/x86/kernel/cpu/bugs.c125
-rw-r--r--arch/x86/kernel/cpu/common.c45
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c97
-rw-r--r--arch/x86/kernel/cpu/hygon.c21
-rw-r--r--arch/x86/kernel/cpu/intel.c31
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c2
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c1
-rw-r--r--arch/x86/kernel/cpu/umwait.c39
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/dumpstack.c7
-rw-r--r--arch/x86/kernel/head_64.S8
-rw-r--r--arch/x86/kernel/hpet.c12
-rw-r--r--arch/x86/kernel/kprobes/core.c2
-rw-r--r--arch/x86/kernel/kprobes/opt.c2
-rw-r--r--arch/x86/kernel/kvm.c10
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/pci-dma.c20
-rw-r--r--arch/x86/kernel/ptrace.c1
-rw-r--r--arch/x86/kernel/quirks.c4
-rw-r--r--arch/x86/kernel/smpboot.c5
-rw-r--r--arch/x86/kernel/stacktrace.c2
-rw-r--r--arch/x86/kernel/sysfb_efi.c46
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/tsc_msr.c5
-rw-r--r--arch/x86/kernel/umip.c65
-rw-r--r--arch/x86/kernel/uprobes.c17
-rw-r--r--arch/x86/kvm/debugfs.c46
-rw-r--r--arch/x86/kvm/hyperv.c5
-rw-r--r--arch/x86/kvm/lapic.c13
-rw-r--r--arch/x86/kvm/mmu.c120
-rw-r--r--arch/x86/kvm/svm.c28
-rw-r--r--arch/x86/kvm/vmx/nested.c8
-rw-r--r--arch/x86/kvm/vmx/vmx.c20
-rw-r--r--arch/x86/kvm/x86.c48
-rw-r--r--arch/x86/lib/copy_user_64.S14
-rw-r--r--arch/x86/lib/cpu.c1
-rw-r--r--arch/x86/lib/getuser.S16
-rw-r--r--arch/x86/lib/putuser.S22
-rw-r--r--arch/x86/math-emu/errors.c5
-rw-r--r--arch/x86/math-emu/fpu_trig.c2
-rw-r--r--arch/x86/mm/fault.c15
-rw-r--r--arch/x86/mm/init_32.c2
-rw-r--r--arch/x86/mm/ioremap.c1
-rw-r--r--arch/x86/mm/numa.c4
-rw-r--r--arch/x86/mm/pageattr.c136
-rw-r--r--arch/x86/mm/tlb.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c9
-rw-r--r--arch/x86/pci/mmconfig-shared.c5
-rw-r--r--arch/x86/platform/efi/efi.c39
-rw-r--r--arch/x86/platform/intel/iosf_mbi.c100
-rw-r--r--arch/x86/platform/uv/bios_uv.c10
-rw-r--r--arch/x86/platform/uv/tlb_uv.c4
-rw-r--r--arch/x86/power/cpu.c86
-rw-r--r--arch/x86/purgatory/Makefile37
-rw-r--r--arch/x86/purgatory/purgatory.c6
-rw-r--r--arch/x86/purgatory/string.c23
-rw-r--r--arch/x86/realmode/rm/header.S1
-rw-r--r--arch/x86/realmode/rm/trampoline_32.S3
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S3
-rw-r--r--arch/x86/realmode/rm/trampoline_common.S4
145 files changed, 1912 insertions, 1064 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 222855cc0158..58eae28c3dd6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1503,7 +1503,7 @@ config X86_5LEVEL
config X86_DIRECT_GBPAGES
def_bool y
- depends on X86_64 && !DEBUG_PAGEALLOC
+ depends on X86_64
---help---
Certain kernel features effectively disable kernel
linear 1 GB mappings (even if the CPU otherwise
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 71c92db47c41..bf9cd83de777 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -171,7 +171,7 @@ config HAVE_MMIOTRACE_SUPPORT
config X86_DECODER_SELFTEST
bool "x86 instruction decoder selftest"
- depends on DEBUG_KERNEL && KPROBES
+ depends on DEBUG_KERNEL && INSTRUCTION_DECODER
depends on !COMPILE_TEST
---help---
Perform x86 instruction decoder selftests at build time.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 56e748a7679f..94df0868804b 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -38,6 +38,7 @@ REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector)
+REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member)
REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4))
export REALMODE_CFLAGS
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 19eca14b49a0..ca866f1cca2e 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -28,8 +28,6 @@
#include "cpuflags.h"
/* Useful macros */
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
extern struct setup_header hdr;
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 37380c0d5999..5e30eaaf8576 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -140,7 +140,7 @@ ENTRY(startup_32)
/*
* Jump to the relocated address.
*/
- leal relocated(%ebx), %eax
+ leal .Lrelocated(%ebx), %eax
jmp *%eax
ENDPROC(startup_32)
@@ -209,7 +209,7 @@ ENDPROC(efi32_stub_entry)
#endif
.text
-relocated:
+.Lrelocated:
/*
* Clear BSS (stack is currently empty)
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 6233ae35d0d9..d98cd483377e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -87,7 +87,7 @@ ENTRY(startup_32)
call verify_cpu
testl %eax, %eax
- jnz no_longmode
+ jnz .Lno_longmode
/*
* Compute the delta between where we were compiled to run at
@@ -322,7 +322,7 @@ ENTRY(startup_64)
1: popq %rdi
subq $1b, %rdi
- call adjust_got
+ call .Ladjust_got
/*
* At this point we are in long mode with 4-level paging enabled,
@@ -421,7 +421,7 @@ trampoline_return:
/* The new adjustment is the relocation address */
movq %rbx, %rdi
- call adjust_got
+ call .Ladjust_got
/*
* Copy the compressed kernel to the end of our buffer
@@ -440,7 +440,7 @@ trampoline_return:
/*
* Jump to the relocated address.
*/
- leaq relocated(%rbx), %rax
+ leaq .Lrelocated(%rbx), %rax
jmp *%rax
#ifdef CONFIG_EFI_STUB
@@ -511,7 +511,7 @@ ENDPROC(efi64_stub_entry)
#endif
.text
-relocated:
+.Lrelocated:
/*
* Clear BSS (stack is currently empty)
@@ -548,7 +548,7 @@ relocated:
* first time we touch GOT).
* RDI is the new adjustment to apply.
*/
-adjust_got:
+.Ladjust_got:
/* Walk through the GOT adding the address to the entries */
leaq _got(%rip), %rdx
leaq _egot(%rip), %rcx
@@ -622,7 +622,7 @@ ENTRY(trampoline_32bit_src)
movl %eax, %cr4
/* Calculate address of paging_enabled() once we are executing in the trampoline */
- leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
+ leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
/* Prepare the stack for far return to Long Mode */
pushl $__KERNEL_CS
@@ -635,7 +635,7 @@ ENTRY(trampoline_32bit_src)
lret
.code64
-paging_enabled:
+.Lpaging_enabled:
/* Return from the trampoline */
jmp *%rdi
@@ -647,7 +647,7 @@ paging_enabled:
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
.code32
-no_longmode:
+.Lno_longmode:
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
hlt
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 5f2d03067ae5..c8862696a47b 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -72,6 +72,8 @@ static unsigned long find_trampoline_placement(void)
/* Find the first usable memory region under bios_start. */
for (i = boot_params->e820_entries - 1; i >= 0; i--) {
+ unsigned long new = bios_start;
+
entry = &boot_params->e820_table[i];
/* Skip all entries above bios_start. */
@@ -84,15 +86,20 @@ static unsigned long find_trampoline_placement(void)
/* Adjust bios_start to the end of the entry if needed. */
if (bios_start > entry->addr + entry->size)
- bios_start = entry->addr + entry->size;
+ new = entry->addr + entry->size;
/* Keep bios_start page-aligned. */
- bios_start = round_down(bios_start, PAGE_SIZE);
+ new = round_down(new, PAGE_SIZE);
/* Skip the entry if it's too small. */
- if (bios_start - TRAMPOLINE_32BIT_SIZE < entry->addr)
+ if (new - TRAMPOLINE_32BIT_SIZE < entry->addr)
continue;
+ /* Protect against underflow. */
+ if (new - TRAMPOLINE_32BIT_SIZE > bios_start)
+ break;
+
+ bios_start = new;
break;
}
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 996df3d586f0..e3add857c2c9 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -10,6 +10,7 @@
/*
* Main module for the real-mode kernel code
*/
+#include <linux/build_bug.h>
#include "boot.h"
#include "string.h"
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 401e30ca0a75..8272a4492844 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -37,6 +37,14 @@ int memcmp(const void *s1, const void *s2, size_t len)
return diff;
}
+/*
+ * Clang may lower `memcmp == 0` to `bcmp == 0`.
+ */
+int bcmp(const void *s1, const void *s2, size_t len)
+{
+ return memcmp(s1, s2, len);
+}
+
int strcmp(const char *str1, const char *str2)
{
const unsigned char *s1 = (const unsigned char *)str1;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 830bd984182b..515c0ceeb4a3 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -314,6 +314,23 @@ For 32-bit we have the following conventions - kernel is built with
#endif
+/*
+ * Mitigate Spectre v1 for conditional swapgs code paths.
+ *
+ * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
+ * prevent a speculative swapgs when coming from kernel space.
+ *
+ * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
+ * to prevent the swapgs from getting speculatively skipped when coming from
+ * user space.
+ */
+.macro FENCE_SWAPGS_USER_ENTRY
+ ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
+.endm
+.macro FENCE_SWAPGS_KERNEL_ENTRY
+ ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
+.endm
+
.macro STACKLEAK_ERASE_NOCLOBBER
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
PUSH_AND_CLEAR_REGS
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 536b574b6161..3f8e22615812 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -285,15 +285,16 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
nr = syscall_trace_enter(regs);
- /*
- * NB: Native and x32 syscalls are dispatched from the same
- * table. The only functional difference is the x32 bit in
- * regs->orig_ax, which changes the behavior of some syscalls.
- */
- nr &= __SYSCALL_MASK;
if (likely(nr < NR_syscalls)) {
nr = array_index_nospec(nr, NR_syscalls);
regs->ax = sys_call_table[nr](regs);
+#ifdef CONFIG_X86_X32_ABI
+ } else if (likely((nr & __X32_SYSCALL_BIT) &&
+ (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
+ nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
+ X32_NR_syscalls);
+ regs->ax = x32_sys_call_table[nr](regs);
+#endif
}
syscall_return_slowpath(regs);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2bb986f305ac..f83ca5aa8b77 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -63,7 +63,7 @@
* enough to patch inline, increasing performance.
*/
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
@@ -1084,7 +1084,7 @@ restore_all:
INTERRUPT_RETURN
restore_all_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0, PER_CPU_VAR(__preempt_count)
jnz .Lno_preempt
@@ -1364,7 +1364,7 @@ ENTRY(xen_hypervisor_callback)
ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_evtchn_do_upcall
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
call xen_maybe_preempt_hcall
#endif
jmp ret_from_intr
@@ -1443,8 +1443,12 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
ENTRY(page_fault)
ASM_CLAC
- pushl $0; /* %gs's slot on the stack */
+ pushl $do_page_fault
+ jmp common_exception_read_cr2
+END(page_fault)
+common_exception_read_cr2:
+ /* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1
ENCODE_FRAME_POINTER
@@ -1452,6 +1456,7 @@ ENTRY(page_fault)
/* fixup %gs */
GS_TO_REG %ecx
+ movl PT_GS(%esp), %edi
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
@@ -1463,9 +1468,9 @@ ENTRY(page_fault)
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- call do_page_fault
+ CALL_NOSPEC %edi
jmp ret_from_exception
-END(page_fault)
+END(common_exception_read_cr2)
common_exception:
/* the function address is in %gs's slot on the stack */
@@ -1595,7 +1600,7 @@ END(general_protection)
ENTRY(async_page_fault)
ASM_CLAC
pushl $do_async_page_fault
- jmp common_exception
+ jmp common_exception_read_cr2
END(async_page_fault)
#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3f5a978a02a7..b7c3ea4cb19d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -519,7 +519,7 @@ ENTRY(interrupt_entry)
testb $3, CS-ORIG_RAX+8(%rsp)
jz 1f
SWAPGS
-
+ FENCE_SWAPGS_USER_ENTRY
/*
* Switch to the thread stack. The IRET frame and orig_ax are
* on the stack, as well as the return address. RDI..R12 are
@@ -549,8 +549,10 @@ ENTRY(interrupt_entry)
UNWIND_HINT_FUNC
movq (%rdi), %rdi
+ jmp 2f
1:
-
+ FENCE_SWAPGS_KERNEL_ENTRY
+2:
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
@@ -662,7 +664,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
/* Returning to kernel space */
retint_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/* Interrupts are off */
/* Check if we need preemption */
btl $9, EFLAGS(%rsp) /* were interrupts off? */
@@ -1056,10 +1058,10 @@ ENTRY(native_load_gs_index)
ENDPROC(native_load_gs_index)
EXPORT_SYMBOL(native_load_gs_index)
- _ASM_EXTABLE(.Lgs_change, bad_gs)
+ _ASM_EXTABLE(.Lgs_change, .Lbad_gs)
.section .fixup, "ax"
/* running with kernelgs */
-bad_gs:
+.Lbad_gs:
SWAPGS /* switch back to user gs */
.macro ZAP_GS
/* This can't be a string because the preprocessor needs to see it. */
@@ -1113,7 +1115,7 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
call xen_evtchn_do_upcall
LEAVE_IRQ_STACK
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
call xen_maybe_preempt_hcall
#endif
jmp error_exit
@@ -1238,6 +1240,13 @@ ENTRY(paranoid_entry)
*/
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+ /*
+ * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
+ * unconditional CR3 write, even in the PTI case. So do an lfence
+ * to prevent GS speculation, regardless of whether PTI is enabled.
+ */
+ FENCE_SWAPGS_KERNEL_ENTRY
+
ret
END(paranoid_entry)
@@ -1288,6 +1297,7 @@ ENTRY(error_entry)
* from user mode due to an IRET fault.
*/
SWAPGS
+ FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
@@ -1301,6 +1311,8 @@ ENTRY(error_entry)
pushq %r12
ret
+.Lerror_entry_done_lfence:
+ FENCE_SWAPGS_KERNEL_ENTRY
.Lerror_entry_done:
ret
@@ -1318,7 +1330,7 @@ ENTRY(error_entry)
cmpq %rax, RIP+8(%rsp)
je .Lbstep_iret
cmpq $.Lgs_change, RIP+8(%rsp)
- jne .Lerror_entry_done
+ jne .Lerror_entry_done_lfence
/*
* hack: .Lgs_change can fail with user gsbase. If this happens, fix up
@@ -1326,6 +1338,7 @@ ENTRY(error_entry)
* .Lgs_change's error handler with kernel gsbase.
*/
SWAPGS
+ FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
jmp .Lerror_entry_done
@@ -1340,6 +1353,7 @@ ENTRY(error_entry)
* gsbase and CR3. Switch to kernel gsbase and CR3:
*/
SWAPGS
+ FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/*
@@ -1431,6 +1445,7 @@ ENTRY(nmi)
swapgs
cld
+ FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index d5252bc1e380..b1bf31713374 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -10,10 +10,13 @@
/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
+#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
#include <asm/syscalls_64.h>
#undef __SYSCALL_64
+#undef __SYSCALL_X32
#define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_X32(nr, sym, qual)
asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
@@ -23,3 +26,25 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_64.h>
};
+
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#ifdef CONFIG_X86_X32_ABI
+
+#define __SYSCALL_64(nr, sym, qual)
+#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,
+
+asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
+ [0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
+#include <asm/syscalls_64.h>
+};
+
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#endif
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c00019abd076..3fe02546aed3 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -186,11 +186,11 @@
172 i386 prctl sys_prctl __ia32_sys_prctl
173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn
174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction
-175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask
+175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask
176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending
177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo
-179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend
+179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend
180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread
181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite
182 i386 chown sys_chown16 __ia32_sys_chown16
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 94fcd1951aca..1af2be39e7d9 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -1,13 +1,13 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
in="$1"
out="$2"
syscall_macro() {
- abi="$1"
- nr="$2"
- entry="$3"
+ local abi="$1"
+ local nr="$2"
+ local entry="$3"
# Entry can be either just a function name or "function/qualifier"
real_entry="${entry%%/*}"
@@ -21,14 +21,14 @@ syscall_macro() {
}
emit() {
- abi="$1"
- nr="$2"
- entry="$3"
- compat="$4"
- umlentry=""
+ local abi="$1"
+ local nr="$2"
+ local entry="$3"
+ local compat="$4"
+ local umlentry=""
- if [ "$abi" = "64" -a -n "$compat" ]; then
- echo "a compat entry for a 64-bit syscall makes no sense" >&2
+ if [ "$abi" != "I386" -a -n "$compat" ]; then
+ echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
exit 1
fi
@@ -62,14 +62,17 @@ grep '^[0-9]' "$in" | sort -n | (
while read nr abi name entry compat; do
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
- # COMMON is the same as 64, except that we don't expect X32
- # programs to use it. Our expectation has nothing to do with
- # any generated code, so treat them the same.
emit 64 "$nr" "$entry" "$compat"
+ if [ "$abi" = "COMMON" ]; then
+ # COMMON means that this syscall exists in the same form for
+ # 64-bit and X32.
+ echo "#ifdef CONFIG_X86_X32_ABI"
+ emit X32 "$nr" "$entry" "$compat"
+ echo "#endif"
+ fi
elif [ "$abi" = "X32" ]; then
- # X32 is equivalent to 64 on an X32-compatible kernel.
echo "#ifdef CONFIG_X86_X32_ABI"
- emit 64 "$nr" "$entry" "$compat"
+ emit X32 "$nr" "$entry" "$compat"
echo "#endif"
elif [ "$abi" = "I386" ]; then
emit "$abi" "$nr" "$entry" "$compat"
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index cb3464525b37..2713490611a3 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -34,7 +34,7 @@
THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
#endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
THUNK ___preempt_schedule, preempt_schedule
THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
EXPORT_SYMBOL(___preempt_schedule)
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index cc20465b2867..ea5c4167086c 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -46,7 +46,7 @@
THUNK lockdep_sys_exit_thunk,lockdep_sys_exit
#endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
THUNK ___preempt_schedule, preempt_schedule
THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
EXPORT_SYMBOL(___preempt_schedule)
@@ -55,7 +55,7 @@
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
- || defined(CONFIG_PREEMPT)
+ || defined(CONFIG_PREEMPTION)
.L_restore:
popq %r11
popq %r10
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 62f317c9113a..5b35b7ea5d72 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -661,10 +661,17 @@ fail:
throttle = perf_event_overflow(event, &data, &regs);
out:
- if (throttle)
+ if (throttle) {
perf_ibs_stop(event, 0);
- else
- perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+ } else {
+ period >>= 4;
+
+ if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
+ (*config & IBS_OP_CNT_CTL))
+ period |= *config & IBS_OP_CUR_CNT_RAND;
+
+ perf_ibs_enable_event(perf_ibs, hwc, period);
+ }
perf_event_update_userpage(event);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 81b005e4c7d9..7b21455d7504 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1005,6 +1005,27 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
/* current number of events already accepted */
n = cpuc->n_events;
+ if (!cpuc->n_events)
+ cpuc->pebs_output = 0;
+
+ if (!cpuc->is_fake && leader->attr.precise_ip) {
+ /*
+ * For PEBS->PT, if !aux_event, the group leader (PT) went
+ * away, the group was broken down and this singleton event
+ * can't schedule any more.
+ */
+ if (is_pebs_pt(leader) && !leader->aux_event)
+ return -EINVAL;
+
+ /*
+ * pebs_output: 0: no PEBS so far, 1: PT, 2: DS
+ */
+ if (cpuc->pebs_output &&
+ cpuc->pebs_output != is_pebs_pt(leader) + 1)
+ return -EINVAL;
+
+ cpuc->pebs_output = is_pebs_pt(leader) + 1;
+ }
if (is_x86_event(leader)) {
if (n >= max_count)
@@ -1236,7 +1257,7 @@ void x86_pmu_enable_event(struct perf_event *event)
* Add a single event to the PMU.
*
* The event is added to the group of enabled events
- * but only if it can be scehduled with existing events.
+ * but only if it can be scheduled with existing events.
*/
static int x86_pmu_add(struct perf_event *event, int flags)
{
@@ -2087,7 +2108,7 @@ static int x86_pmu_event_init(struct perf_event *event)
static void refresh_pce(void *ignored)
{
- load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
+ load_mm_cr4_irqsoff(this_cpu_read(cpu_tlbstate.loaded_mm));
}
static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
@@ -2241,6 +2262,17 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value)
return 0;
}
+static int x86_pmu_aux_output_match(struct perf_event *event)
+{
+ if (!(pmu.capabilities & PERF_PMU_CAP_AUX_OUTPUT))
+ return 0;
+
+ if (x86_pmu.aux_output_match)
+ return x86_pmu.aux_output_match(event);
+
+ return 0;
+}
+
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
@@ -2266,6 +2298,8 @@ static struct pmu pmu = {
.sched_task = x86_pmu_sched_task,
.task_ctx_size = sizeof(struct x86_perf_task_context),
.check_period = x86_pmu_check_period,
+
+ .aux_output_match = x86_pmu_aux_output_match,
};
void arch_perf_update_userpage(struct perf_event *event,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9e911a96972b..27ee47a7be66 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,9 +18,9 @@
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/intel-family.h>
+#include <asm/intel_pt.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
-#include <asm/hypervisor.h>
#include "../perf_event.h"
@@ -263,8 +263,8 @@ static struct event_constraint intel_icl_event_constraints[] = {
};
static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
@@ -3299,6 +3299,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
}
+ if (event->attr.aux_output) {
+ if (!event->attr.precise_ip)
+ return -EINVAL;
+
+ event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
+ }
+
if (event->attr.type != PERF_TYPE_RAW)
return 0;
@@ -3573,6 +3580,11 @@ static u64 bdw_limit_period(struct perf_event *event, u64 left)
return left;
}
+static u64 nhm_limit_period(struct perf_event *event, u64 left)
+{
+ return max(left, 32ULL);
+}
+
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -3812,6 +3824,14 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
}
+static int intel_pmu_aux_output_match(struct perf_event *event)
+{
+ if (!x86_pmu.intel_cap.pebs_output_pt_available)
+ return 0;
+
+ return is_intel_pt_event(event);
+}
+
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -3936,6 +3956,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.sched_task = intel_pmu_sched_task,
.check_period = intel_pmu_check_period,
+
+ .aux_output_match = intel_pmu_aux_output_match,
};
static __init void intel_clovertown_quirk(void)
@@ -3965,31 +3987,31 @@ static __init void intel_clovertown_quirk(void)
}
static const struct x86_cpu_desc isolation_ucodes[] = {
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE, 3, 0x0000001f),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT, 1, 0x0000001e),
- INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E, 1, 0x00000015),
+ INTEL_CPU_DESC(INTEL_FAM6_HASWELL, 3, 0x0000001f),
+ INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L, 1, 0x0000001e),
+ INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G, 1, 0x00000015),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037),
INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE, 4, 0x00000023),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E, 1, 0x00000014),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x00000010),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x07000009),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f000009),
- INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e000002),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL, 4, 0x00000023),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G, 1, 0x00000014),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 2, 0x00000010),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009),
+ INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002),
INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021),
INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE, 3, 0x0000007c),
- INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP, 3, 0x0000007c),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 9, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 10, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 11, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 12, 0x0000004e),
- INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP, 13, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c),
+ INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 9, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 10, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 11, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 12, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 10, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 11, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 12, 0x0000004e),
+ INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 13, 0x0000004e),
{}
};
@@ -4053,7 +4075,7 @@ static bool check_msr(unsigned long msr, u64 mask)
* Disable the check for real HW, so we don't
* mess with potentionaly enabled registers:
*/
- if (hypervisor_is_type(X86_HYPER_NATIVE))
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
return true;
/*
@@ -4147,7 +4169,7 @@ static const struct x86_cpu_desc counter_freezing_ucodes[] = {
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 2, 0x0000000e),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 9, 0x0000002e),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT, 10, 0x00000008),
- INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_X, 1, 0x00000028),
+ INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_D, 1, 0x00000028),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 1, 0x00000028),
INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS, 8, 0x00000006),
{}
@@ -4607,6 +4629,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs;
+ x86_pmu.limit_period = nhm_limit_period;
mem_attr = nhm_mem_events_attrs;
@@ -4644,7 +4667,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_SILVERMONT:
- case INTEL_FAM6_ATOM_SILVERMONT_X:
+ case INTEL_FAM6_ATOM_SILVERMONT_D:
case INTEL_FAM6_ATOM_SILVERMONT_MID:
case INTEL_FAM6_ATOM_AIRMONT:
case INTEL_FAM6_ATOM_AIRMONT_MID:
@@ -4666,7 +4689,7 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GOLDMONT_X:
+ case INTEL_FAM6_ATOM_GOLDMONT_D:
x86_add_quirk(intel_counter_freezing_quirk);
memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -4722,7 +4745,7 @@ __init int intel_pmu_init(void)
name = "goldmont_plus";
break;
- case INTEL_FAM6_ATOM_TREMONT_X:
+ case INTEL_FAM6_ATOM_TREMONT_D:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -4858,10 +4881,10 @@ __init int intel_pmu_init(void)
break;
- case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL:
case INTEL_FAM6_HASWELL_X:
- case INTEL_FAM6_HASWELL_ULT:
- case INTEL_FAM6_HASWELL_GT3E:
+ case INTEL_FAM6_HASWELL_L:
+ case INTEL_FAM6_HASWELL_G:
x86_add_quirk(intel_ht_bug);
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
@@ -4891,9 +4914,9 @@ __init int intel_pmu_init(void)
name = "haswell";
break;
- case INTEL_FAM6_BROADWELL_CORE:
- case INTEL_FAM6_BROADWELL_XEON_D:
- case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL:
+ case INTEL_FAM6_BROADWELL_D:
+ case INTEL_FAM6_BROADWELL_G:
case INTEL_FAM6_BROADWELL_X:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
@@ -4955,10 +4978,11 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SKYLAKE_X:
pmem = true;
- case INTEL_FAM6_SKYLAKE_MOBILE:
- case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_KABYLAKE_MOBILE:
- case INTEL_FAM6_KABYLAKE_DESKTOP:
+ /* fall through */
+ case INTEL_FAM6_SKYLAKE_L:
+ case INTEL_FAM6_SKYLAKE:
+ case INTEL_FAM6_KABYLAKE_L:
+ case INTEL_FAM6_KABYLAKE:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -5002,10 +5026,11 @@ __init int intel_pmu_init(void)
break;
case INTEL_FAM6_ICELAKE_X:
- case INTEL_FAM6_ICELAKE_XEON_D:
+ case INTEL_FAM6_ICELAKE_D:
pmem = true;
- case INTEL_FAM6_ICELAKE_MOBILE:
- case INTEL_FAM6_ICELAKE_DESKTOP:
+ /* fall through */
+ case INTEL_FAM6_ICELAKE_L:
+ case INTEL_FAM6_ICELAKE:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 688592b34564..9f2f39003d96 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -446,7 +446,7 @@ static int cstate_cpu_init(unsigned int cpu)
return 0;
}
-const struct attribute_group *core_attr_update[] = {
+static const struct attribute_group *core_attr_update[] = {
&group_cstate_core_c1,
&group_cstate_core_c3,
&group_cstate_core_c6,
@@ -454,7 +454,7 @@ const struct attribute_group *core_attr_update[] = {
NULL,
};
-const struct attribute_group *pkg_attr_update[] = {
+static const struct attribute_group *pkg_attr_update[] = {
&group_cstate_pkg_c2,
&group_cstate_pkg_c3,
&group_cstate_pkg_c6,
@@ -593,40 +593,40 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_IVYBRIDGE_X, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_CORE, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_GT3E, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_X, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_G, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_L, hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_D, slm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_XEON_D, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_GT3E, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_D, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_G, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_X, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_L, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, hswult_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 2c8db2c19328..ce83950036c5 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -851,7 +851,7 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
struct event_constraint intel_icl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
@@ -902,6 +902,9 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
*/
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
+ if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
+ return false;
+
return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}
@@ -919,6 +922,9 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
u64 threshold;
int reserved;
+ if (cpuc->n_pebs_via_pt)
+ return;
+
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
else
@@ -1059,10 +1065,40 @@ void intel_pmu_pebs_add(struct perf_event *event)
cpuc->n_pebs++;
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs++;
+ if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
+ cpuc->n_pebs_via_pt++;
pebs_update_state(needed_cb, cpuc, event, true);
}
+static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (!is_pebs_pt(event))
+ return;
+
+ if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
+ cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
+}
+
+static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+ struct debug_store *ds = cpuc->ds;
+
+ if (!is_pebs_pt(event))
+ return;
+
+ if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
+ cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;
+
+ cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
+
+ wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
+}
+
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1100,6 +1136,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
} else {
ds->pebs_event_reset[hwc->idx] = 0;
}
+
+ intel_pmu_pebs_via_pt_enable(event);
}
void intel_pmu_pebs_del(struct perf_event *event)
@@ -1111,6 +1149,8 @@ void intel_pmu_pebs_del(struct perf_event *event)
cpuc->n_pebs--;
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs--;
+ if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
+ cpuc->n_pebs_via_pt--;
pebs_update_state(needed_cb, cpuc, event, false);
}
@@ -1120,7 +1160,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- if (cpuc->n_pebs == cpuc->n_large_pebs)
+ if (cpuc->n_pebs == cpuc->n_large_pebs &&
+ cpuc->n_pebs != cpuc->n_pebs_via_pt)
intel_pmu_drain_pebs_buffer();
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -1131,6 +1172,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
+ intel_pmu_pebs_via_pt_disable(event);
+
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -2031,6 +2074,12 @@ void __init intel_ds_init(void)
PERF_SAMPLE_REGS_INTR);
}
pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
+
+ if (x86_pmu.intel_cap.pebs_output_pt_available) {
+ pr_cont("PEBS-via-PT, ");
+ x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
+ }
+
break;
default:
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 6f814a27416b..ea54634eabf3 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -273,7 +273,7 @@ static inline bool lbr_from_signext_quirk_needed(void)
return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
}
-DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
+static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index d3dc2274ddd4..74e80ed9c6c4 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -204,9 +204,9 @@ static int __init pt_pmu_hw_init(void)
/* model-specific quirks */
switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_BROADWELL_CORE:
- case INTEL_FAM6_BROADWELL_XEON_D:
- case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL:
+ case INTEL_FAM6_BROADWELL_D:
+ case INTEL_FAM6_BROADWELL_G:
case INTEL_FAM6_BROADWELL_X:
/* not setting BRANCH_EN will #GP, erratum BDM106 */
pt_pmu.branch_en_always_on = true;
@@ -545,33 +545,62 @@ static void pt_config_buffer(void *buf, unsigned int topa_idx,
wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
-/*
- * Keep ToPA table-related metadata on the same page as the actual table,
- * taking up a few words from the top
- */
-
-#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
-
/**
- * struct topa - page-sized ToPA table with metadata at the top
- * @table: actual ToPA table entries, as understood by PT hardware
+ * struct topa - ToPA metadata
* @list: linkage to struct pt_buffer's list of tables
- * @phys: physical address of this page
* @offset: offset of the first entry in this table in the buffer
* @size: total size of all entries in this table
* @last: index of the last initialized entry in this table
+ * @z_count: how many times the first entry repeats
*/
struct topa {
- struct topa_entry table[TENTS_PER_PAGE];
struct list_head list;
- u64 phys;
u64 offset;
size_t size;
int last;
+ unsigned int z_count;
};
+/*
+ * Keep ToPA table-related metadata on the same page as the actual table,
+ * taking up a few words from the top
+ */
+
+#define TENTS_PER_PAGE \
+ ((PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry))
+
+/**
+ * struct topa_page - page-sized ToPA table with metadata at the top
+ * @table: actual ToPA table entries, as understood by PT hardware
+ * @topa: metadata
+ */
+struct topa_page {
+ struct topa_entry table[TENTS_PER_PAGE];
+ struct topa topa;
+};
+
+static inline struct topa_page *topa_to_page(struct topa *topa)
+{
+ return container_of(topa, struct topa_page, topa);
+}
+
+static inline struct topa_page *topa_entry_to_page(struct topa_entry *te)
+{
+ return (struct topa_page *)((unsigned long)te & PAGE_MASK);
+}
+
+static inline phys_addr_t topa_pfn(struct topa *topa)
+{
+ return PFN_DOWN(virt_to_phys(topa_to_page(topa)));
+}
+
/* make -1 stand for the last table entry */
-#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
+#define TOPA_ENTRY(t, i) \
+ ((i) == -1 \
+ ? &topa_to_page(t)->table[(t)->last] \
+ : &topa_to_page(t)->table[(i)])
+#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
+#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
/**
* topa_alloc() - allocate page-sized ToPA table
@@ -583,27 +612,26 @@ struct topa {
static struct topa *topa_alloc(int cpu, gfp_t gfp)
{
int node = cpu_to_node(cpu);
- struct topa *topa;
+ struct topa_page *tp;
struct page *p;
p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
if (!p)
return NULL;
- topa = page_address(p);
- topa->last = 0;
- topa->phys = page_to_phys(p);
+ tp = page_address(p);
+ tp->topa.last = 0;
/*
* In case of singe-entry ToPA, always put the self-referencing END
* link as the 2nd entry in the table
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
- TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
- TOPA_ENTRY(topa, 1)->end = 1;
+ TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p);
+ TOPA_ENTRY(&tp->topa, 1)->end = 1;
}
- return topa;
+ return &tp->topa;
}
/**
@@ -643,7 +671,7 @@ static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
BUG_ON(last->last != TENTS_PER_PAGE - 1);
- TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
+ TOPA_ENTRY(last, -1)->base = topa_pfn(topa);
TOPA_ENTRY(last, -1)->end = 1;
}
@@ -670,7 +698,7 @@ static bool topa_table_full(struct topa *topa)
*
* Return: 0 on success or error code.
*/
-static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
+static int topa_insert_pages(struct pt_buffer *buf, int cpu, gfp_t gfp)
{
struct topa *topa = buf->last;
int order = 0;
@@ -681,13 +709,18 @@ static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
order = page_private(p);
if (topa_table_full(topa)) {
- topa = topa_alloc(buf->cpu, gfp);
+ topa = topa_alloc(cpu, gfp);
if (!topa)
return -ENOMEM;
topa_insert_table(buf, topa);
}
+ if (topa->z_count == topa->last - 1) {
+ if (order == TOPA_ENTRY(topa, topa->last - 1)->size)
+ topa->z_count++;
+ }
+
TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
TOPA_ENTRY(topa, -1)->size = order;
if (!buf->snapshot &&
@@ -713,23 +746,26 @@ static void pt_topa_dump(struct pt_buffer *buf)
struct topa *topa;
list_for_each_entry(topa, &buf->tables, list) {
+ struct topa_page *tp = topa_to_page(topa);
int i;
- pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
- topa->phys, topa->offset, topa->size);
+ pr_debug("# table @%p, off %llx size %zx\n", tp->table,
+ topa->offset, topa->size);
for (i = 0; i < TENTS_PER_PAGE; i++) {
pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
- &topa->table[i],
- (unsigned long)topa->table[i].base << TOPA_SHIFT,
- sizes(topa->table[i].size),
- topa->table[i].end ? 'E' : ' ',
- topa->table[i].intr ? 'I' : ' ',
- topa->table[i].stop ? 'S' : ' ',
- *(u64 *)&topa->table[i]);
+ &tp->table[i],
+ (unsigned long)tp->table[i].base << TOPA_SHIFT,
+ sizes(tp->table[i].size),
+ tp->table[i].end ? 'E' : ' ',
+ tp->table[i].intr ? 'I' : ' ',
+ tp->table[i].stop ? 'S' : ' ',
+ *(u64 *)&tp->table[i]);
if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
- topa->table[i].stop) ||
- topa->table[i].end)
+ tp->table[i].stop) ||
+ tp->table[i].end)
break;
+ if (!i && topa->z_count)
+ i += topa->z_count;
}
}
}
@@ -771,7 +807,7 @@ static void pt_update_head(struct pt *pt)
/* offset of the current output region within this table */
for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
- base += sizes(buf->cur->table[topa_idx].size);
+ base += TOPA_ENTRY_SIZE(buf->cur, topa_idx);
if (buf->snapshot) {
local_set(&buf->data_size, base);
@@ -791,7 +827,7 @@ static void pt_update_head(struct pt *pt)
*/
static void *pt_buffer_region(struct pt_buffer *buf)
{
- return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
+ return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
}
/**
@@ -800,7 +836,7 @@ static void *pt_buffer_region(struct pt_buffer *buf)
*/
static size_t pt_buffer_region_size(struct pt_buffer *buf)
{
- return sizes(buf->cur->table[buf->cur_idx].size);
+ return TOPA_ENTRY_SIZE(buf->cur, buf->cur_idx);
}
/**
@@ -830,7 +866,7 @@ static void pt_handle_status(struct pt *pt)
* know.
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
- buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+ buf->output_off == pt_buffer_region_size(buf)) {
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_TRUNCATED);
advance++;
@@ -868,9 +904,11 @@ static void pt_handle_status(struct pt *pt)
static void pt_read_offset(struct pt_buffer *buf)
{
u64 offset, base_topa;
+ struct topa_page *tp;
rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
- buf->cur = phys_to_virt(base_topa);
+ tp = phys_to_virt(base_topa);
+ buf->cur = &tp->topa;
rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
/* offset within current output region */
@@ -879,29 +917,97 @@ static void pt_read_offset(struct pt_buffer *buf)
buf->cur_idx = (offset & 0xffffff80) >> 7;
}
-/**
- * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
- * @buf: PT buffer.
- * @pg: Page offset in the buffer.
- *
- * When advancing to the next output region (ToPA entry), given a page offset
- * into the buffer, we need to find the offset of the first page in the next
- * region.
- */
-static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
+static struct topa_entry *
+pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
{
- struct topa_entry *te = buf->topa_index[pg];
+ struct topa_page *tp;
+ struct topa *topa;
+ unsigned int idx, cur_pg = 0, z_pg = 0, start_idx = 0;
- /* one region */
- if (buf->first == buf->last && buf->first->last == 1)
- return pg;
+ /*
+ * Indicates a bug in the caller.
+ */
+ if (WARN_ON_ONCE(pg >= buf->nr_pages))
+ return NULL;
+
+ /*
+ * First, find the ToPA table where @pg fits. With high
+ * order allocations, there shouldn't be many of these.
+ */
+ list_for_each_entry(topa, &buf->tables, list) {
+ if (topa->offset + topa->size > pg << PAGE_SHIFT)
+ goto found;
+ }
+
+ /*
+ * Hitting this means we have a problem in the ToPA
+ * allocation code.
+ */
+ WARN_ON_ONCE(1);
- do {
- pg++;
- pg &= buf->nr_pages - 1;
- } while (buf->topa_index[pg] == te);
+ return NULL;
- return pg;
+found:
+ /*
+ * Indicates a problem in the ToPA allocation code.
+ */
+ if (WARN_ON_ONCE(topa->last == -1))
+ return NULL;
+
+ tp = topa_to_page(topa);
+ cur_pg = PFN_DOWN(topa->offset);
+ if (topa->z_count) {
+ z_pg = TOPA_ENTRY_PAGES(topa, 0) * (topa->z_count + 1);
+ start_idx = topa->z_count + 1;
+ }
+
+ /*
+ * Multiple entries at the beginning of the table have the same size,
+ * ideally all of them; if @pg falls there, the search is done.
+ */
+ if (pg >= cur_pg && pg < cur_pg + z_pg) {
+ idx = (pg - cur_pg) / TOPA_ENTRY_PAGES(topa, 0);
+ return &tp->table[idx];
+ }
+
+ /*
+ * Otherwise, slow path: iterate through the remaining entries.
+ */
+ for (idx = start_idx, cur_pg += z_pg; idx < topa->last; idx++) {
+ if (cur_pg + TOPA_ENTRY_PAGES(topa, idx) > pg)
+ return &tp->table[idx];
+
+ cur_pg += TOPA_ENTRY_PAGES(topa, idx);
+ }
+
+ /*
+ * Means we couldn't find a ToPA entry in the table that does match.
+ */
+ WARN_ON_ONCE(1);
+
+ return NULL;
+}
+
+static struct topa_entry *
+pt_topa_prev_entry(struct pt_buffer *buf, struct topa_entry *te)
+{
+ unsigned long table = (unsigned long)te & ~(PAGE_SIZE - 1);
+ struct topa_page *tp;
+ struct topa *topa;
+
+ tp = (struct topa_page *)table;
+ if (tp->table != te)
+ return --te;
+
+ topa = &tp->topa;
+ if (topa == buf->first)
+ topa = buf->last;
+ else
+ topa = list_prev_entry(topa, list);
+
+ tp = topa_to_page(topa);
+
+ return &tp->table[topa->last - 1];
}
/**
@@ -925,8 +1031,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
unsigned long idx, npages, wakeup;
/* can't stop in the middle of an output region */
- if (buf->output_off + handle->size + 1 <
- sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
+ if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
return -EINVAL;
}
@@ -937,9 +1042,13 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
return 0;
/* clear STOP and INT from current entry */
- buf->topa_index[buf->stop_pos]->stop = 0;
- buf->topa_index[buf->stop_pos]->intr = 0;
- buf->topa_index[buf->intr_pos]->intr = 0;
+ if (buf->stop_te) {
+ buf->stop_te->stop = 0;
+ buf->stop_te->intr = 0;
+ }
+
+ if (buf->intr_te)
+ buf->intr_te->intr = 0;
/* how many pages till the STOP marker */
npages = handle->size >> PAGE_SHIFT;
@@ -950,7 +1059,12 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
idx = (head >> PAGE_SHIFT) + npages;
idx &= buf->nr_pages - 1;
- buf->stop_pos = idx;
+
+ if (idx != buf->stop_pos) {
+ buf->stop_pos = idx;
+ buf->stop_te = pt_topa_entry_for_page(buf, idx);
+ buf->stop_te = pt_topa_prev_entry(buf, buf->stop_te);
+ }
wakeup = handle->wakeup >> PAGE_SHIFT;
@@ -960,51 +1074,20 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
idx = wakeup;
idx &= buf->nr_pages - 1;
- buf->intr_pos = idx;
+ if (idx != buf->intr_pos) {
+ buf->intr_pos = idx;
+ buf->intr_te = pt_topa_entry_for_page(buf, idx);
+ buf->intr_te = pt_topa_prev_entry(buf, buf->intr_te);
+ }
- buf->topa_index[buf->stop_pos]->stop = 1;
- buf->topa_index[buf->stop_pos]->intr = 1;
- buf->topa_index[buf->intr_pos]->intr = 1;
+ buf->stop_te->stop = 1;
+ buf->stop_te->intr = 1;
+ buf->intr_te->intr = 1;
return 0;
}
/**
- * pt_buffer_setup_topa_index() - build topa_index[] table of regions
- * @buf: PT buffer.
- *
- * topa_index[] references output regions indexed by offset into the
- * buffer for purposes of quick reverse lookup.
- */
-static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
-{
- struct topa *cur = buf->first, *prev = buf->last;
- struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
- *te_prev = TOPA_ENTRY(prev, prev->last - 1);
- int pg = 0, idx = 0;
-
- while (pg < buf->nr_pages) {
- int tidx;
-
- /* pages within one topa entry */
- for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
- buf->topa_index[pg] = te_prev;
-
- te_prev = te_cur;
-
- if (idx == cur->last - 1) {
- /* advance to next topa table */
- idx = 0;
- cur = list_entry(cur->list.next, struct topa, list);
- } else {
- idx++;
- }
- te_cur = TOPA_ENTRY(cur, idx);
- }
-
-}
-
-/**
* pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
* @buf: PT buffer.
* @head: Write pointer (aux_head) from AUX buffer.
@@ -1021,18 +1104,20 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
*/
static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
{
+ struct topa_page *cur_tp;
+ struct topa_entry *te;
int pg;
if (buf->snapshot)
head &= (buf->nr_pages << PAGE_SHIFT) - 1;
pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
- pg = pt_topa_next_entry(buf, pg);
+ te = pt_topa_entry_for_page(buf, pg);
- buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
- buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
- (unsigned long)buf->cur) / sizeof(struct topa_entry);
- buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);
+ cur_tp = topa_entry_to_page(te);
+ buf->cur = &cur_tp->topa;
+ buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
+ buf->output_off = head & (pt_buffer_region_size(buf) - 1);
local64_set(&buf->head, head);
local_set(&buf->data_size, 0);
@@ -1061,31 +1146,29 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
* @size: Total size of all regions within this ToPA.
* @gfp: Allocation flags.
*/
-static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
- gfp_t gfp)
+static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
+ unsigned long nr_pages, gfp_t gfp)
{
struct topa *topa;
int err;
- topa = topa_alloc(buf->cpu, gfp);
+ topa = topa_alloc(cpu, gfp);
if (!topa)
return -ENOMEM;
topa_insert_table(buf, topa);
while (buf->nr_pages < nr_pages) {
- err = topa_insert_pages(buf, gfp);
+ err = topa_insert_pages(buf, cpu, gfp);
if (err) {
pt_buffer_fini_topa(buf);
return -ENOMEM;
}
}
- pt_buffer_setup_topa_index(buf);
-
/* link last table to the first one, unless we're double buffering */
if (intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
- TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
+ TOPA_ENTRY(buf->last, -1)->base = topa_pfn(buf->first);
TOPA_ENTRY(buf->last, -1)->end = 1;
}
@@ -1119,18 +1202,18 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
cpu = raw_smp_processor_id();
node = cpu_to_node(cpu);
- buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
- GFP_KERNEL, node);
+ buf = kzalloc_node(sizeof(struct pt_buffer), GFP_KERNEL, node);
if (!buf)
return NULL;
- buf->cpu = cpu;
buf->snapshot = snapshot;
buf->data_pages = pages;
+ buf->stop_pos = -1;
+ buf->intr_pos = -1;
INIT_LIST_HEAD(&buf->tables);
- ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
+ ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL);
if (ret) {
kfree(buf);
return NULL;
@@ -1296,7 +1379,7 @@ void intel_pt_interrupt(void)
return;
}
- pt_config_buffer(buf->cur->table, buf->cur_idx,
+ pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
buf->output_off);
pt_config(event);
}
@@ -1361,7 +1444,7 @@ static void pt_event_start(struct perf_event *event, int mode)
WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;
- pt_config_buffer(buf->cur->table, buf->cur_idx,
+ pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
buf->output_off);
pt_config(event);
@@ -1481,6 +1564,11 @@ void cpu_emergency_stop_pt(void)
pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
}
+int is_intel_pt_event(struct perf_event *event)
+{
+ return event->pmu == &pt_pmu.pmu;
+}
+
static __init int pt_init(void)
{
int ret, cpu, prior_warn = 0;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 63fe4063fbd6..1d2bb7572374 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -53,7 +53,6 @@ struct pt_pmu {
/**
* struct pt_buffer - buffer configuration; one buffer per task_struct or
* cpu, depending on perf event configuration
- * @cpu: cpu for per-cpu allocation
* @tables: list of ToPA tables in this buffer
* @first: shorthand for first topa table
* @last: shorthand for last topa table
@@ -65,13 +64,14 @@ struct pt_pmu {
* @lost: if data was lost/truncated
* @head: logical write offset inside the buffer
* @snapshot: if this is for a snapshot/overwrite counter
- * @stop_pos: STOP topa entry in the buffer
- * @intr_pos: INT topa entry in the buffer
+ * @stop_pos: STOP topa entry index
+ * @intr_pos: INT topa entry index
+ * @stop_te: STOP topa entry pointer
+ * @intr_te: INT topa entry pointer
* @data_pages: array of pages from perf
* @topa_index: table of topa entries indexed by page offset
*/
struct pt_buffer {
- int cpu;
struct list_head tables;
struct topa *first, *last, *cur;
unsigned int cur_idx;
@@ -80,9 +80,9 @@ struct pt_buffer {
local_t data_size;
local64_t head;
bool snapshot;
- unsigned long stop_pos, intr_pos;
+ long stop_pos, intr_pos;
+ struct topa_entry *stop_te, *intr_te;
void **data_pages;
- struct topa_entry *topa_index[0];
};
#define PT_FILTERS_NUM 4
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 64ab51ffdf06..5053a403e4ae 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -634,7 +634,7 @@ static void cleanup_rapl_pmus(void)
kfree(rapl_pmus);
}
-const struct attribute_group *rapl_attr_update[] = {
+static const struct attribute_group *rapl_attr_update[] = {
&rapl_events_cores_group,
&rapl_events_pkg_group,
&rapl_events_ram_group,
@@ -720,27 +720,27 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL, model_hsw),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_L, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_G, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, model_hsw),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, model_hsx),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, model_hsx),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE, model_skl),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_L, model_skl),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, model_hsw),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_D, model_hsw),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, model_skl),
- X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, model_skl),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE, model_skl),
{},
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 3694a5d0703d..6fc2e06ab4c6 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1451,29 +1451,29 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EP, nhm_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, ivb_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, bdw_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_L, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_G, hsw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL, bdw_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_G, bdw_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EX, nhmex_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_WESTMERE_EX, nhmex_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, ivbep_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hswep_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_D, bdx_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE, skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_L, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, icl_uncore_init),
- X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_X, snr_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_L, skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE, skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_D, snr_uncore_init),
{},
};
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 9431447541e9..b1afc77f0704 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -59,22 +59,22 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_IVYBRIDGE:
case INTEL_FAM6_IVYBRIDGE_X:
- case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL:
case INTEL_FAM6_HASWELL_X:
- case INTEL_FAM6_HASWELL_ULT:
- case INTEL_FAM6_HASWELL_GT3E:
+ case INTEL_FAM6_HASWELL_L:
+ case INTEL_FAM6_HASWELL_G:
- case INTEL_FAM6_BROADWELL_CORE:
- case INTEL_FAM6_BROADWELL_XEON_D:
- case INTEL_FAM6_BROADWELL_GT3E:
+ case INTEL_FAM6_BROADWELL:
+ case INTEL_FAM6_BROADWELL_D:
+ case INTEL_FAM6_BROADWELL_G:
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_ATOM_SILVERMONT:
- case INTEL_FAM6_ATOM_SILVERMONT_X:
+ case INTEL_FAM6_ATOM_SILVERMONT_D:
case INTEL_FAM6_ATOM_AIRMONT:
case INTEL_FAM6_ATOM_GOLDMONT:
- case INTEL_FAM6_ATOM_GOLDMONT_X:
+ case INTEL_FAM6_ATOM_GOLDMONT_D:
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
@@ -84,12 +84,12 @@ static bool test_intel(int idx, void *data)
return true;
break;
- case INTEL_FAM6_SKYLAKE_MOBILE:
- case INTEL_FAM6_SKYLAKE_DESKTOP:
+ case INTEL_FAM6_SKYLAKE_L:
+ case INTEL_FAM6_SKYLAKE:
case INTEL_FAM6_SKYLAKE_X:
- case INTEL_FAM6_KABYLAKE_MOBILE:
- case INTEL_FAM6_KABYLAKE_DESKTOP:
- case INTEL_FAM6_ICELAKE_MOBILE:
+ case INTEL_FAM6_KABYLAKE_L:
+ case INTEL_FAM6_KABYLAKE:
+ case INTEL_FAM6_ICELAKE_L:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
@@ -167,7 +167,7 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
-const struct attribute_group *attr_update[] = {
+static const struct attribute_group *attr_update[] = {
&group_aperf,
&group_mperf,
&group_pperf,
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8751008fc170..ecacfbf4ebc1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -76,6 +76,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */
#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
+#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -85,6 +86,11 @@ struct amd_nb {
};
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
+#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60)
+#define PEBS_OUTPUT_OFFSET 61
+#define PEBS_OUTPUT_MASK (3ull << PEBS_OUTPUT_OFFSET)
+#define PEBS_OUTPUT_PT (1ull << PEBS_OUTPUT_OFFSET)
+#define PEBS_VIA_PT_MASK (PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)
/*
* Flags PEBS can handle without an PMI.
@@ -211,6 +217,8 @@ struct cpu_hw_events {
u64 pebs_enabled;
int n_pebs;
int n_large_pebs;
+ int n_pebs_via_pt;
+ int pebs_output;
/* Current super set of events hardware configuration */
u64 pebs_data_cfg;
@@ -510,6 +518,8 @@ union perf_capabilities {
*/
u64 full_width_write:1;
u64 pebs_baseline:1;
+ u64 pebs_metrics_available:1;
+ u64 pebs_output_pt_available:1;
};
u64 capabilities;
};
@@ -692,6 +702,8 @@ struct x86_pmu {
* Check period value for PERF_EVENT_IOC_PERIOD ioctl.
*/
int (*check_period) (struct perf_event *event, u64 period);
+
+ int (*aux_output_match) (struct perf_event *event);
};
struct x86_perf_task_context {
@@ -901,6 +913,11 @@ static inline int amd_pmu_init(void)
#endif /* CONFIG_CPU_SUP_AMD */
+static inline int is_pebs_pt(struct perf_event *event)
+{
+ return !!(event->hw.flags & PERF_X86_EVENT_PEBS_VIA_PT);
+}
+
#ifdef CONFIG_CPU_SUP_INTEL
static inline bool intel_pmu_has_bts_period(struct perf_event *event, u64 period)
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index e65d7fe6489f..5208ba49c89a 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -37,12 +37,14 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
* Lower 12 bits encode the number of additional
* pages to flush (in addition to the 'cur' page).
*/
- if (diff >= HV_TLB_FLUSH_UNIT)
+ if (diff >= HV_TLB_FLUSH_UNIT) {
gva_list[gva_n] |= ~PAGE_MASK;
- else if (diff)
+ cur += HV_TLB_FLUSH_UNIT;
+ } else if (diff) {
gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;
+ cur = end;
+ }
- cur += HV_TLB_FLUSH_UNIT;
gva_n++;
} while (cur < end);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 84f848c2541a..7f828fe49797 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -49,8 +49,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
#define array_index_mask_nospec array_index_mask_nospec
/* Prevent speculative execution past this barrier. */
-#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
- "lfence", X86_FEATURE_LFENCE_RDTSC)
+#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
#define dma_rmb() barrier()
#define dma_wmb() barrier()
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index ba15d53c1ca7..7d1f6a49bfae 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -45,14 +45,13 @@
* We do the locked ops that don't return the old value as
* a mask operation on a byte.
*/
-#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7))
static __always_inline void
arch_set_bit(long nr, volatile unsigned long *addr)
{
- if (IS_IMMEDIATE(nr)) {
+ if (__builtin_constant_p(nr)) {
asm volatile(LOCK_PREFIX "orb %1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr))
@@ -72,7 +71,7 @@ arch___set_bit(long nr, volatile unsigned long *addr)
static __always_inline void
arch_clear_bit(long nr, volatile unsigned long *addr)
{
- if (IS_IMMEDIATE(nr)) {
+ if (__builtin_constant_p(nr)) {
asm volatile(LOCK_PREFIX "andb %1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)~CONST_MASK(nr)));
@@ -123,7 +122,7 @@ arch___change_bit(long nr, volatile unsigned long *addr)
static __always_inline void
arch_change_bit(long nr, volatile unsigned long *addr)
{
- if (IS_IMMEDIATE(nr)) {
+ if (__builtin_constant_p(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr)));
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 101eb944f13c..981fe923a59f 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -18,6 +18,20 @@
* Note: efi_info is commonly left uninitialized, but that field has a
* private magic, so it is better to leave it unchanged.
*/
+
+#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); })
+
+#define BOOT_PARAM_PRESERVE(struct_member) \
+ { \
+ .start = offsetof(struct boot_params, struct_member), \
+ .len = sizeof_mbr(struct boot_params, struct_member), \
+ }
+
+struct boot_params_to_save {
+ unsigned int start;
+ unsigned int len;
+};
+
static void sanitize_boot_params(struct boot_params *boot_params)
{
/*
@@ -35,21 +49,41 @@ static void sanitize_boot_params(struct boot_params *boot_params)
* problems again.
*/
if (boot_params->sentinel) {
- /* fields in boot_params are left uninitialized, clear them */
- boot_params->acpi_rsdp_addr = 0;
- memset(&boot_params->ext_ramdisk_image, 0,
- (char *)&boot_params->efi_info -
- (char *)&boot_params->ext_ramdisk_image);
- memset(&boot_params->kbd_status, 0,
- (char *)&boot_params->hdr -
- (char *)&boot_params->kbd_status);
- memset(&boot_params->_pad7[0], 0,
- (char *)&boot_params->edd_mbr_sig_buffer[0] -
- (char *)&boot_params->_pad7[0]);
- memset(&boot_params->_pad8[0], 0,
- (char *)&boot_params->eddbuf[0] -
- (char *)&boot_params->_pad8[0]);
- memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
+ static struct boot_params scratch;
+ char *bp_base = (char *)boot_params;
+ char *save_base = (char *)&scratch;
+ int i;
+
+ const struct boot_params_to_save to_save[] = {
+ BOOT_PARAM_PRESERVE(screen_info),
+ BOOT_PARAM_PRESERVE(apm_bios_info),
+ BOOT_PARAM_PRESERVE(tboot_addr),
+ BOOT_PARAM_PRESERVE(ist_info),
+ BOOT_PARAM_PRESERVE(hd0_info),
+ BOOT_PARAM_PRESERVE(hd1_info),
+ BOOT_PARAM_PRESERVE(sys_desc_table),
+ BOOT_PARAM_PRESERVE(olpc_ofw_header),
+ BOOT_PARAM_PRESERVE(efi_info),
+ BOOT_PARAM_PRESERVE(alt_mem_k),
+ BOOT_PARAM_PRESERVE(scratch),
+ BOOT_PARAM_PRESERVE(e820_entries),
+ BOOT_PARAM_PRESERVE(eddbuf_entries),
+ BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
+ BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
+ BOOT_PARAM_PRESERVE(secure_boot),
+ BOOT_PARAM_PRESERVE(hdr),
+ BOOT_PARAM_PRESERVE(e820_table),
+ BOOT_PARAM_PRESERVE(eddbuf),
+ };
+
+ memset(&scratch, 0, sizeof(scratch));
+
+ for (i = 0; i < ARRAY_SIZE(to_save); i++) {
+ memcpy(save_base + to_save[i].start,
+ bp_base + to_save[i].start, to_save[i].len);
+ }
+
+ memcpy(boot_params, save_base, sizeof(*boot_params));
}
}
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 58acda503817..59bf91c57aa8 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -61,6 +61,13 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \
(((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
+/*
+ * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the
+ * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all
+ * header macros which use NCAPINTS need to be changed. The duplicated macro
+ * use causes the compiler to issue errors for all headers so that all usage
+ * sites can be corrected.
+ */
#define REQUIRED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 998c2cc08363..3be4dcb1f80f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,6 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -281,6 +280,8 @@
#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
+#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
+#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -353,6 +354,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
@@ -394,5 +396,6 @@
#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
+#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 20a46150e0a8..9b8cb50768c2 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -73,6 +73,19 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
#else
# include <asm-generic/div64.h>
+
+static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
+{
+ u64 q;
+
+ asm ("mulq %2; divq %3" : "=a" (q)
+ : "a" (a), "rm" ((u64)mul), "rm" ((u64)div)
+ : "rdx");
+
+ return q;
+}
+#define mul_u64_u32_div mul_u64_u32_div
+
#endif /* CONFIG_X86_32 */
#endif /* _ASM_X86_DIV64_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 606a4b6a9812..43a82e59c59d 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -242,6 +242,7 @@ static inline bool efi_is_64bit(void)
__efi_early()->runtime_services), __VA_ARGS__)
extern bool efi_reboot_required(void);
+extern bool efi_is_table_address(unsigned long phys_addr);
#else
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
@@ -249,6 +250,10 @@ static inline bool efi_reboot_required(void)
{
return false;
}
+static inline bool efi_is_table_address(unsigned long phys_addr)
+{
+ return false;
+}
#endif /* CONFIG_EFI */
#endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h
deleted file mode 100644
index 47b7a1296245..000000000000
--- a/arch/x86/include/asm/error-injection.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ERROR_INJECTION_H
-#define _ASM_ERROR_INJECTION_H
-
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm-generic/error-injection.h>
-
-asmlinkage void just_return_func(void);
-void override_function_with_return(struct pt_regs *regs);
-
-#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 9da8cccdf3fb..0c47aa82e2e2 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -42,8 +42,7 @@
* Because of this, FIXADDR_TOP x86 integration was left as later work.
*/
#ifdef CONFIG_X86_32
-/* used by vmalloc.c, vsyscall.lds.S.
- *
+/*
* Leave one empty page between vmalloc'ed areas and
* the start of the fixmap.
*/
@@ -120,7 +119,7 @@ enum fixed_addresses {
* before ioremap() is functional.
*
* If necessary we round it up to the next 512 pages boundary so
- * that we can have a single pgd entry and a single pte table:
+ * that we can have a single pmd entry and a single pte table:
*/
#define NR_FIX_BTMAPS 64
#define FIX_BTMAPS_SLOTS 8
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 287f1f7b2e52..c38a66661576 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -16,7 +16,6 @@
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
#ifndef __ASSEMBLY__
-extern void mcount(void);
extern atomic_t modifying_ftrace_code;
extern void __fentry__(void);
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 0278aa66ef62..f04622500da3 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -5,12 +5,34 @@
/*
* "Big Core" Processors (Branded as Core, Xeon, etc...)
*
- * The "_X" parts are generally the EP and EX Xeons, or the
- * "Extreme" ones, like Broadwell-E, or Atom microserver.
- *
* While adding a new CPUID for a new microarchitecture, add a new
* group to keep logically sorted out in chronological order. Within
* that group keep the CPUID for the variants sorted by model number.
+ *
+ * The defined symbol names have the following form:
+ * INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF}
+ * where:
+ * OPTFAMILY Describes the family of CPUs that this belongs to. Default
+ * is assumed to be "_CORE" (and should be omitted). Other values
+ * currently in use are _ATOM and _XEON_PHI
+ * MICROARCH Is the code name for the micro-architecture for this core.
+ * N.B. Not the platform name.
+ * OPTDIFF If needed, a short string to differentiate by market segment.
+ *
+ * Common OPTDIFFs:
+ *
+ * - regular client parts
+ * _L - regular mobile parts
+ * _G - parts with extra graphics on
+ * _X - regular server parts
+ * _D - micro server parts
+ *
+ * Historical OPTDIFFs:
+ *
+ * _EP - 2 socket server parts
+ * _EX - 4+ socket server parts
+ *
+ * The #define line may optionally include a comment including platform names.
*/
#define INTEL_FAM6_CORE_YONAH 0x0E
@@ -34,30 +56,33 @@
#define INTEL_FAM6_IVYBRIDGE 0x3A
#define INTEL_FAM6_IVYBRIDGE_X 0x3E
-#define INTEL_FAM6_HASWELL_CORE 0x3C
+#define INTEL_FAM6_HASWELL 0x3C
#define INTEL_FAM6_HASWELL_X 0x3F
-#define INTEL_FAM6_HASWELL_ULT 0x45
-#define INTEL_FAM6_HASWELL_GT3E 0x46
+#define INTEL_FAM6_HASWELL_L 0x45
+#define INTEL_FAM6_HASWELL_G 0x46
-#define INTEL_FAM6_BROADWELL_CORE 0x3D
-#define INTEL_FAM6_BROADWELL_GT3E 0x47
+#define INTEL_FAM6_BROADWELL 0x3D
+#define INTEL_FAM6_BROADWELL_G 0x47
#define INTEL_FAM6_BROADWELL_X 0x4F
-#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+#define INTEL_FAM6_BROADWELL_D 0x56
-#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
-#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
+#define INTEL_FAM6_SKYLAKE_L 0x4E
+#define INTEL_FAM6_SKYLAKE 0x5E
#define INTEL_FAM6_SKYLAKE_X 0x55
-#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E
-#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E
+#define INTEL_FAM6_KABYLAKE_L 0x8E
+#define INTEL_FAM6_KABYLAKE 0x9E
-#define INTEL_FAM6_CANNONLAKE_MOBILE 0x66
+#define INTEL_FAM6_CANNONLAKE_L 0x66
#define INTEL_FAM6_ICELAKE_X 0x6A
-#define INTEL_FAM6_ICELAKE_XEON_D 0x6C
-#define INTEL_FAM6_ICELAKE_DESKTOP 0x7D
-#define INTEL_FAM6_ICELAKE_MOBILE 0x7E
+#define INTEL_FAM6_ICELAKE_D 0x6C
+#define INTEL_FAM6_ICELAKE 0x7D
+#define INTEL_FAM6_ICELAKE_L 0x7E
#define INTEL_FAM6_ICELAKE_NNPI 0x9D
+#define INTEL_FAM6_TIGERLAKE_L 0x8C
+#define INTEL_FAM6_TIGERLAKE 0x8D
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
@@ -68,17 +93,21 @@
#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
-#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */
+#define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */
#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
+#define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
-#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
+#define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */
+
+/* Note: the micro-architecture is "Goldmont Plus" */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
-#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */
+#define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */
+#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
/* Xeon Phi */
diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h
index 634f99b1dc22..423b788f495e 100644
--- a/arch/x86/include/asm/intel_pt.h
+++ b/arch/x86/include/asm/intel_pt.h
@@ -28,10 +28,12 @@ enum pt_capabilities {
void cpu_emergency_stop_pt(void);
extern u32 intel_pt_validate_hw_cap(enum pt_capabilities cap);
extern u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities cap);
+extern int is_intel_pt_event(struct perf_event *event);
#else
static inline void cpu_emergency_stop_pt(void) {}
static inline u32 intel_pt_validate_hw_cap(enum pt_capabilities cap) { return 0; }
static inline u32 intel_pt_validate_cap(u32 *caps, enum pt_capabilities capability) { return 0; }
+static inline int is_intel_pt_event(struct perf_event *event) { return 0; }
#endif
#endif /* _ASM_X86_INTEL_PT_H */
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index baedab8ac538..b91623d521d9 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -4,7 +4,6 @@
extern int force_iommu, no_iommu;
extern int iommu_detected;
-extern int iommu_pass_through;
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8282b8d41209..bdc16b0aa7c6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -35,6 +35,8 @@
#include <asm/kvm_vcpu_regs.h>
#include <asm/hyperv-tlfs.h>
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+
#define KVM_MAX_VCPUS 288
#define KVM_SOFT_MAX_VCPUS 240
#define KVM_MAX_VCPU_ID 1023
@@ -333,6 +335,7 @@ struct kvm_mmu_page {
int root_count; /* Currently serving as active root */
unsigned int unsync_children;
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
+ unsigned long mmu_valid_gen;
DECLARE_BITMAP(unsync_child_bitmap, 512);
#ifdef CONFIG_X86_32
@@ -607,15 +610,16 @@ struct kvm_vcpu_arch {
/*
* QEMU userspace and the guest each have their own FPU state.
- * In vcpu_run, we switch between the user, maintained in the
- * task_struct struct, and guest FPU contexts. While running a VCPU,
- * the VCPU thread will have the guest FPU context.
+ * In vcpu_run, we switch between the user and guest FPU contexts.
+ * While running a VCPU, the VCPU thread will have the guest FPU
+ * context.
*
* Note that while the PKRU state lives inside the fpu registers,
* it is switched out separately at VMENTER and VMEXIT time. The
* "guest_fpu" state here contains the guest FPU context, with the
* host PRKU bits.
*/
+ struct fpu *user_fpu;
struct fpu *guest_fpu;
u64 xcr0;
@@ -853,6 +857,7 @@ struct kvm_arch {
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
+ unsigned long mmu_valid_gen;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
/*
* Hash table of struct kvm_mmu_page.
@@ -1174,6 +1179,7 @@ struct kvm_x86_ops {
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
+ bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
bool *expired);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 9024236693d2..16ae821483c8 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -28,16 +28,16 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
-static inline void load_mm_cr4(struct mm_struct *mm)
+static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)
{
if (static_branch_unlikely(&rdpmc_always_available_key) ||
atomic_read(&mm->context.perf_rdpmc_allowed))
- cr4_set_bits(X86_CR4_PCE);
+ cr4_set_bits_irqsoff(X86_CR4_PCE);
else
- cr4_clear_bits(X86_CR4_PCE);
+ cr4_clear_bits_irqsoff(X86_CR4_PCE);
}
#else
-static inline void load_mm_cr4(struct mm_struct *mm) {}
+static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {}
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 6b4fc2788078..20ce682a2540 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -375,13 +375,22 @@
/* Alternative perfctr range with full access. */
#define MSR_IA32_PMC0 0x000004c1
-/* AMD64 MSRs. Not complete. See the architecture manual for a more
- complete list. */
+/* Auto-reload via MSR instead of DS area */
+#define MSR_RELOAD_PMC0 0x000014c1
+#define MSR_RELOAD_FIXED_CTR0 0x00001309
+/*
+ * AMD64 MSRs. Not complete. See the architecture manual for a more
+ * complete list.
+ */
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
+#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_PATCH_LOADER 0xc0010020
+#define MSR_AMD_PERF_CTL 0xc0010062
+#define MSR_AMD_PERF_STATUS 0xc0010063
+#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD64_LS_CFG 0xc0011020
@@ -560,9 +569,6 @@
#define MSR_IA32_PERF_STATUS 0x00000198
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
-#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
-#define MSR_AMD_PERF_STATUS 0xc0010063
-#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 5cc3930cb465..86f20d520a07 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -233,8 +233,7 @@ static __always_inline unsigned long long rdtsc_ordered(void)
* Thus, use the preferred barrier on the respective CPU, aiming for
* RDTSCP as the default.
*/
- asm volatile(ALTERNATIVE_3("rdtsc",
- "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC,
+ asm volatile(ALTERNATIVE_2("rdtsc",
"lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC,
"rdtscp", X86_FEATURE_RDTSCP)
: EAX_EDX_RET(val, low, high)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 109f974f9835..80bc209c0708 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -192,7 +192,7 @@
" lfence;\n" \
" jmp 902b;\n" \
" .align 16\n" \
- "903: addl $4, %%esp;\n" \
+ "903: lea 4(%%esp), %%esp;\n" \
" pushl %[thunk_target];\n" \
" ret;\n" \
" .align 16\n" \
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 1392d5e6e8d6..ee26e9215f18 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -252,16 +252,20 @@ struct pebs_lbr {
#define IBSCTL_LVT_OFFSET_VALID (1ULL<<8)
#define IBSCTL_LVT_OFFSET_MASK 0x0F
-/* ibs fetch bits/masks */
+/* IBS fetch bits/masks */
#define IBS_FETCH_RAND_EN (1ULL<<57)
#define IBS_FETCH_VAL (1ULL<<49)
#define IBS_FETCH_ENABLE (1ULL<<48)
#define IBS_FETCH_CNT 0xFFFF0000ULL
#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
-/* ibs op bits/masks */
-/* lower 4 bits of the current count are ignored: */
-#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32)
+/*
+ * IBS op bits/masks
+ * The lower 7 bits of the current count are random bits
+ * preloaded by hardware and ignored in software
+ */
+#define IBS_OP_CUR_CNT (0xFFF80ULL<<32)
+#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 99a7fa9ab0a3..3d4cb83a8828 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -102,7 +102,7 @@ static __always_inline bool should_resched(int preempt_offset)
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
extern asmlinkage void ___preempt_schedule(void);
# define __preempt_schedule() \
asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index bd5ac6cc37db..444d6fd9a6d8 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -63,10 +63,25 @@ static inline bool vcpu_is_preempted(long cpu)
#endif
#ifdef CONFIG_PARAVIRT
+/*
+ * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack.
+ *
+ * Native (and PV wanting native due to vCPU pinning) should disable this key.
+ * It is done in this backwards fashion to only have a single direction change,
+ * which removes ordering between native_pv_spin_init() and HV setup.
+ */
DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
void native_pv_lock_init(void) __init;
+/*
+ * Shortcut for the queued_spin_lock_slowpath() function that allows
+ * virt to hijack it.
+ *
+ * Returns:
+ * true - lock has been negotiated, all done;
+ * false - queued_spin_lock_slowpath() will do its thing.
+ */
#define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock)
{
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index c53682303c9c..09ecc32f6524 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -20,7 +20,6 @@ struct real_mode_header {
u32 ro_end;
/* SMP trampoline */
u32 trampoline_start;
- u32 trampoline_status;
u32 trampoline_header;
#ifdef CONFIG_X86_64
u32 trampoline_pgd;
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index ae7b909dc242..2ee8e469dcf5 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -40,7 +40,6 @@ int _set_memory_wt(unsigned long addr, int numpages);
int _set_memory_wb(unsigned long addr, int numpages);
int set_memory_uc(unsigned long addr, int numpages);
int set_memory_wc(unsigned long addr, int numpages);
-int set_memory_wt(unsigned long addr, int numpages);
int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
@@ -48,11 +47,6 @@ int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
int set_memory_np_noalias(unsigned long addr, int numpages);
-int set_memory_array_uc(unsigned long *addr, int addrinarray);
-int set_memory_array_wc(unsigned long *addr, int addrinarray);
-int set_memory_array_wt(unsigned long *addr, int addrinarray);
-int set_memory_array_wb(unsigned long *addr, int addrinarray);
-
int set_pages_array_uc(struct page **pages, int addrinarray);
int set_pages_array_wc(struct page **pages, int addrinarray);
int set_pages_array_wt(struct page **pages, int addrinarray);
@@ -80,8 +74,6 @@ int set_pages_array_wb(struct page **pages, int addrinarray);
int set_pages_uc(struct page *page, int numpages);
int set_pages_wb(struct page *page, int numpages);
-int set_pages_x(struct page *page, int numpages);
-int set_pages_nx(struct page *page, int numpages);
int set_pages_ro(struct page *page, int numpages);
int set_pages_rw(struct page *page, int numpages);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 2dc4a021beea..8db3fdb6102e 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -36,6 +36,10 @@ extern const sys_call_ptr_t sys_call_table[];
extern const sys_call_ptr_t ia32_sys_call_table[];
#endif
+#ifdef CONFIG_X86_X32_ABI
+extern const sys_call_ptr_t x32_sys_call_table[];
+#endif
+
/*
* Only the low 32 bits of orig_ax are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 70c09967a999..5e8319bb207a 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -45,8 +45,8 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
* no thread can be preempted in the instructions being modified (no iret to an
* invalid instruction possible) or if the instructions are changed from a
* consistent state to another consistent state atomically.
- * On the local CPU you need to be protected again NMI or MCE handlers seeing an
- * inconsistent instruction while you patch.
+ * On the local CPU you need to be protected against NMI or MCE handlers seeing
+ * an inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index dee375831962..6f66d841262d 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -290,26 +290,42 @@ static inline void __cr4_set(unsigned long cr4)
}
/* Set in this cpu's CR4. */
-static inline void cr4_set_bits(unsigned long mask)
+static inline void cr4_set_bits_irqsoff(unsigned long mask)
{
- unsigned long cr4, flags;
+ unsigned long cr4;
- local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 | mask) != cr4)
__cr4_set(cr4 | mask);
- local_irq_restore(flags);
}
/* Clear in this cpu's CR4. */
-static inline void cr4_clear_bits(unsigned long mask)
+static inline void cr4_clear_bits_irqsoff(unsigned long mask)
{
- unsigned long cr4, flags;
+ unsigned long cr4;
- local_irq_save(flags);
cr4 = this_cpu_read(cpu_tlbstate.cr4);
if ((cr4 & ~mask) != cr4)
__cr4_set(cr4 & ~mask);
+}
+
+/* Set in this cpu's CR4. */
+static inline void cr4_set_bits(unsigned long mask)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cr4_set_bits_irqsoff(mask);
+ local_irq_restore(flags);
+}
+
+/* Clear in this cpu's CR4. */
+static inline void cr4_clear_bits(unsigned long mask)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cr4_clear_bits_irqsoff(mask);
local_irq_restore(flags);
}
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 9c4435307ff8..35c225ede0e4 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -444,8 +444,10 @@ __pu_label: \
({ \
int __gu_err; \
__inttype(*(ptr)) __gu_val; \
+ __typeof__(ptr) __gu_ptr = (ptr); \
+ __typeof__(size) __gu_size = (size); \
__uaccess_begin_nospec(); \
- __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \
+ __get_user_size(__gu_val, __gu_ptr, __gu_size, __gu_err, -EFAULT); \
__uaccess_end(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
__builtin_expect(__gu_err, 0); \
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 097589753fec..a7dd080749ce 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -5,12 +5,6 @@
#include <uapi/asm/unistd.h>
-# ifdef CONFIG_X86_X32_ABI
-# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
-# else
-# define __SYSCALL_MASK (~0)
-# endif
-
# ifdef CONFIG_X86_32
# include <asm/unistd_32.h>
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index e60c45fd3679..6bc6d89d8e2a 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -12,10 +12,12 @@ struct mm_struct;
#ifdef CONFIG_X86_UV
#include <linux/efi.h>
+extern unsigned long uv_systab_phys;
+
extern enum uv_system_type get_uv_system_type(void);
static inline bool is_early_uv_system(void)
{
- return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+ return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR;
}
extern int is_uv_system(void);
extern int is_uv_hubless(void);
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index ae91429129a6..ba71a63cdac4 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -96,6 +96,8 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
#else
+#define VDSO_HAS_32BIT_FALLBACK 1
+
static __always_inline
long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
{
@@ -114,6 +116,23 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
}
static __always_inline
+long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+static __always_inline
long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
struct timezone *_tz)
{
@@ -148,6 +167,23 @@ clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
}
+static __always_inline
+long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
diff --git a/arch/x86/include/uapi/asm/byteorder.h b/arch/x86/include/uapi/asm/byteorder.h
index 484e3cfd7ef2..149143cab9ff 100644
--- a/arch/x86/include/uapi/asm/byteorder.h
+++ b/arch/x86/include/uapi/asm/byteorder.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_BYTEORDER_H
#define _ASM_X86_BYTEORDER_H
diff --git a/arch/x86/include/uapi/asm/errno.h b/arch/x86/include/uapi/asm/errno.h
deleted file mode 100644
index 4c82b503d92f..000000000000
--- a/arch/x86/include/uapi/asm/errno.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/errno.h>
diff --git a/arch/x86/include/uapi/asm/fcntl.h b/arch/x86/include/uapi/asm/fcntl.h
deleted file mode 100644
index 46ab12db5739..000000000000
--- a/arch/x86/include/uapi/asm/fcntl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/fcntl.h>
diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h
index 6ebaae90e207..8b2effe6efb8 100644
--- a/arch/x86/include/uapi/asm/hwcap2.h
+++ b/arch/x86/include/uapi/asm/hwcap2.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_HWCAP2_H
#define _ASM_X86_HWCAP2_H
diff --git a/arch/x86/include/uapi/asm/ioctl.h b/arch/x86/include/uapi/asm/ioctl.h
deleted file mode 100644
index b279fe06dfe5..000000000000
--- a/arch/x86/include/uapi/asm/ioctl.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctl.h>
diff --git a/arch/x86/include/uapi/asm/ioctls.h b/arch/x86/include/uapi/asm/ioctls.h
deleted file mode 100644
index ec34c760665e..000000000000
--- a/arch/x86/include/uapi/asm/ioctls.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ioctls.h>
diff --git a/arch/x86/include/uapi/asm/ipcbuf.h b/arch/x86/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 84c7e51cb6d0..000000000000
--- a/arch/x86/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/x86/include/uapi/asm/param.h b/arch/x86/include/uapi/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/x86/include/uapi/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/x86/include/uapi/asm/resource.h b/arch/x86/include/uapi/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/x86/include/uapi/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/x86/include/uapi/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h
index 6b18e88de8a6..7114801d0499 100644
--- a/arch/x86/include/uapi/asm/sigcontext32.h
+++ b/arch/x86/include/uapi/asm/sigcontext32.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_SIGCONTEXT32_H
#define _ASM_X86_SIGCONTEXT32_H
diff --git a/arch/x86/include/uapi/asm/termbits.h b/arch/x86/include/uapi/asm/termbits.h
deleted file mode 100644
index 3935b106de79..000000000000
--- a/arch/x86/include/uapi/asm/termbits.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termbits.h>
diff --git a/arch/x86/include/uapi/asm/termios.h b/arch/x86/include/uapi/asm/termios.h
deleted file mode 100644
index 280d78a9d966..000000000000
--- a/arch/x86/include/uapi/asm/termios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/termios.h>
diff --git a/arch/x86/include/uapi/asm/types.h b/arch/x86/include/uapi/asm/types.h
deleted file mode 100644
index df55e1ddb0c9..000000000000
--- a/arch/x86/include/uapi/asm/types.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_TYPES_H
-#define _ASM_X86_TYPES_H
-
-#include <asm-generic/types.h>
-
-#endif /* _ASM_X86_TYPES_H */
diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h
index 30d7d04d72d6..196fdd02b8b1 100644
--- a/arch/x86/include/uapi/asm/unistd.h
+++ b/arch/x86/include/uapi/asm/unistd.h
@@ -3,7 +3,7 @@
#define _UAPI_ASM_X86_UNISTD_H
/* x32 syscall flag bit */
-#define __X32_SYSCALL_BIT 0x40000000
+#define __X32_SYSCALL_BIT 0x40000000UL
#ifndef __KERNEL__
# ifdef __i386__
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index b0715c3ac18d..7f9ade13bbcf 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -18,8 +18,13 @@ ENTRY(wakeup_long64)
movq saved_magic, %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
- jne bogus_64_magic
+ je 2f
+ /* stop here on a saved_magic mismatch */
+ movq $0xbad6d61676963, %rcx
+1:
+ jmp 1b
+2:
movw $__KERNEL_DS, %ax
movw %ax, %ss
movw %ax, %ds
@@ -37,9 +42,6 @@ ENTRY(wakeup_long64)
jmp *%rax
ENDPROC(wakeup_long64)
-bogus_64_magic:
- jmp bogus_64_magic
-
ENTRY(do_suspend_lowlevel)
FRAME_BEGIN
subq $8, %rsp
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ccd32013c47a..9d3a971ea364 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -713,7 +713,7 @@ void __init alternative_instructions(void)
* Don't stop machine check exceptions while patching.
* MCEs only happen when something got corrupted and in this
* case we must do something about the corruption.
- * Ignoring it is worse than a unlikely patching race.
+ * Ignoring it is worse than an unlikely patching race.
* Also machine checks tend to be broadcast and if one CPU
* goes into machine check the others follow quickly, so we don't
* expect a machine check to cause undue problems during to code
@@ -753,8 +753,8 @@ void __init alternative_instructions(void)
* When you use this code to patch more than one byte of an instruction
* you need to make sure that other CPUs cannot execute this code in parallel.
* Also no thread must be currently preempted in the middle of these
- * instructions. And on the local CPU you need to be protected again NMI or MCE
- * handlers seeing an inconsistent instruction while you patch.
+ * instructions. And on the local CPU you need to be protected against NMI or
+ * MCE handlers seeing an inconsistent instruction while you patch.
*/
void __init_or_module text_poke_early(void *addr, const void *opcode,
size_t len)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index d63e63b7d1d9..251c795b4eb3 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -21,6 +21,7 @@
#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
+#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
/* Protect the PCI config register pairs used for SMN and DF indirect access. */
static DEFINE_MUTEX(smn_mutex);
@@ -50,6 +51,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{}
};
EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -63,6 +65,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{}
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f5291362da1a..909abb2d59eb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -590,21 +590,21 @@ static u32 skx_deadline_rev(void)
static const struct x86_cpu_id deadline_match[] = {
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020),
- DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev),
+ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev),
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E, 0x17),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL, 0x22),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L, 0x20),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G, 0x17),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE, 0x25),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E, 0x17),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL, 0x25),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G, 0x17),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE, 0xb2),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP, 0xb2),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L, 0xb2),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE, 0xb2),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE, 0x52),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP, 0x52),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L, 0x52),
+ DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE, 0x52),
{},
};
@@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
/*
- * Temporary interrupt handler.
+ * Temporary interrupt handler and polled calibration function.
*/
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
@@ -834,6 +834,10 @@ bool __init apic_needs_pit(void)
if (!boot_cpu_has(X86_FEATURE_APIC))
return true;
+ /* Virt guests may lack ARAT, but still have DEADLINE */
+ if (!boot_cpu_has(X86_FEATURE_ARAT))
+ return true;
+
/* Deadline timer is based on TSC so no further PIT action required */
if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
return false;
@@ -851,7 +855,8 @@ bool __init apic_needs_pit(void)
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
- void (*real_handler)(struct clock_event_device *dev);
+ u64 tsc_perj = 0, tsc_start = 0;
+ unsigned long jif_start;
unsigned long deltaj;
long delta, deltatsc;
int pm_referenced = 0;
@@ -878,28 +883,64 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");
+ /*
+ * There are platforms w/o global clockevent devices. Instead of
+ * making the calibration conditional on that, use a polling based
+ * approach everywhere.
+ */
local_irq_disable();
- /* Replace the global interrupt handler */
- real_handler = global_clock_event->event_handler;
- global_clock_event->event_handler = lapic_cal_handler;
-
/*
* Setup the APIC counter to maximum. There is no way the lapic
* can underflow in the 100ms detection time frame
*/
__setup_APIC_LVTT(0xffffffff, 0, 0);
- /* Let the interrupts run */
+ /*
+ * Methods to terminate the calibration loop:
+ * 1) Global clockevent if available (jiffies)
+ * 2) TSC if available and frequency is known
+ */
+ jif_start = READ_ONCE(jiffies);
+
+ if (tsc_khz) {
+ tsc_start = rdtsc();
+ tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
+ }
+
+ /*
+ * Enable interrupts so the tick can fire, if a global
+ * clockevent device is available
+ */
local_irq_enable();
- while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
- cpu_relax();
+ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
+ /* Wait for a tick to elapse */
+ while (1) {
+ if (tsc_khz) {
+ u64 tsc_now = rdtsc();
+ if ((tsc_now - tsc_start) >= tsc_perj) {
+ tsc_start += tsc_perj;
+ break;
+ }
+ } else {
+ unsigned long jif_now = READ_ONCE(jiffies);
- local_irq_disable();
+ if (time_after(jif_now, jif_start)) {
+ jif_start = jif_now;
+ break;
+ }
+ }
+ cpu_relax();
+ }
- /* Restore the real event handler */
- global_clock_event->event_handler = real_handler;
+ /* Invoke the calibration routine */
+ local_irq_disable();
+ lapic_cal_handler(NULL);
+ local_irq_enable();
+ }
+
+ local_irq_disable();
/* Build delta t1-t2 as apic timer counts down */
delta = lapic_cal_t1 - lapic_cal_t2;
@@ -943,10 +984,11 @@ static int __init calibrate_APIC_clock(void)
levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
/*
- * PM timer calibration failed or not turned on
- * so lets try APIC timer based calibration
+ * PM timer calibration failed or not turned on so lets try APIC
+ * timer based calibration, if a global clockevent device is
+ * available.
*/
- if (!pm_referenced) {
+ if (!pm_referenced && global_clock_event) {
apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
/*
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index afee386ff711..caedd8d60d36 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -38,32 +38,12 @@ static int bigsmp_early_logical_apicid(int cpu)
return early_per_cpu(x86_cpu_to_apicid, cpu);
}
-static inline unsigned long calculate_ldr(int cpu)
-{
- unsigned long val, id;
-
- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
- id = per_cpu(x86_bios_cpu_apicid, cpu);
- val |= SET_APIC_LOGICAL_ID(id);
-
- return val;
-}
-
/*
- * Set up the logical destination ID.
- *
- * Intel recommends to set DFR, LDR and TPR before enabling
- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
- * document number 292116). So here it goes...
+ * bigsmp enables physical destination mode
+ * and doesn't use LDR and DFR
*/
static void bigsmp_init_apic_ldr(void)
{
- unsigned long val;
- int cpu = smp_processor_id();
-
- apic_write(APIC_DFR, APIC_DFR_FLAT);
- val = calculate_ldr(cpu);
- apic_write(APIC_LDR, val);
}
static void bigsmp_setup_apic_routing(void)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index c7bb6c69f21c..d6af97fd170a 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2438,7 +2438,13 @@ unsigned int arch_dynirq_lower_bound(unsigned int from)
* dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
* gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
- return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
+ if (!ioapic_initialized)
+ return gsi_top;
+ /*
+ * For DT enabled machines ioapic_dynirq_base is irrelevant and not
+ * updated. So simply return @from if ioapic_dynirq_base == 0.
+ */
+ return ioapic_dynirq_base ? : from;
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1492799b8f43..ee2d91e382f1 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -184,7 +184,8 @@ void __init default_setup_apic_routing(void)
def_to_bigsmp = 0;
break;
}
- /* If P4 and above fall through */
+ /* P4 and above */
+ /* fall through */
case X86_VENDOR_HYGON:
case X86_VENDOR_AMD:
def_to_bigsmp = 1;
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d3d075226c0a..70e97727a26a 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -6,13 +6,28 @@
#include <asm/ia32.h>
#define __SYSCALL_64(nr, sym, qual) [nr] = 1,
+#define __SYSCALL_X32(nr, sym, qual)
static char syscalls_64[] = {
#include <asm/syscalls_64.h>
};
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#ifdef CONFIG_X86_X32_ABI
+#define __SYSCALL_64(nr, sym, qual)
+#define __SYSCALL_X32(nr, sym, qual) [nr] = 1,
+static char syscalls_x32[] = {
+#include <asm/syscalls_64.h>
+};
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+#endif
+
#define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
+#undef __SYSCALL_I386
#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
#include <asm/kvm_para.h>
@@ -80,6 +95,11 @@ int main(void)
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
DEFINE(NR_syscalls, sizeof(syscalls_64));
+#ifdef CONFIG_X86_X32_ABI
+ DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1);
+ DEFINE(X32_NR_syscalls, sizeof(syscalls_x32));
+#endif
+
DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 8d4e50428b68..90f75e515876 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/random.h>
+#include <linux/topology.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cacheinfo.h>
@@ -804,6 +805,64 @@ static void init_amd_ln(struct cpuinfo_x86 *c)
msr_set_bit(MSR_AMD64_DE_CFG, 31);
}
+static bool rdrand_force;
+
+static int __init rdrand_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "force"))
+ rdrand_force = true;
+ else
+ return -EINVAL;
+
+ return 0;
+}
+early_param("rdrand", rdrand_cmdline);
+
+static void clear_rdrand_cpuid_bit(struct cpuinfo_x86 *c)
+{
+ /*
+ * Saving of the MSR used to hide the RDRAND support during
+ * suspend/resume is done by arch/x86/power/cpu.c, which is
+ * dependent on CONFIG_PM_SLEEP.
+ */
+ if (!IS_ENABLED(CONFIG_PM_SLEEP))
+ return;
+
+ /*
+ * The nordrand option can clear X86_FEATURE_RDRAND, so check for
+ * RDRAND support using the CPUID function directly.
+ */
+ if (!(cpuid_ecx(1) & BIT(30)) || rdrand_force)
+ return;
+
+ msr_clear_bit(MSR_AMD64_CPUID_FN_1, 62);
+
+ /*
+ * Verify that the CPUID change has occurred in case the kernel is
+ * running virtualized and the hypervisor doesn't support the MSR.
+ */
+ if (cpuid_ecx(1) & BIT(30)) {
+ pr_info_once("BIOS may not properly restore RDRAND after suspend, but hypervisor does not support hiding RDRAND via CPUID.\n");
+ return;
+ }
+
+ clear_cpu_cap(c, X86_FEATURE_RDRAND);
+ pr_info_once("BIOS may not properly restore RDRAND after suspend, hiding RDRAND via CPUID. Use rdrand=force to reenable.\n");
+}
+
+static void init_amd_jg(struct cpuinfo_x86 *c)
+{
+ /*
+ * Some BIOS implementations do not restore proper RDRAND support
+ * across suspend and resume. Check on whether to hide the RDRAND
+ * instruction support via CPUID.
+ */
+ clear_rdrand_cpuid_bit(c);
+}
+
static void init_amd_bd(struct cpuinfo_x86 *c)
{
u64 value;
@@ -818,12 +877,23 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
wrmsrl_safe(MSR_F15H_IC_CFG, value);
}
}
+
+ /*
+ * Some BIOS implementations do not restore proper RDRAND support
+ * across suspend and resume. Check on whether to hide the RDRAND
+ * instruction support via CPUID.
+ */
+ clear_rdrand_cpuid_bit(c);
}
static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
+#ifdef CONFIG_NUMA
+ node_reclaim_distance = 32;
+#endif
+
/*
* Fix erratum 1076: CPB feature bit not being set in CPUID.
* Always set it, except when running under a hypervisor.
@@ -860,6 +930,7 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x10: init_amd_gh(c); break;
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
+ case 0x16: init_amd_jg(c); break;
case 0x17: init_amd_zn(c); break;
}
@@ -879,12 +950,8 @@ static void init_amd(struct cpuinfo_x86 *c)
init_amd_cacheinfo(c);
if (cpu_has(c, X86_FEATURE_XMM2)) {
- unsigned long long val;
- int ret;
-
/*
- * A serializing LFENCE has less overhead than MFENCE, so
- * use it for execution serialization. On families which
+ * Use LFENCE for execution serialization. On families which
* don't have that MSR, LFENCE is already serializing.
* msr_set_bit() uses the safe accessors, too, even if the MSR
* is not present.
@@ -892,19 +959,8 @@ static void init_amd(struct cpuinfo_x86 *c)
msr_set_bit(MSR_F10H_DECFG,
MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
- /*
- * Verify that the MSR write was successful (could be running
- * under a hypervisor) and only then assume that LFENCE is
- * serializing.
- */
- ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
- if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
- /* A serializing LFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
- } else {
- /* MFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
- }
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
}
/*
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 66ca906aa790..0b569f10d4a0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -34,6 +34,7 @@
#include "cpu.h"
+static void __init spectre_v1_select_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
@@ -98,17 +99,11 @@ void __init check_bugs(void)
if (boot_cpu_has(X86_FEATURE_STIBP))
x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
- /* Select the proper spectre mitigation before patching alternatives */
+ /* Select the proper CPU mitigations before patching alternatives: */
+ spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
-
- /*
- * Select proper mitigation for any exposure to the Speculative Store
- * Bypass vulnerability.
- */
ssb_select_mitigation();
-
l1tf_select_mitigation();
-
mds_select_mitigation();
arch_smt_update();
@@ -274,6 +269,98 @@ static int __init mds_cmdline(char *str)
early_param("mds", mds_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V1 : " fmt
+
+enum spectre_v1_mitigation {
+ SPECTRE_V1_MITIGATION_NONE,
+ SPECTRE_V1_MITIGATION_AUTO,
+};
+
+static enum spectre_v1_mitigation spectre_v1_mitigation __ro_after_init =
+ SPECTRE_V1_MITIGATION_AUTO;
+
+static const char * const spectre_v1_strings[] = {
+ [SPECTRE_V1_MITIGATION_NONE] = "Vulnerable: __user pointer sanitization and usercopy barriers only; no swapgs barriers",
+ [SPECTRE_V1_MITIGATION_AUTO] = "Mitigation: usercopy/swapgs barriers and __user pointer sanitization",
+};
+
+/*
+ * Does SMAP provide full mitigation against speculative kernel access to
+ * userspace?
+ */
+static bool smap_works_speculatively(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_SMAP))
+ return false;
+
+ /*
+ * On CPUs which are vulnerable to Meltdown, SMAP does not
+ * prevent speculative access to user data in the L1 cache.
+ * Consider SMAP to be non-functional as a mitigation on these
+ * CPUs.
+ */
+ if (boot_cpu_has(X86_BUG_CPU_MELTDOWN))
+ return false;
+
+ return true;
+}
+
+static void __init spectre_v1_select_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1) || cpu_mitigations_off()) {
+ spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
+ return;
+ }
+
+ if (spectre_v1_mitigation == SPECTRE_V1_MITIGATION_AUTO) {
+ /*
+ * With Spectre v1, a user can speculatively control either
+ * path of a conditional swapgs with a user-controlled GS
+ * value. The mitigation is to add lfences to both code paths.
+ *
+ * If FSGSBASE is enabled, the user can put a kernel address in
+ * GS, in which case SMAP provides no protection.
+ *
+ * [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the
+ * FSGSBASE enablement patches have been merged. ]
+ *
+ * If FSGSBASE is disabled, the user can only put a user space
+ * address in GS. That makes an attack harder, but still
+ * possible if there's no SMAP protection.
+ */
+ if (!smap_works_speculatively()) {
+ /*
+ * Mitigation can be provided from SWAPGS itself or
+ * PTI as the CR3 write in the Meltdown mitigation
+ * is serializing.
+ *
+ * If neither is there, mitigate with an LFENCE to
+ * stop speculation through swapgs.
+ */
+ if (boot_cpu_has_bug(X86_BUG_SWAPGS) &&
+ !boot_cpu_has(X86_FEATURE_PTI))
+ setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_USER);
+
+ /*
+ * Enable lfences in the kernel entry (non-swapgs)
+ * paths, to prevent user entry from speculatively
+ * skipping swapgs.
+ */
+ setup_force_cpu_cap(X86_FEATURE_FENCE_SWAPGS_KERNEL);
+ }
+ }
+
+ pr_info("%s\n", spectre_v1_strings[spectre_v1_mitigation]);
+}
+
+static int __init nospectre_v1_cmdline(char *str)
+{
+ spectre_v1_mitigation = SPECTRE_V1_MITIGATION_NONE;
+ return 0;
+}
+early_param("nospectre_v1", nospectre_v1_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
@@ -1097,15 +1184,15 @@ static void override_cache_bits(struct cpuinfo_x86 *c)
case INTEL_FAM6_WESTMERE:
case INTEL_FAM6_SANDYBRIDGE:
case INTEL_FAM6_IVYBRIDGE:
- case INTEL_FAM6_HASWELL_CORE:
- case INTEL_FAM6_HASWELL_ULT:
- case INTEL_FAM6_HASWELL_GT3E:
- case INTEL_FAM6_BROADWELL_CORE:
- case INTEL_FAM6_BROADWELL_GT3E:
- case INTEL_FAM6_SKYLAKE_MOBILE:
- case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_KABYLAKE_MOBILE:
- case INTEL_FAM6_KABYLAKE_DESKTOP:
+ case INTEL_FAM6_HASWELL:
+ case INTEL_FAM6_HASWELL_L:
+ case INTEL_FAM6_HASWELL_G:
+ case INTEL_FAM6_BROADWELL:
+ case INTEL_FAM6_BROADWELL_G:
+ case INTEL_FAM6_SKYLAKE_L:
+ case INTEL_FAM6_SKYLAKE:
+ case INTEL_FAM6_KABYLAKE_L:
+ case INTEL_FAM6_KABYLAKE:
if (c->x86_cache_bits < 44)
c->x86_cache_bits = 44;
break;
@@ -1226,7 +1313,7 @@ static ssize_t l1tf_show_state(char *buf)
static ssize_t mds_show_state(char *buf)
{
- if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
return sprintf(buf, "%s; SMT Host state unknown\n",
mds_strings[mds_mitigation]);
}
@@ -1290,7 +1377,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
break;
case X86_BUG_SPECTRE_V1:
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]);
case X86_BUG_SPECTRE_V2:
return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 11472178e17f..030e52749a74 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1022,6 +1022,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_L1TF BIT(3)
#define NO_MDS BIT(4)
#define MSBDS_ONLY BIT(5)
+#define NO_SWAPGS BIT(6)
#define VULNWL(_vendor, _family, _model, _whitelist) \
{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -1048,30 +1049,39 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
- VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY),
- VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY),
+ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
VULNWL_INTEL(CORE_YONAH, NO_SSB),
- VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY),
+ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS),
- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF),
- VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF),
- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF),
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS),
+
+ /*
+ * Technically, swapgs isn't serializing on AMD (despite it previously
+ * being documented as such in the APM). But according to AMD, %gs is
+ * updated non-speculatively, and the issuing of %gs-relative memory
+ * operands will be blocked until the %gs update completes, which is
+ * good enough for our purposes.
+ */
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
{}
};
@@ -1108,6 +1118,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
}
+ if (!cpu_matches(NO_SWAPGS))
+ setup_force_cpu_bug(X86_BUG_SWAPGS);
+
if (cpu_matches(NO_MELTDOWN))
return;
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index b5353244749b..3cbe24ca80ab 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -20,54 +20,55 @@ struct cpuid_dep {
* but it's difficult to tell that to the init reference checker.
*/
static const struct cpuid_dep cpuid_deps[] = {
- { X86_FEATURE_FXSR, X86_FEATURE_FPU },
- { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
- { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
- { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
- { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
- { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
- { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
- { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
- { X86_FEATURE_CMOV, X86_FEATURE_FXSR },
- { X86_FEATURE_MMX, X86_FEATURE_FXSR },
- { X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
- { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
- { X86_FEATURE_XSAVE, X86_FEATURE_FXSR },
- { X86_FEATURE_XMM, X86_FEATURE_FXSR },
- { X86_FEATURE_XMM2, X86_FEATURE_XMM },
- { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
- { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
- { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
- { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
- { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
- { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
- { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
- { X86_FEATURE_AES, X86_FEATURE_XMM2 },
- { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
- { X86_FEATURE_FMA, X86_FEATURE_AVX },
- { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
- { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
- { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
- { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
- { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
- { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
- { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
- { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
- { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
- { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
- { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC },
- { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
- { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
- { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_FXSR, X86_FEATURE_FPU },
+ { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
+ { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
+ { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_CMOV, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMX, X86_FEATURE_FXSR },
+ { X86_FEATURE_MMXEXT, X86_FEATURE_MMX },
+ { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XSAVE, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM2, X86_FEATURE_XMM },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
+ { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
+ { X86_FEATURE_AES, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VP2INTERSECT, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC },
+ { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL },
{}
};
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 415621ddb8a2..4e28c1fc8749 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -330,12 +330,8 @@ static void init_hygon(struct cpuinfo_x86 *c)
init_hygon_cacheinfo(c);
if (cpu_has(c, X86_FEATURE_XMM2)) {
- unsigned long long val;
- int ret;
-
/*
- * A serializing LFENCE has less overhead than MFENCE, so
- * use it for execution serialization. On families which
+ * Use LFENCE for execution serialization. On families which
* don't have that MSR, LFENCE is already serializing.
* msr_set_bit() uses the safe accessors, too, even if the MSR
* is not present.
@@ -343,19 +339,8 @@ static void init_hygon(struct cpuinfo_x86 *c)
msr_set_bit(MSR_F10H_DECFG,
MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
- /*
- * Verify that the MSR write was successful (could be running
- * under a hypervisor) and only then assume that LFENCE is
- * serializing.
- */
- ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
- if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
- /* A serializing LFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
- } else {
- /* MFENCE stops RDTSC speculation */
- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
- }
+ /* A serializing LFENCE stops RDTSC speculation */
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
}
/*
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8d6d92ebeb54..c2fdc00df163 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -142,21 +142,21 @@ struct sku_microcode {
u32 microcode;
};
static const struct sku_microcode spectre_bad_microcodes[] = {
- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0B, 0x80 },
- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x0A, 0x80 },
- { INTEL_FAM6_KABYLAKE_DESKTOP, 0x09, 0x80 },
- { INTEL_FAM6_KABYLAKE_MOBILE, 0x0A, 0x80 },
- { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
+ { INTEL_FAM6_KABYLAKE, 0x0B, 0x80 },
+ { INTEL_FAM6_KABYLAKE, 0x0A, 0x80 },
+ { INTEL_FAM6_KABYLAKE, 0x09, 0x80 },
+ { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 },
+ { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 },
{ INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
{ INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
- { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
- { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
- { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
- { INTEL_FAM6_BROADWELL_XEON_D, 0x03, 0x07000011 },
+ { INTEL_FAM6_BROADWELL, 0x04, 0x28 },
+ { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b },
+ { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 },
+ { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 },
{ INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
- { INTEL_FAM6_HASWELL_ULT, 0x01, 0x21 },
- { INTEL_FAM6_HASWELL_GT3E, 0x01, 0x18 },
- { INTEL_FAM6_HASWELL_CORE, 0x03, 0x23 },
+ { INTEL_FAM6_HASWELL_L, 0x01, 0x21 },
+ { INTEL_FAM6_HASWELL_G, 0x01, 0x18 },
+ { INTEL_FAM6_HASWELL, 0x03, 0x23 },
{ INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
{ INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
{ INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
@@ -265,9 +265,10 @@ static void early_init_intel(struct cpuinfo_x86 *c)
/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
if (c->x86 == 6) {
switch (c->x86_model) {
- case 0x27: /* Penwell */
- case 0x35: /* Cloverview */
- case 0x4a: /* Merrifield */
+ case INTEL_FAM6_ATOM_SALTWELL_MID:
+ case INTEL_FAM6_ATOM_SALTWELL_TABLET:
+ case INTEL_FAM6_ATOM_SILVERMONT_MID:
+ case INTEL_FAM6_ATOM_AIRMONT_NP:
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
break;
default:
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index e43eb6732630..88cd9598fa57 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -479,7 +479,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
switch (c->x86_model) {
case INTEL_FAM6_IVYBRIDGE_X:
case INTEL_FAM6_HASWELL_X:
- case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_D:
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_XEON_PHI_KNL:
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 210f1f5db5f7..87bcdc6dc2f0 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -107,11 +107,11 @@ static struct severity {
*/
MCESEV(
AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
+ SER, MASK(MCI_UC_AR|MCACOD_SCRUBMSK, MCI_STATUS_UC|MCACOD_SCRUB)
),
MCESEV(
AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
+ SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
),
/* ignore OVER for UCNA */
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 4296c702a3f7..72182809b333 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -98,6 +98,7 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
case 7:
if (size < 0x40)
break;
+ /* Else, fall through */
case 6:
case 5:
case 4:
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
index 6a204e7336c1..32b4dc9030aa 100644
--- a/arch/x86/kernel/cpu/umwait.c
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -18,6 +18,12 @@
static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
/*
+ * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
+ * hardware or BIOS before kernel boot.
+ */
+static u32 orig_umwait_control_cached __ro_after_init;
+
+/*
* Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
* the sysfs write functions.
*/
@@ -53,6 +59,23 @@ static int umwait_cpu_online(unsigned int cpu)
}
/*
+ * The CPU hotplug callback sets the control MSR to the original control
+ * value.
+ */
+static int umwait_cpu_offline(unsigned int cpu)
+{
+ /*
+ * This code is protected by the CPU hotplug already and
+ * orig_umwait_control_cached is never changed after it caches
+ * the original control MSR value in umwait_init(). So there
+ * is no race condition here.
+ */
+ wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0);
+
+ return 0;
+}
+
+/*
* On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
* is the only active CPU at this time. The MSR is set up on the APs via the
* CPU hotplug callback.
@@ -185,8 +208,22 @@ static int __init umwait_init(void)
if (!boot_cpu_has(X86_FEATURE_WAITPKG))
return -ENODEV;
+ /*
+ * Cache the original control MSR value before the control MSR is
+ * changed. This is the only place where orig_umwait_control_cached
+ * is modified.
+ */
+ rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
+
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
- umwait_cpu_online, NULL);
+ umwait_cpu_online, umwait_cpu_offline);
+ if (ret < 0) {
+ /*
+ * On failure, the control MSR on all CPUs has the
+ * original control value.
+ */
+ return ret;
+ }
register_syscore_ops(&umwait_syscore_ops);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2bf70a2fed90..eb651fbde92a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -225,8 +225,6 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem)
if (crashk_low_res.end) {
ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
crashk_low_res.end);
- if (ret)
- return ret;
}
return ret;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 2b5886401e5f..e07424e19274 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -367,13 +367,18 @@ NOKPROBE_SYMBOL(oops_end);
int __die(const char *str, struct pt_regs *regs, long err)
{
+ const char *pr = "";
+
/* Save the regs of the first oops for the executive summary later. */
if (!die_counter)
exec_summary_regs = *regs;
+ if (IS_ENABLED(CONFIG_PREEMPTION))
+ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
printk(KERN_DEFAULT
"%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+ pr,
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a6342c899be5..f3d3e9646a99 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -193,10 +193,10 @@ ENTRY(secondary_startup_64)
/* Set up %gs.
*
- * The base of %gs always points to the bottom of the irqstack
- * union. If the stack protector canary is enabled, it is
- * located at %gs:40. Note that, on SMP, the boot cpu uses
- * init data section till per cpu areas are set up.
+ * The base of %gs always points to fixed_percpu_data. If the
+ * stack protector canary is enabled, it is located at %gs:40.
+ * Note that, on SMP, the boot cpu uses init data section until
+ * the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
movl initial_gs(%rip),%eax
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c43e96a938d0..c6f791bc481e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -827,10 +827,6 @@ int __init hpet_enable(void)
if (!hpet_cfg_working())
goto out_nohpet;
- /* Validate that the counter is counting */
- if (!hpet_counting())
- goto out_nohpet;
-
/*
* Read the period and check for a sane value:
*/
@@ -896,6 +892,14 @@ int __init hpet_enable(void)
}
hpet_print_config();
+ /*
+ * Validate that the counter is counting. This needs to be done
+ * after sanitizing the config registers to properly deal with
+ * force enabled HPETs.
+ */
+ if (!hpet_counting())
+ goto out_nohpet;
+
clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
if (id & HPET_ID_LEGSUP) {
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 0e0b08008b5a..43fc13c831af 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -580,7 +580,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
if (setup_detour_execution(p, regs, reenter))
return;
-#if !defined(CONFIG_PREEMPT)
+#if !defined(CONFIG_PREEMPTION)
if (p->ainsn.boostable && !p->post_handler) {
/* Boost up -- we can execute copied instructions directly */
if (!reenter)
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 9d4aedece363..b348dd506d58 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -403,7 +403,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
(u8 *)op->kp.addr + op->optinsn.size);
len += RELATIVEJUMP_SIZE;
- /* We have to use text_poke for instuction buffer because it is RO */
+ /* We have to use text_poke() for instruction buffer because it is RO */
text_poke(slot, buf, len);
ret = 0;
out:
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b7f34fe2171e..4cc967178bf9 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -308,13 +308,10 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
static void kvm_guest_cpu_init(void)
{
- if (!kvm_para_available())
- return;
-
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
#endif
pa |= KVM_ASYNC_PF_ENABLED;
@@ -625,9 +622,6 @@ static void __init kvm_guest_init(void)
{
int i;
- if (!kvm_para_available())
- return;
-
paravirt_ops_setup();
register_reboot_notifier(&kvm_pv_reboot_nb);
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
@@ -848,8 +842,6 @@ asm(
*/
void __init kvm_spinlock_init(void)
{
- if (!kvm_para_available())
- return;
/* Does host kernel support KVM_FEATURE_PV_UNHALT? */
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 77854b192fef..7b45e8daad22 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -148,7 +148,7 @@ int machine_kexec_prepare(struct kimage *image)
{
int error;
- set_pages_x(image->control_code_page, 1);
+ set_memory_x((unsigned long)page_address(image->control_code_page), 1);
error = machine_kexec_alloc_page_tables(image);
if (error)
return error;
@@ -162,7 +162,7 @@ int machine_kexec_prepare(struct kimage *image)
*/
void machine_kexec_cleanup(struct kimage *image)
{
- set_pages_nx(image->control_code_page, 1);
+ set_memory_nx((unsigned long)page_address(image->control_code_page), 1);
machine_kexec_free_page_tables(image);
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f62b498b18fb..fa4352dce491 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/dma-direct.h>
#include <linux/dma-debug.h>
+#include <linux/iommu.h>
#include <linux/dmar.h>
#include <linux/export.h>
#include <linux/memblock.h>
@@ -34,21 +35,6 @@ int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;
-/*
- * This variable becomes 1 if iommu=pt is passed on the kernel command line.
- * If this variable is 1, IOMMU implementations do no DMA translation for
- * devices and allow every device to access to whole physical memory. This is
- * useful if a user wants to use an IOMMU only for KVM device assignment to
- * guests and not for driver dma translation.
- * It is also possible to disable by default in kernel config, and enable with
- * iommu=nopt at boot time.
- */
-#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH
-int iommu_pass_through __read_mostly = 1;
-#else
-int iommu_pass_through __read_mostly;
-#endif
-
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
void __init pci_iommu_alloc(void)
@@ -120,9 +106,9 @@ static __init int iommu_setup(char *p)
swiotlb = 1;
#endif
if (!strncmp(p, "pt", 2))
- iommu_pass_through = 1;
+ iommu_set_default_passthrough(true);
if (!strncmp(p, "nopt", 4))
- iommu_pass_through = 0;
+ iommu_set_default_translated(true);
gart_parse_options(p);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0fdbe89d0754..3c5bbe8e4120 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -201,6 +201,7 @@ static int set_segment_reg(struct task_struct *task,
case offsetof(struct user_regs_struct, ss):
if (unlikely(value == 0))
return -EIO;
+ /* Else, fall through */
default:
*pt_regs_access(task_pt_regs(task), offset) = value;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 8451f38ad399..1daf8f2aa21f 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -90,8 +90,6 @@ static void ich_force_hpet_resume(void)
BUG();
else
printk(KERN_DEBUG "Force enabled HPET at resume\n");
-
- return;
}
static void ich_force_enable_hpet(struct pci_dev *dev)
@@ -448,7 +446,6 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev)
dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
force_hpet_address);
cached_dev = dev;
- return;
}
/* ISA Bridges */
@@ -513,7 +510,6 @@ static void e6xx_force_enable_hpet(struct pci_dev *dev)
force_hpet_resume_type = NONE_FORCE_HPET_RESUME;
dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at "
"0x%lx\n", force_hpet_address);
- return;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU,
e6xx_force_enable_hpet);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fdbd47ceb84d..497e9b7077c1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1023,8 +1023,6 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
int *cpu0_nmi_registered)
{
- volatile u32 *trampoline_status =
- (volatile u32 *) __va(real_mode_header->trampoline_status);
/* start_ip had better be page-aligned! */
unsigned long start_ip = real_mode_header->trampoline_start;
@@ -1116,9 +1114,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
}
}
- /* mark "stuck" area as not stuck */
- *trampoline_status = 0;
-
if (x86_platform.legacy.warm_reset) {
/*
* Cleanup possible dangling ends...
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4f36d3241faf..2d6898c2cb64 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -100,7 +100,7 @@ copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
{
int ret;
- if (!access_ok(fp, sizeof(*frame)))
+ if (__range_not_ok(fp, sizeof(*frame), TASK_SIZE))
return 0;
ret = 1;
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index 8eb67a670b10..653b7f617b61 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -230,9 +230,55 @@ static const struct dmi_system_id efifb_dmi_system_table[] __initconst = {
{},
};
+/*
+ * Some devices have a portrait LCD but advertise a landscape resolution (and
+ * pitch). We simply swap width and height for these devices so that we can
+ * correctly deal with some of them coming with multiple resolutions.
+ */
+static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
+ {
+ /*
+ * Lenovo MIIX310-10ICR, only some batches have the troublesome
+ * 800x1280 portrait screen. Luckily the portrait version has
+ * its own BIOS version, so we match on that.
+ */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "MIIX 310-10ICR"),
+ DMI_EXACT_MATCH(DMI_BIOS_VERSION, "1HCN44WW"),
+ },
+ },
+ {
+ /* Lenovo MIIX 320-10ICR with 800x1280 portrait screen */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
+ "Lenovo MIIX 320-10ICR"),
+ },
+ },
+ {
+ /* Lenovo D330 with 800x1280 or 1200x1920 portrait screen */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
+ "Lenovo ideapad D330-10IGM"),
+ },
+ },
+ {},
+};
+
__init void sysfb_apply_efi_quirks(void)
{
if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
!(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
dmi_check_system(efifb_dmi_system_table);
+
+ if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI &&
+ dmi_check_system(efifb_dmi_swap_width_height)) {
+ u16 temp = screen_info.lfb_width;
+
+ screen_info.lfb_width = screen_info.lfb_height;
+ screen_info.lfb_height = temp;
+ screen_info.lfb_linelength = 4 * screen_info.lfb_width;
+ }
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 57d87f79558f..c59454c382fd 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -638,7 +638,7 @@ unsigned long native_calibrate_tsc(void)
* clock.
*/
if (crystal_khz == 0 &&
- boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X)
+ boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_D)
crystal_khz = 25000;
/*
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 067858fe4db8..e0cbe4f2af49 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -58,6 +58,10 @@ static const struct freq_desc freq_desc_ann = {
1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 }
};
+static const struct freq_desc freq_desc_lgm = {
+ 1, { 78000, 78000, 78000, 78000, 78000, 78000, 78000, 78000 }
+};
+
static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
INTEL_CPU_FAM6(ATOM_SALTWELL_MID, freq_desc_pnw),
INTEL_CPU_FAM6(ATOM_SALTWELL_TABLET, freq_desc_clv),
@@ -65,6 +69,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, freq_desc_tng),
INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
INTEL_CPU_FAM6(ATOM_AIRMONT_MID, freq_desc_ann),
+ INTEL_CPU_FAM6(ATOM_AIRMONT_NP, freq_desc_lgm),
{}
};
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 5b345add550f..548fefed71ee 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -19,7 +19,7 @@
/** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
*
* The feature User-Mode Instruction Prevention present in recent Intel
- * processor prevents a group of instructions (sgdt, sidt, sldt, smsw, and str)
+ * processor prevents a group of instructions (SGDT, SIDT, SLDT, SMSW and STR)
* from being executed with CPL > 0. Otherwise, a general protection fault is
* issued.
*
@@ -36,8 +36,8 @@
* DOSEMU2) rely on this subset of instructions to function.
*
* The instructions protected by UMIP can be split in two groups. Those which
- * return a kernel memory address (sgdt and sidt) and those which return a
- * value (sldt, str and smsw).
+ * return a kernel memory address (SGDT and SIDT) and those which return a
+ * value (SLDT, STR and SMSW).
*
* For the instructions that return a kernel memory address, applications
* such as WineHQ rely on the result being located in the kernel memory space,
@@ -45,15 +45,13 @@
* value that, lies close to the top of the kernel memory. The limit for the GDT
* and the IDT are set to zero.
*
- * Given that sldt and str are not commonly used in programs that run on WineHQ
+ * Given that SLDT and STR are not commonly used in programs that run on WineHQ
* or DOSEMU2, they are not emulated.
*
* The instruction smsw is emulated to return the value that the register CR0
* has at boot time as set in the head_32.
*
- * Also, emulation is provided only for 32-bit processes; 64-bit processes
- * that attempt to use the instructions that UMIP protects will receive the
- * SIGSEGV signal issued as a consequence of the general protection fault.
+ * Emulation is provided for both 32-bit and 64-bit processes.
*
* Care is taken to appropriately emulate the results when segmentation is
* used. That is, rather than relying on USER_DS and USER_CS, the function
@@ -63,17 +61,18 @@
* application uses a local descriptor table.
*/
-#define UMIP_DUMMY_GDT_BASE 0xfffe0000
-#define UMIP_DUMMY_IDT_BASE 0xffff0000
+#define UMIP_DUMMY_GDT_BASE 0xfffffffffffe0000ULL
+#define UMIP_DUMMY_IDT_BASE 0xffffffffffff0000ULL
/*
* The SGDT and SIDT instructions store the contents of the global descriptor
* table and interrupt table registers, respectively. The destination is a
* memory operand of X+2 bytes. X bytes are used to store the base address of
- * the table and 2 bytes are used to store the limit. In 32-bit processes, the
- * only processes for which emulation is provided, X has a value of 4.
+ * the table and 2 bytes are used to store the limit. In 32-bit processes X
+ * has a value of 4, in 64-bit processes X has a value of 8.
*/
-#define UMIP_GDT_IDT_BASE_SIZE 4
+#define UMIP_GDT_IDT_BASE_SIZE_64BIT 8
+#define UMIP_GDT_IDT_BASE_SIZE_32BIT 4
#define UMIP_GDT_IDT_LIMIT_SIZE 2
#define UMIP_INST_SGDT 0 /* 0F 01 /0 */
@@ -189,6 +188,7 @@ static int identify_insn(struct insn *insn)
* @umip_inst: A constant indicating the instruction to emulate
* @data: Buffer into which the dummy result is stored
* @data_size: Size of the emulated result
+ * @x86_64: true if process is 64-bit, false otherwise
*
* Emulate an instruction protected by UMIP and provide a dummy result. The
* result of the emulation is saved in @data. The size of the results depends
@@ -202,11 +202,8 @@ static int identify_insn(struct insn *insn)
* 0 on success, -EINVAL on error while emulating.
*/
static int emulate_umip_insn(struct insn *insn, int umip_inst,
- unsigned char *data, int *data_size)
+ unsigned char *data, int *data_size, bool x86_64)
{
- unsigned long dummy_base_addr, dummy_value;
- unsigned short dummy_limit = 0;
-
if (!data || !data_size || !insn)
return -EINVAL;
/*
@@ -219,6 +216,9 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
* is always returned irrespective of the operand size.
*/
if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) {
+ u64 dummy_base_addr;
+ u16 dummy_limit = 0;
+
/* SGDT and SIDT do not use registers operands. */
if (X86_MODRM_MOD(insn->modrm.value) == 3)
return -EINVAL;
@@ -228,13 +228,24 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
else
dummy_base_addr = UMIP_DUMMY_IDT_BASE;
- *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE;
+ /*
+ * 64-bit processes use the entire dummy base address.
+ * 32-bit processes use the lower 32 bits of the base address.
+ * dummy_base_addr is always 64 bits, but we memcpy the correct
+ * number of bytes from it to the destination.
+ */
+ if (x86_64)
+ *data_size = UMIP_GDT_IDT_BASE_SIZE_64BIT;
+ else
+ *data_size = UMIP_GDT_IDT_BASE_SIZE_32BIT;
+
+ memcpy(data + 2, &dummy_base_addr, *data_size);
- memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE);
+ *data_size += UMIP_GDT_IDT_LIMIT_SIZE;
memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
} else if (umip_inst == UMIP_INST_SMSW) {
- dummy_value = CR0_STATE;
+ unsigned long dummy_value = CR0_STATE;
/*
* Even though the CR0 register has 4 bytes, the number
@@ -290,11 +301,10 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
* fixup_umip_exception() - Fixup a general protection fault caused by UMIP
* @regs: Registers as saved when entering the #GP handler
*
- * The instructions sgdt, sidt, str, smsw, sldt cause a general protection
- * fault if executed with CPL > 0 (i.e., from user space). If the offending
- * user-space process is not in long mode, this function fixes the exception
- * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not
- * fixed up. Also long mode user-space processes are not fixed up.
+ * The instructions SGDT, SIDT, STR, SMSW and SLDT cause a general protection
+ * fault if executed with CPL > 0 (i.e., from user space). This function fixes
+ * the exception up and provides dummy results for SGDT, SIDT and SMSW; STR
+ * and SLDT are not fixed up.
*
* If operands are memory addresses, results are copied to user-space memory as
* indicated by the instruction pointed by eIP using the registers indicated in
@@ -373,13 +383,14 @@ bool fixup_umip_exception(struct pt_regs *regs)
umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
umip_insns[umip_inst]);
- /* Do not emulate SLDT, STR or user long mode processes. */
- if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs))
+ /* Do not emulate (spoof) SLDT or STR. */
+ if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
return false;
umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
- if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
+ if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
+ user_64bit_mode(regs)))
return false;
/*
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index d8359ebeea70..8cd745ef8c7b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -508,9 +508,12 @@ struct uprobe_xol_ops {
void (*abort)(struct arch_uprobe *, struct pt_regs *);
};
-static inline int sizeof_long(void)
+static inline int sizeof_long(struct pt_regs *regs)
{
- return in_ia32_syscall() ? 4 : 8;
+ /*
+ * Check registers for mode as in_xxx_syscall() does not apply here.
+ */
+ return user_64bit_mode(regs) ? 8 : 4;
}
static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
@@ -521,9 +524,9 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
static int emulate_push_stack(struct pt_regs *regs, unsigned long val)
{
- unsigned long new_sp = regs->sp - sizeof_long();
+ unsigned long new_sp = regs->sp - sizeof_long(regs);
- if (copy_to_user((void __user *)new_sp, &val, sizeof_long()))
+ if (copy_to_user((void __user *)new_sp, &val, sizeof_long(regs)))
return -EFAULT;
regs->sp = new_sp;
@@ -556,7 +559,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
long correction = utask->vaddr - utask->xol_vaddr;
regs->ip += correction;
} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
- regs->sp += sizeof_long(); /* Pop incorrect return address */
+ regs->sp += sizeof_long(regs); /* Pop incorrect return address */
if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen))
return -ERESTART;
}
@@ -675,7 +678,7 @@ static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
* "call" insn was executed out-of-line. Just restore ->sp and restart.
* We could also restore ->ip and try to call branch_emulate_op() again.
*/
- regs->sp += sizeof_long();
+ regs->sp += sizeof_long(regs);
return -ERESTART;
}
@@ -1056,7 +1059,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
unsigned long
arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
{
- int rasize = sizeof_long(), nleft;
+ int rasize = sizeof_long(regs), nleft;
unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index 329361b69d5e..018aebce33ff 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -8,11 +8,6 @@
#include <linux/debugfs.h>
#include "lapic.h"
-bool kvm_arch_has_vcpu_debugfs(void)
-{
- return true;
-}
-
static int vcpu_get_timer_advance_ns(void *data, u64 *val)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
@@ -48,37 +43,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n");
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
- struct dentry *ret;
-
- ret = debugfs_create_file("tsc-offset", 0444,
- vcpu->debugfs_dentry,
- vcpu, &vcpu_tsc_offset_fops);
- if (!ret)
- return -ENOMEM;
+ debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu,
+ &vcpu_tsc_offset_fops);
- if (lapic_in_kernel(vcpu)) {
- ret = debugfs_create_file("lapic_timer_advance_ns", 0444,
- vcpu->debugfs_dentry,
- vcpu, &vcpu_timer_advance_ns_fops);
- if (!ret)
- return -ENOMEM;
- }
+ if (lapic_in_kernel(vcpu))
+ debugfs_create_file("lapic_timer_advance_ns", 0444,
+ vcpu->debugfs_dentry, vcpu,
+ &vcpu_timer_advance_ns_fops);
if (kvm_has_tsc_control) {
- ret = debugfs_create_file("tsc-scaling-ratio", 0444,
- vcpu->debugfs_dentry,
- vcpu, &vcpu_tsc_scaling_fops);
- if (!ret)
- return -ENOMEM;
- ret = debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
- vcpu->debugfs_dentry,
- vcpu, &vcpu_tsc_scaling_frac_fops);
- if (!ret)
- return -ENOMEM;
-
+ debugfs_create_file("tsc-scaling-ratio", 0444,
+ vcpu->debugfs_dentry, vcpu,
+ &vcpu_tsc_scaling_fops);
+ debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
+ vcpu->debugfs_dentry, vcpu,
+ &vcpu_tsc_scaling_frac_fops);
}
-
- return 0;
}
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index c10a8b10b203..fff790a3f4ee 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1781,7 +1781,7 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
- uint16_t evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu);
+ uint16_t evmcs_ver = 0;
struct kvm_cpuid_entry2 cpuid_entries[] = {
{ .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
{ .function = HYPERV_CPUID_INTERFACE },
@@ -1793,6 +1793,9 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
};
int i, nent = ARRAY_SIZE(cpuid_entries);
+ if (kvm_x86_ops->nested_get_evmcs_version)
+ evmcs_ver = kvm_x86_ops->nested_get_evmcs_version(vcpu);
+
/* Skip NESTED_FEATURES if eVMCS is not supported */
if (!evmcs_ver)
--nent;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0aa158657f20..e904ff06a83d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -216,6 +216,9 @@ static void recalculate_apic_map(struct kvm *kvm)
if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
new->phys_map[xapic_id] = apic;
+ if (!kvm_apic_sw_enabled(apic))
+ continue;
+
ldr = kvm_lapic_get_reg(apic, APIC_LDR);
if (apic_x2apic_mode(apic)) {
@@ -258,6 +261,8 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
static_key_slow_dec_deferred(&apic_sw_disabled);
else
static_key_slow_inc(&apic_sw_disabled.key);
+
+ recalculate_apic_map(apic->vcpu->kvm);
}
}
@@ -1548,7 +1553,6 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
static void apic_timer_expired(struct kvm_lapic *apic)
{
struct kvm_vcpu *vcpu = apic->vcpu;
- struct swait_queue_head *q = &vcpu->wq;
struct kvm_timer *ktimer = &apic->lapic_timer;
if (atomic_read(&apic->lapic_timer.pending))
@@ -1566,13 +1570,6 @@ static void apic_timer_expired(struct kvm_lapic *apic)
atomic_inc(&apic->lapic_timer.pending);
kvm_set_pending_timer(vcpu);
-
- /*
- * For x86, the atomic_inc() is serialized, thus
- * using swait_active() is safe.
- */
- if (swait_active(q))
- swake_up_one(q);
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8f72526e2f68..a63964e7cec7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2095,6 +2095,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
if (!direct)
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
+
+ /*
+ * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
+ * depends on valid pages being added to the head of the list. See
+ * comments in kvm_zap_obsolete_pages().
+ */
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
kvm_mod_used_mmu_pages(vcpu->kvm, +1);
return sp;
@@ -2244,7 +2250,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
#define for_each_valid_sp(_kvm, _sp, _gfn) \
hlist_for_each_entry(_sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
- if ((_sp)->role.invalid) { \
+ if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \
} else
#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
@@ -2301,6 +2307,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
static void mmu_audit_disable(void) { }
#endif
+static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
+}
+
static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
@@ -2525,6 +2536,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
if (level > PT_PAGE_TABLE_LEVEL && need_sync)
flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
}
+ sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
clear_page(sp->spt);
trace_kvm_mmu_get_page(sp, true);
@@ -3466,7 +3478,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
/*
* Currently, fast page fault only works for direct mapping
* since the gfn is not stable for indirect shadow page. See
- * Documentation/virtual/kvm/locking.txt to get more detail.
+ * Documentation/virt/kvm/locking.txt to get more detail.
*/
fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
iterator.sptep, spte,
@@ -4233,6 +4245,13 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
return false;
if (cached_root_available(vcpu, new_cr3, new_role)) {
+ /*
+ * It is possible that the cached previous root page is
+ * obsolete because of a change in the MMU generation
+ * number. However, changing the generation number is
+ * accompanied by KVM_REQ_MMU_RELOAD, which will free
+ * the root set here and allocate a new one.
+ */
kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
if (!skip_tlb_flush) {
kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
@@ -5649,44 +5668,91 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
return alloc_mmu_pages(vcpu);
}
-static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- struct kvm_page_track_notifier_node *node)
+
+static void kvm_zap_obsolete_pages(struct kvm *kvm)
{
- struct kvm_mmu_page *sp;
+ struct kvm_mmu_page *sp, *node;
LIST_HEAD(invalid_list);
- unsigned long i;
- bool flush;
- gfn_t gfn;
-
- spin_lock(&kvm->mmu_lock);
-
- if (list_empty(&kvm->arch.active_mmu_pages))
- goto out_unlock;
-
- flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false);
+ int ign;
- for (i = 0; i < slot->npages; i++) {
- gfn = slot->base_gfn + i;
+restart:
+ list_for_each_entry_safe_reverse(sp, node,
+ &kvm->arch.active_mmu_pages, link) {
+ /*
+ * No obsolete valid page exists before a newly created page
+ * since active_mmu_pages is a FIFO list.
+ */
+ if (!is_obsolete_sp(kvm, sp))
+ break;
- for_each_valid_sp(kvm, sp, gfn) {
- if (sp->gfn != gfn)
- continue;
+ /*
+ * Do not repeatedly zap a root page to avoid unnecessary
+ * KVM_REQ_MMU_RELOAD, otherwise we may not be able to
+ * progress:
+ * vcpu 0 vcpu 1
+ * call vcpu_enter_guest():
+ * 1): handle KVM_REQ_MMU_RELOAD
+ * and require mmu-lock to
+ * load mmu
+ * repeat:
+ * 1): zap root page and
+ * send KVM_REQ_MMU_RELOAD
+ *
+ * 2): if (cond_resched_lock(mmu-lock))
+ *
+ * 2): hold mmu-lock and load mmu
+ *
+ * 3): see KVM_REQ_MMU_RELOAD bit
+ * on vcpu->requests is set
+ * then return 1 to call
+ * vcpu_enter_guest() again.
+ * goto repeat;
+ *
+ * Since we are reversely walking the list and the invalid
+ * list will be moved to the head, skip the invalid page
+ * can help us to avoid the infinity list walking.
+ */
+ if (sp->role.invalid)
+ continue;
- kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
- }
if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
- kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
- flush = false;
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
cond_resched_lock(&kvm->mmu_lock);
+ goto restart;
}
+
+ if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
+ goto restart;
}
- kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
-out_unlock:
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+}
+
+/*
+ * Fast invalidate all shadow pages and use lock-break technique
+ * to zap obsolete pages.
+ *
+ * It's required when memslot is being deleted or VM is being
+ * destroyed, in these cases, we should ensure that KVM MMU does
+ * not use any resource of the being-deleted slot or all slots
+ * after calling the function.
+ */
+static void kvm_mmu_zap_all_fast(struct kvm *kvm)
+{
+ spin_lock(&kvm->mmu_lock);
+ kvm->arch.mmu_valid_gen++;
+
+ kvm_zap_obsolete_pages(kvm);
spin_unlock(&kvm->mmu_lock);
}
+static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ struct kvm_page_track_notifier_node *node)
+{
+ kvm_mmu_zap_all_fast(kvm);
+}
+
void kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 19f69df96758..e0368076a1ef 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1714,7 +1714,6 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
if (!entry)
return -EINVAL;
- new_entry = READ_ONCE(*entry);
new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
@@ -2143,12 +2142,20 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
goto out;
}
+ svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+ GFP_KERNEL_ACCOUNT);
+ if (!svm->vcpu.arch.user_fpu) {
+ printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
+ err = -ENOMEM;
+ goto free_partial_svm;
+ }
+
svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!svm->vcpu.arch.guest_fpu) {
printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
err = -ENOMEM;
- goto free_partial_svm;
+ goto free_user_fpu;
}
err = kvm_vcpu_init(&svm->vcpu, kvm, id);
@@ -2211,6 +2218,8 @@ uninit:
kvm_vcpu_uninit(&svm->vcpu);
free_svm:
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
+free_user_fpu:
+ kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
free_partial_svm:
kmem_cache_free(kvm_vcpu_cache, svm);
out:
@@ -2241,6 +2250,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -5179,6 +5189,11 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
kvm_vcpu_wake_up(vcpu);
}
+static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
unsigned long flags;
@@ -7113,12 +7128,6 @@ failed:
return ret;
}
-static uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
-{
- /* Not supported */
- return 0;
-}
-
static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version)
{
@@ -7303,6 +7312,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.pmu_ops = &amd_pmu_ops,
.deliver_posted_interrupt = svm_deliver_avic_intr,
+ .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
.update_pi_irte = svm_update_pi_irte,
.setup_mce = svm_setup_mce,
@@ -7316,7 +7326,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.mem_enc_unreg_region = svm_unregister_enc_region,
.nested_enable_evmcs = nested_enable_evmcs,
- .nested_get_evmcs_version = nested_get_evmcs_version,
+ .nested_get_evmcs_version = NULL,
.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
};
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0f1378789bd0..a3cba321b5c5 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -220,6 +220,8 @@ static void free_nested(struct kvm_vcpu *vcpu)
if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
return;
+ kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
+
vmx->nested.vmxon = false;
vmx->nested.smm.vmxon = false;
free_vpid(vmx->nested.vpid02);
@@ -232,7 +234,9 @@ static void free_nested(struct kvm_vcpu *vcpu)
vmx->vmcs01.shadow_vmcs = NULL;
}
kfree(vmx->nested.cached_vmcs12);
+ vmx->nested.cached_vmcs12 = NULL;
kfree(vmx->nested.cached_shadow_vmcs12);
+ vmx->nested.cached_shadow_vmcs12 = NULL;
/* Unpin physical memory we referred to in the vmcs02 */
if (vmx->nested.apic_access_page) {
kvm_release_page_dirty(vmx->nested.apic_access_page);
@@ -4536,6 +4540,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
int len;
gva_t gva = 0;
struct vmcs12 *vmcs12;
+ struct x86_exception e;
short offset;
if (!nested_vmx_check_permission(vcpu))
@@ -4584,7 +4589,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
vmx_instruction_info, true, len, &gva))
return 1;
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
- kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL);
+ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e))
+ kvm_inject_page_fault(vcpu, &e);
}
return nested_vmx_succeed(vcpu);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a279447eb75b..c030c96fc81a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6117,6 +6117,11 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
return max_irr;
}
+static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+ return pi_test_on(vcpu_to_pi_desc(vcpu));
+}
+
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
if (!kvm_vcpu_apicv_active(vcpu))
@@ -6598,6 +6603,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
free_loaded_vmcs(vmx->loaded_vmcs);
kfree(vmx->guest_msrs);
kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
kmem_cache_free(kvm_vcpu_cache, vmx);
}
@@ -6613,12 +6619,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (!vmx)
return ERR_PTR(-ENOMEM);
+ vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+ GFP_KERNEL_ACCOUNT);
+ if (!vmx->vcpu.arch.user_fpu) {
+ printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
+ err = -ENOMEM;
+ goto free_partial_vcpu;
+ }
+
vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!vmx->vcpu.arch.guest_fpu) {
printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
err = -ENOMEM;
- goto free_partial_vcpu;
+ goto free_user_fpu;
}
vmx->vpid = allocate_vpid();
@@ -6721,6 +6735,8 @@ uninit_vcpu:
free_vcpu:
free_vpid(vmx->vpid);
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
+free_user_fpu:
+ kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
free_partial_vcpu:
kmem_cache_free(kvm_vcpu_cache, vmx);
return ERR_PTR(err);
@@ -7715,6 +7731,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+ .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
.set_tss_addr = vmx_set_tss_addr,
.set_identity_map_addr = vmx_set_identity_map_addr,
@@ -7780,6 +7797,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.set_nested_state = NULL,
.get_vmcs12_pages = NULL,
.nested_enable_evmcs = NULL,
+ .nested_get_evmcs_version = NULL,
.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
};
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 58305cf81182..91602d310a3f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3306,6 +3306,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_x86_ops->vcpu_load(vcpu, cpu);
+ fpregs_assert_state_consistent();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+
/* Apply any externally detected TSC adjustments (due to suspend) */
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -5308,6 +5312,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
/* kvm_write_guest_virt_system can pull in tons of pages. */
vcpu->arch.l1tf_flush_l1d = true;
+ /*
+ * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
+ * is returned, but our callers are not ready for that and they blindly
+ * call kvm_inject_page_fault. Ensure that they at least do not leak
+ * uninitialized kernel stack memory into cr2 and error code.
+ */
+ memset(exception, 0, sizeof(*exception));
return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
PFERR_WRITE_MASK, exception);
}
@@ -6590,12 +6601,13 @@ restart:
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
- kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE && ctxt->tf)
- kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception ||
- exception_type(ctxt->exception.vector) == EXCPT_TRAP)
+ exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
+ kvm_rip_write(vcpu, ctxt->eip);
+ if (r == EMULATE_DONE && ctxt->tf)
+ kvm_vcpu_do_singlestep(vcpu, &r);
__kvm_set_rflags(vcpu, ctxt->eflags);
+ }
/*
* For STI, interrupts are shadowed; so KVM_REQ_EVENT will
@@ -7202,7 +7214,7 @@ static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
rcu_read_unlock();
- if (target)
+ if (target && READ_ONCE(target->ready))
kvm_vcpu_yield_to(target);
}
@@ -7242,6 +7254,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
break;
case KVM_HC_KICK_CPU:
kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
+ kvm_sched_yield(vcpu->kvm, a1);
ret = 0;
break;
#ifdef CONFIG_X86_64
@@ -7990,9 +8003,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
trace_kvm_entry(vcpu->vcpu_id);
guest_enter_irqoff();
- fpregs_assert_state_consistent();
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
- switch_fpu_return();
+ /* The preempt notifier should have taken care of the FPU already. */
+ WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD));
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
@@ -8270,7 +8282,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
fpregs_lock();
- copy_fpregs_to_fpstate(&current->thread.fpu);
+ copy_fpregs_to_fpstate(vcpu->arch.user_fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
@@ -8287,7 +8299,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
fpregs_lock();
copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
- copy_kernel_to_fpregs(&current->thread.fpu.state);
+ copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
fpregs_mark_activate();
fpregs_unlock();
@@ -9694,6 +9706,22 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
+ return true;
+
+ if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+ kvm_test_request(KVM_REQ_SMI, vcpu) ||
+ kvm_test_request(KVM_REQ_EVENT, vcpu))
+ return true;
+
+ if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu))
+ return true;
+
+ return false;
+}
+
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
return vcpu->arch.preempted_in_kernel;
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 4fe1601dbc5d..86976b55ae74 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -33,7 +33,7 @@
102:
.section .fixup,"ax"
103: addl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
+ jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(100b, 103b)
@@ -113,7 +113,7 @@ ENTRY(copy_user_generic_unrolled)
40: leal (%rdx,%rcx,8),%edx
jmp 60f
50: movl %ecx,%edx
-60: jmp copy_user_handle_tail /* ecx is zerorest also */
+60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
.previous
_ASM_EXTABLE_UA(1b, 30b)
@@ -177,7 +177,7 @@ ENTRY(copy_user_generic_string)
.section .fixup,"ax"
11: leal (%rdx,%rcx,8),%ecx
12: movl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
+ jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, 11b)
@@ -210,7 +210,7 @@ ENTRY(copy_user_enhanced_fast_string)
.section .fixup,"ax"
12: movl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
+ jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, 12b)
@@ -231,7 +231,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* eax uncopied bytes or 0 if successful.
*/
ALIGN;
-copy_user_handle_tail:
+.Lcopy_user_handle_tail:
movl %edx,%ecx
1: rep movsb
2: mov %ecx,%eax
@@ -239,7 +239,7 @@ copy_user_handle_tail:
ret
_ASM_EXTABLE_UA(1b, 2b)
-END(copy_user_handle_tail)
+END(.Lcopy_user_handle_tail)
/*
* copy_user_nocache - Uncached memory copy with exception handling
@@ -364,7 +364,7 @@ ENTRY(__copy_user_nocache)
movl %ecx,%edx
.L_fixup_handle_tail:
sfence
- jmp copy_user_handle_tail
+ jmp .Lcopy_user_handle_tail
.previous
_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
diff --git a/arch/x86/lib/cpu.c b/arch/x86/lib/cpu.c
index 04967cdce5d1..7ad68917a51e 100644
--- a/arch/x86/lib/cpu.c
+++ b/arch/x86/lib/cpu.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/types.h>
#include <linux/export.h>
+#include <asm/cpu.h>
unsigned int x86_family(unsigned int sig)
{
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 304f958c27b2..9578eb88fc87 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -115,7 +115,7 @@ ENDPROC(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
-bad_get_user_clac:
+.Lbad_get_user_clac:
ASM_CLAC
bad_get_user:
xor %edx,%edx
@@ -123,7 +123,7 @@ bad_get_user:
ret
#ifdef CONFIG_X86_32
-bad_get_user_8_clac:
+.Lbad_get_user_8_clac:
ASM_CLAC
bad_get_user_8:
xor %edx,%edx
@@ -132,12 +132,12 @@ bad_get_user_8:
ret
#endif
- _ASM_EXTABLE_UA(1b, bad_get_user_clac)
- _ASM_EXTABLE_UA(2b, bad_get_user_clac)
- _ASM_EXTABLE_UA(3b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(1b, .Lbad_get_user_clac)
+ _ASM_EXTABLE_UA(2b, .Lbad_get_user_clac)
+ _ASM_EXTABLE_UA(3b, .Lbad_get_user_clac)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE_UA(4b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(4b, .Lbad_get_user_clac)
#else
- _ASM_EXTABLE_UA(4b, bad_get_user_8_clac)
- _ASM_EXTABLE_UA(5b, bad_get_user_8_clac)
+ _ASM_EXTABLE_UA(4b, .Lbad_get_user_8_clac)
+ _ASM_EXTABLE_UA(5b, .Lbad_get_user_8_clac)
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 14bf78341d3c..126dd6a9ec9b 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -37,7 +37,7 @@
ENTRY(__put_user_1)
ENTER
cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
- jae bad_put_user
+ jae .Lbad_put_user
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %eax,%eax
@@ -51,7 +51,7 @@ ENTRY(__put_user_2)
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $1,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX
- jae bad_put_user
+ jae .Lbad_put_user
ASM_STAC
2: movw %ax,(%_ASM_CX)
xor %eax,%eax
@@ -65,7 +65,7 @@ ENTRY(__put_user_4)
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $3,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX
- jae bad_put_user
+ jae .Lbad_put_user
ASM_STAC
3: movl %eax,(%_ASM_CX)
xor %eax,%eax
@@ -79,7 +79,7 @@ ENTRY(__put_user_8)
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $7,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX
- jae bad_put_user
+ jae .Lbad_put_user
ASM_STAC
4: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
@@ -91,16 +91,16 @@ ENTRY(__put_user_8)
ENDPROC(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
-bad_put_user_clac:
+.Lbad_put_user_clac:
ASM_CLAC
-bad_put_user:
+.Lbad_put_user:
movl $-EFAULT,%eax
RET
- _ASM_EXTABLE_UA(1b, bad_put_user_clac)
- _ASM_EXTABLE_UA(2b, bad_put_user_clac)
- _ASM_EXTABLE_UA(3b, bad_put_user_clac)
- _ASM_EXTABLE_UA(4b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(1b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(3b, .Lbad_put_user_clac)
+ _ASM_EXTABLE_UA(4b, .Lbad_put_user_clac)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE_UA(5b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(5b, .Lbad_put_user_clac)
#endif
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index 6b468517ab71..73dc66d887f3 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -178,13 +178,15 @@ void FPU_printall(void)
for (i = 0; i < 8; i++) {
FPU_REG *r = &st(i);
u_char tagi = FPU_gettagi(i);
+
switch (tagi) {
case TAG_Empty:
continue;
- break;
case TAG_Zero:
case TAG_Special:
+ /* Update tagi for the printk below */
tagi = FPU_Special(r);
+ /* fall through */
case TAG_Valid:
printk("st(%d) %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
getsign(r) ? '-' : '+',
@@ -198,7 +200,6 @@ void FPU_printall(void)
printk("Whoops! Error in errors.c: tag%d is %d ", i,
tagi);
continue;
- break;
}
printk("%s\n", tag_desc[(int)(unsigned)tagi]);
}
diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c
index 783c509f957a..127ea54122d7 100644
--- a/arch/x86/math-emu/fpu_trig.c
+++ b/arch/x86/math-emu/fpu_trig.c
@@ -1352,7 +1352,7 @@ static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
case TW_Denormal:
if (denormal_operand() < 0)
return;
-
+ /* fall through */
case TAG_Zero:
case TAG_Valid:
setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6c46095cd0d9..9ceacd1156db 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -177,13 +177,14 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
pmd = pmd_offset(pud, address);
pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- return NULL;
- if (!pmd_present(*pmd))
+ if (pmd_present(*pmd) != pmd_present(*pmd_k))
set_pmd(pmd, *pmd_k);
+
+ if (!pmd_present(*pmd_k))
+ return NULL;
else
- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k));
return pmd_k;
}
@@ -203,17 +204,13 @@ void vmalloc_sync_all(void)
spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
spinlock_t *pgt_lock;
- pmd_t *ret;
/* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- ret = vmalloc_sync_one(page_address(page), address);
+ vmalloc_sync_one(page_address(page), address);
spin_unlock(pgt_lock);
-
- if (!ret)
- break;
}
spin_unlock(&pgd_lock);
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4068abb9427f..930edeb41ec3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -916,7 +916,7 @@ static void mark_nxdata_nx(void)
if (__supported_pte_mask & _PAGE_NX)
printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
- set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
+ set_memory_nx(start, size >> PAGE_SHIFT);
}
void mark_rodata_ro(void)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 63e99f15d7cf..a39dcdb5ae34 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -19,6 +19,7 @@
#include <asm/set_memory.h>
#include <asm/e820/api.h>
+#include <asm/efi.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index e6dad600614c..4123100e0eaf 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -861,9 +861,9 @@ void numa_remove_cpu(int cpu)
*/
const struct cpumask *cpumask_of_node(int node)
{
- if (node >= nr_node_ids) {
+ if ((unsigned)node >= nr_node_ids) {
printk(KERN_WARNING
- "cpumask_of_node(%d): node > nr_node_ids(%u)\n",
+ "cpumask_of_node(%d): (unsigned)node >= nr_node_ids(%u)\n",
node, nr_node_ids);
dump_stack();
return cpu_none_mask;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 6a9a77a403c9..0d09cc5aad61 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -516,7 +516,7 @@ static inline void check_conflict(int warnlvl, pgprot_t prot, pgprotval_t val,
*/
static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
unsigned long pfn, unsigned long npg,
- int warnlvl)
+ unsigned long lpsize, int warnlvl)
{
pgprotval_t forbidden, res;
unsigned long end;
@@ -535,9 +535,17 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
check_conflict(warnlvl, prot, res, start, end, pfn, "Text NX");
forbidden = res;
- res = protect_kernel_text_ro(start, end);
- check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
- forbidden |= res;
+ /*
+ * Special case to preserve a large page. If the change spawns the
+ * full large page mapping then there is no point to split it
+ * up. Happens with ftrace and is going to be removed once ftrace
+ * switched to text_poke().
+ */
+ if (lpsize != (npg * PAGE_SIZE) || (start & (lpsize - 1))) {
+ res = protect_kernel_text_ro(start, end);
+ check_conflict(warnlvl, prot, res, start, end, pfn, "Text RO");
+ forbidden |= res;
+ }
/* Check the PFN directly */
res = protect_pci_bios(pfn, pfn + npg - 1);
@@ -819,7 +827,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
* extra conditional required here.
*/
chk_prot = static_protections(old_prot, lpaddr, old_pfn, numpages,
- CPA_CONFLICT);
+ psize, CPA_CONFLICT);
if (WARN_ON_ONCE(pgprot_val(chk_prot) != pgprot_val(old_prot))) {
/*
@@ -855,7 +863,7 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
* protection requirement in the large page.
*/
new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
- CPA_DETECT);
+ psize, CPA_DETECT);
/*
* If there is a conflict, split the large page.
@@ -906,7 +914,8 @@ static void split_set_pte(struct cpa_data *cpa, pte_t *pte, unsigned long pfn,
if (!cpa->force_static_prot)
goto set;
- prot = static_protections(ref_prot, address, pfn, npg, CPA_PROTECT);
+ /* Hand in lpsize = 0 to enforce the protection mechanism */
+ prot = static_protections(ref_prot, address, pfn, npg, 0, CPA_PROTECT);
if (pgprot_val(prot) == pgprot_val(ref_prot))
goto set;
@@ -1503,7 +1512,8 @@ repeat:
pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
cpa_inc_4k_install();
- new_prot = static_protections(new_prot, address, pfn, 1,
+ /* Hand in lpsize = 0 to enforce the protection mechanism */
+ new_prot = static_protections(new_prot, address, pfn, 1, 0,
CPA_PROTECT);
new_prot = pgprot_clear_protnone_bits(new_prot);
@@ -1809,63 +1819,6 @@ out_err:
}
EXPORT_SYMBOL(set_memory_uc);
-static int _set_memory_array(unsigned long *addr, int numpages,
- enum page_cache_mode new_type)
-{
- enum page_cache_mode set_type;
- int i, j;
- int ret;
-
- for (i = 0; i < numpages; i++) {
- ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
- new_type, NULL);
- if (ret)
- goto out_free;
- }
-
- /* If WC, set to UC- first and then WC */
- set_type = (new_type == _PAGE_CACHE_MODE_WC) ?
- _PAGE_CACHE_MODE_UC_MINUS : new_type;
-
- ret = change_page_attr_set(addr, numpages,
- cachemode2pgprot(set_type), 1);
-
- if (!ret && new_type == _PAGE_CACHE_MODE_WC)
- ret = change_page_attr_set_clr(addr, numpages,
- cachemode2pgprot(
- _PAGE_CACHE_MODE_WC),
- __pgprot(_PAGE_CACHE_MASK),
- 0, CPA_ARRAY, NULL);
- if (ret)
- goto out_free;
-
- return 0;
-
-out_free:
- for (j = 0; j < i; j++)
- free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE);
-
- return ret;
-}
-
-int set_memory_array_uc(unsigned long *addr, int numpages)
-{
- return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_UC_MINUS);
-}
-EXPORT_SYMBOL(set_memory_array_uc);
-
-int set_memory_array_wc(unsigned long *addr, int numpages)
-{
- return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_WC);
-}
-EXPORT_SYMBOL(set_memory_array_wc);
-
-int set_memory_array_wt(unsigned long *addr, int numpages)
-{
- return _set_memory_array(addr, numpages, _PAGE_CACHE_MODE_WT);
-}
-EXPORT_SYMBOL_GPL(set_memory_array_wt);
-
int _set_memory_wc(unsigned long addr, int numpages)
{
int ret;
@@ -1905,23 +1858,6 @@ int _set_memory_wt(unsigned long addr, int numpages)
cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0);
}
-int set_memory_wt(unsigned long addr, int numpages)
-{
- int ret;
-
- ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
- _PAGE_CACHE_MODE_WT, NULL);
- if (ret)
- return ret;
-
- ret = _set_memory_wt(addr, numpages);
- if (ret)
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(set_memory_wt);
-
int _set_memory_wb(unsigned long addr, int numpages)
{
/* WB cache mode is hard wired to all cache attribute bits being 0 */
@@ -1942,24 +1878,6 @@ int set_memory_wb(unsigned long addr, int numpages)
}
EXPORT_SYMBOL(set_memory_wb);
-int set_memory_array_wb(unsigned long *addr, int numpages)
-{
- int i;
- int ret;
-
- /* WB cache mode is hard wired to all cache attribute bits being 0 */
- ret = change_page_attr_clear(addr, numpages,
- __pgprot(_PAGE_CACHE_MASK), 1);
- if (ret)
- return ret;
-
- for (i = 0; i < numpages; i++)
- free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
-
- return 0;
-}
-EXPORT_SYMBOL(set_memory_array_wb);
-
int set_memory_x(unsigned long addr, int numpages)
{
if (!(__supported_pte_mask & _PAGE_NX))
@@ -1967,7 +1885,6 @@ int set_memory_x(unsigned long addr, int numpages)
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
-EXPORT_SYMBOL(set_memory_x);
int set_memory_nx(unsigned long addr, int numpages)
{
@@ -1976,7 +1893,6 @@ int set_memory_nx(unsigned long addr, int numpages)
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
-EXPORT_SYMBOL(set_memory_nx);
int set_memory_ro(unsigned long addr, int numpages)
{
@@ -2180,22 +2096,6 @@ int set_pages_array_wb(struct page **pages, int numpages)
}
EXPORT_SYMBOL(set_pages_array_wb);
-int set_pages_x(struct page *page, int numpages)
-{
- unsigned long addr = (unsigned long)page_address(page);
-
- return set_memory_x(addr, numpages);
-}
-EXPORT_SYMBOL(set_pages_x);
-
-int set_pages_nx(struct page *page, int numpages)
-{
- unsigned long addr = (unsigned long)page_address(page);
-
- return set_memory_nx(addr, numpages);
-}
-EXPORT_SYMBOL(set_pages_nx);
-
int set_pages_ro(struct page *page, int numpages)
{
unsigned long addr = (unsigned long)page_address(page);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4de9704c4aaf..e6a9edc5baaf 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -440,7 +440,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
if (next != real_prev) {
- load_mm_cr4(next);
+ load_mm_cr4_irqsoff(next);
switch_ldt(real_prev, next);
}
}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index eaaed5bfc4a4..991549a1c5f3 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -390,8 +390,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
emit_prologue(&prog, bpf_prog->aux->stack_depth,
bpf_prog_was_classic(bpf_prog));
+ addrs[0] = prog - temp;
- for (i = 0; i < insn_cnt; i++, insn++) {
+ for (i = 1; i <= insn_cnt; i++, insn++) {
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
@@ -1105,7 +1106,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
extra_pass = true;
goto skip_init_addrs;
}
- addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
+ addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL);
if (!addrs) {
prog = orig_prog;
goto out_addrs;
@@ -1115,7 +1116,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
* Before first pass, make a rough estimation of addrs[]
* each BPF instruction is translated to less than 64 bytes
*/
- for (proglen = 0, i = 0; i < prog->len; i++) {
+ for (proglen = 0, i = 0; i <= prog->len; i++) {
proglen += 64;
addrs[i] = proglen;
}
@@ -1180,7 +1181,7 @@ out_image:
if (!image || !prog->is_func || extra_pass) {
if (image)
- bpf_prog_fill_jited_linfo(prog, addrs);
+ bpf_prog_fill_jited_linfo(prog, addrs + 1);
out_addrs:
kfree(addrs);
kfree(jit_data);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 7389db538c30..6fa42e9c4e6f 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -29,6 +29,7 @@
static bool pci_mmcfg_running_state;
static bool pci_mmcfg_arch_init_failed;
static DEFINE_MUTEX(pci_mmcfg_lock);
+#define pci_mmcfg_lock_held() lock_is_held(&(pci_mmcfg_lock).dep_map)
LIST_HEAD(pci_mmcfg_list);
@@ -54,7 +55,7 @@ static void list_add_sorted(struct pci_mmcfg_region *new)
struct pci_mmcfg_region *cfg;
/* keep list sorted by segment and starting bus number */
- list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list) {
+ list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held()) {
if (cfg->segment > new->segment ||
(cfg->segment == new->segment &&
cfg->start_bus >= new->start_bus)) {
@@ -118,7 +119,7 @@ struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus)
{
struct pci_mmcfg_region *cfg;
- list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list)
+ list_for_each_entry_rcu(cfg, &pci_mmcfg_list, list, pci_mmcfg_lock_held())
if (cfg->segment == segment &&
cfg->start_bus <= bus && bus <= cfg->end_bus)
return cfg;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index a7189a3b4d70..c202e1b07e29 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -59,11 +59,34 @@ static efi_system_table_t efi_systab __initdata;
static efi_config_table_type_t arch_tables[] __initdata = {
#ifdef CONFIG_X86_UV
- {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
+ {UV_SYSTEM_TABLE_GUID, "UVsystab", &uv_systab_phys},
#endif
{NULL_GUID, NULL, NULL},
};
+static const unsigned long * const efi_tables[] = {
+ &efi.mps,
+ &efi.acpi,
+ &efi.acpi20,
+ &efi.smbios,
+ &efi.smbios3,
+ &efi.boot_info,
+ &efi.hcdp,
+ &efi.uga,
+#ifdef CONFIG_X86_UV
+ &uv_systab_phys,
+#endif
+ &efi.fw_vendor,
+ &efi.runtime,
+ &efi.config_table,
+ &efi.esrt,
+ &efi.properties_table,
+ &efi.mem_attr_table,
+#ifdef CONFIG_EFI_RCI2_TABLE
+ &rci2_table_phys,
+#endif
+};
+
u64 efi_setup; /* efi setup_data physical address */
static int add_efi_memmap __initdata;
@@ -1049,3 +1072,17 @@ static int __init arch_parse_efi_cmdline(char *str)
return 0;
}
early_param("efi", arch_parse_efi_cmdline);
+
+bool efi_is_table_address(unsigned long phys_addr)
+{
+ unsigned int i;
+
+ if (phys_addr == EFI_INVALID_TABLE_ADDR)
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
+ if (*(efi_tables[i]) == phys_addr)
+ return true;
+
+ return false;
+}
diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c
index 2e796b54cbde..9e2444500428 100644
--- a/arch/x86/platform/intel/iosf_mbi.c
+++ b/arch/x86/platform/intel/iosf_mbi.c
@@ -17,6 +17,7 @@
#include <linux/debugfs.h>
#include <linux/capability.h>
#include <linux/pm_qos.h>
+#include <linux/wait.h>
#include <asm/iosf_mbi.h>
@@ -201,23 +202,45 @@ EXPORT_SYMBOL(iosf_mbi_available);
#define PUNIT_SEMAPHORE_BIT BIT(0)
#define PUNIT_SEMAPHORE_ACQUIRE BIT(1)
-static DEFINE_MUTEX(iosf_mbi_punit_mutex);
-static DEFINE_MUTEX(iosf_mbi_block_punit_i2c_access_count_mutex);
+static DEFINE_MUTEX(iosf_mbi_pmic_access_mutex);
static BLOCKING_NOTIFIER_HEAD(iosf_mbi_pmic_bus_access_notifier);
-static u32 iosf_mbi_block_punit_i2c_access_count;
+static DECLARE_WAIT_QUEUE_HEAD(iosf_mbi_pmic_access_waitq);
+static u32 iosf_mbi_pmic_punit_access_count;
+static u32 iosf_mbi_pmic_i2c_access_count;
static u32 iosf_mbi_sem_address;
static unsigned long iosf_mbi_sem_acquired;
static struct pm_qos_request iosf_mbi_pm_qos;
void iosf_mbi_punit_acquire(void)
{
- mutex_lock(&iosf_mbi_punit_mutex);
+ /* Wait for any I2C PMIC accesses from in kernel drivers to finish. */
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ while (iosf_mbi_pmic_i2c_access_count != 0) {
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
+ wait_event(iosf_mbi_pmic_access_waitq,
+ iosf_mbi_pmic_i2c_access_count == 0);
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ }
+ /*
+ * We do not need to do anything to allow the PUNIT to safely access
+ * the PMIC, other then block in kernel accesses to the PMIC.
+ */
+ iosf_mbi_pmic_punit_access_count++;
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
}
EXPORT_SYMBOL(iosf_mbi_punit_acquire);
void iosf_mbi_punit_release(void)
{
- mutex_unlock(&iosf_mbi_punit_mutex);
+ bool do_wakeup;
+
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ iosf_mbi_pmic_punit_access_count--;
+ do_wakeup = iosf_mbi_pmic_punit_access_count == 0;
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
+
+ if (do_wakeup)
+ wake_up(&iosf_mbi_pmic_access_waitq);
}
EXPORT_SYMBOL(iosf_mbi_punit_release);
@@ -256,34 +279,32 @@ static void iosf_mbi_reset_semaphore(void)
* already blocked P-Unit accesses because it wants them blocked over multiple
* i2c-transfers, for e.g. read-modify-write of an I2C client register.
*
- * The P-Unit accesses already being blocked is tracked through the
- * iosf_mbi_block_punit_i2c_access_count variable which is protected by the
- * iosf_mbi_block_punit_i2c_access_count_mutex this mutex is hold for the
- * entire duration of the function.
- *
- * If access is not blocked yet, this function takes the following steps:
+ * To allow safe PMIC i2c bus accesses this function takes the following steps:
*
* 1) Some code sends request to the P-Unit which make it access the PMIC
* I2C bus. Testing has shown that the P-Unit does not check its internal
* PMIC bus semaphore for these requests. Callers of these requests call
* iosf_mbi_punit_acquire()/_release() around their P-Unit accesses, these
- * functions lock/unlock the iosf_mbi_punit_mutex.
- * As the first step we lock the iosf_mbi_punit_mutex, to wait for any in
- * flight requests to finish and to block any new requests.
+ * functions increase/decrease iosf_mbi_pmic_punit_access_count, so first
+ * we wait for iosf_mbi_pmic_punit_access_count to become 0.
+ *
+ * 2) Check iosf_mbi_pmic_i2c_access_count, if access has already
+ * been blocked by another caller, we only need to increment
+ * iosf_mbi_pmic_i2c_access_count and we can skip the other steps.
*
- * 2) Some code makes such P-Unit requests from atomic contexts where it
+ * 3) Some code makes such P-Unit requests from atomic contexts where it
* cannot call iosf_mbi_punit_acquire() as that may sleep.
* As the second step we call a notifier chain which allows any code
* needing P-Unit resources from atomic context to acquire them before
* we take control over the PMIC I2C bus.
*
- * 3) When CPU cores enter C6 or C7 the P-Unit needs to talk to the PMIC
+ * 4) When CPU cores enter C6 or C7 the P-Unit needs to talk to the PMIC
* if this happens while the kernel itself is accessing the PMIC I2C bus
* the SoC hangs.
* As the third step we call pm_qos_update_request() to disallow the CPU
* to enter C6 or C7.
*
- * 4) The P-Unit has a PMIC bus semaphore which we can request to stop
+ * 5) The P-Unit has a PMIC bus semaphore which we can request to stop
* autonomous P-Unit tasks from accessing the PMIC I2C bus while we hold it.
* As the fourth and final step we request this semaphore and wait for our
* request to be acknowledged.
@@ -297,12 +318,18 @@ int iosf_mbi_block_punit_i2c_access(void)
if (WARN_ON(!mbi_pdev || !iosf_mbi_sem_address))
return -ENXIO;
- mutex_lock(&iosf_mbi_block_punit_i2c_access_count_mutex);
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
- if (iosf_mbi_block_punit_i2c_access_count > 0)
+ while (iosf_mbi_pmic_punit_access_count != 0) {
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
+ wait_event(iosf_mbi_pmic_access_waitq,
+ iosf_mbi_pmic_punit_access_count == 0);
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ }
+
+ if (iosf_mbi_pmic_i2c_access_count > 0)
goto success;
- mutex_lock(&iosf_mbi_punit_mutex);
blocking_notifier_call_chain(&iosf_mbi_pmic_bus_access_notifier,
MBI_PMIC_BUS_ACCESS_BEGIN, NULL);
@@ -330,10 +357,6 @@ int iosf_mbi_block_punit_i2c_access(void)
iosf_mbi_sem_acquired = jiffies;
dev_dbg(&mbi_pdev->dev, "P-Unit semaphore acquired after %ums\n",
jiffies_to_msecs(jiffies - start));
- /*
- * Success, keep iosf_mbi_punit_mutex locked till
- * iosf_mbi_unblock_punit_i2c_access() gets called.
- */
goto success;
}
@@ -344,15 +367,13 @@ int iosf_mbi_block_punit_i2c_access(void)
dev_err(&mbi_pdev->dev, "Error P-Unit semaphore timed out, resetting\n");
error:
iosf_mbi_reset_semaphore();
- mutex_unlock(&iosf_mbi_punit_mutex);
-
if (!iosf_mbi_get_sem(&sem))
dev_err(&mbi_pdev->dev, "P-Unit semaphore: %d\n", sem);
success:
if (!WARN_ON(ret))
- iosf_mbi_block_punit_i2c_access_count++;
+ iosf_mbi_pmic_i2c_access_count++;
- mutex_unlock(&iosf_mbi_block_punit_i2c_access_count_mutex);
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
return ret;
}
@@ -360,17 +381,20 @@ EXPORT_SYMBOL(iosf_mbi_block_punit_i2c_access);
void iosf_mbi_unblock_punit_i2c_access(void)
{
- mutex_lock(&iosf_mbi_block_punit_i2c_access_count_mutex);
+ bool do_wakeup = false;
- iosf_mbi_block_punit_i2c_access_count--;
- if (iosf_mbi_block_punit_i2c_access_count == 0) {
+ mutex_lock(&iosf_mbi_pmic_access_mutex);
+ iosf_mbi_pmic_i2c_access_count--;
+ if (iosf_mbi_pmic_i2c_access_count == 0) {
iosf_mbi_reset_semaphore();
- mutex_unlock(&iosf_mbi_punit_mutex);
dev_dbg(&mbi_pdev->dev, "punit semaphore held for %ums\n",
jiffies_to_msecs(jiffies - iosf_mbi_sem_acquired));
+ do_wakeup = true;
}
+ mutex_unlock(&iosf_mbi_pmic_access_mutex);
- mutex_unlock(&iosf_mbi_block_punit_i2c_access_count_mutex);
+ if (do_wakeup)
+ wake_up(&iosf_mbi_pmic_access_waitq);
}
EXPORT_SYMBOL(iosf_mbi_unblock_punit_i2c_access);
@@ -379,10 +403,10 @@ int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb)
int ret;
/* Wait for the bus to go inactive before registering */
- mutex_lock(&iosf_mbi_punit_mutex);
+ iosf_mbi_punit_acquire();
ret = blocking_notifier_chain_register(
&iosf_mbi_pmic_bus_access_notifier, nb);
- mutex_unlock(&iosf_mbi_punit_mutex);
+ iosf_mbi_punit_release();
return ret;
}
@@ -403,9 +427,9 @@ int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb)
int ret;
/* Wait for the bus to go inactive before unregistering */
- mutex_lock(&iosf_mbi_punit_mutex);
+ iosf_mbi_punit_acquire();
ret = iosf_mbi_unregister_pmic_bus_access_notifier_unlocked(nb);
- mutex_unlock(&iosf_mbi_punit_mutex);
+ iosf_mbi_punit_release();
return ret;
}
@@ -413,7 +437,7 @@ EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier);
void iosf_mbi_assert_punit_acquired(void)
{
- WARN_ON(!mutex_is_locked(&iosf_mbi_punit_mutex));
+ WARN_ON(iosf_mbi_pmic_punit_access_count == 0);
}
EXPORT_SYMBOL(iosf_mbi_assert_punit_acquired);
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 7c69652ffeea..c2ee31953372 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -14,6 +14,8 @@
#include <asm/uv/bios.h>
#include <asm/uv/uv_hub.h>
+unsigned long uv_systab_phys __ro_after_init = EFI_INVALID_TABLE_ADDR;
+
struct uv_systab *uv_systab;
static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
@@ -185,13 +187,13 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
void uv_bios_init(void)
{
uv_systab = NULL;
- if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
- !efi.uv_systab || efi_runtime_disabled()) {
+ if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) ||
+ !uv_systab_phys || efi_runtime_disabled()) {
pr_crit("UV: UVsystab: missing\n");
return;
}
- uv_systab = ioremap(efi.uv_systab, sizeof(struct uv_systab));
+ uv_systab = ioremap(uv_systab_phys, sizeof(struct uv_systab));
if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
pr_err("UV: UVsystab: bad signature!\n");
iounmap(uv_systab);
@@ -203,7 +205,7 @@ void uv_bios_init(void)
int size = uv_systab->size;
iounmap(uv_systab);
- uv_systab = ioremap(efi.uv_systab, size);
+ uv_systab = ioremap(uv_systab_phys, size);
if (!uv_systab) {
pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
return;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 20c389a91b80..5f0a96bf27a1 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1804,9 +1804,9 @@ static void pq_init(int node, int pnode)
plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);
vp = kmalloc_node(plsize, GFP_KERNEL, node);
- pqp = (struct bau_pq_entry *)vp;
- BUG_ON(!pqp);
+ BUG_ON(!vp);
+ pqp = (struct bau_pq_entry *)vp;
cp = (char *)pqp + 31;
pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 24b079e94bc2..c9ef6a7a4a1a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -12,6 +12,7 @@
#include <linux/smp.h>
#include <linux/perf_event.h>
#include <linux/tboot.h>
+#include <linux/dmi.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
@@ -23,7 +24,7 @@
#include <asm/debugreg.h>
#include <asm/cpu.h>
#include <asm/mmu_context.h>
-#include <linux/dmi.h>
+#include <asm/cpu_device_id.h>
#ifdef CONFIG_X86_32
__visible unsigned long saved_context_ebx;
@@ -397,15 +398,14 @@ static int __init bsp_pm_check_init(void)
core_initcall(bsp_pm_check_init);
-static int msr_init_context(const u32 *msr_id, const int total_num)
+static int msr_build_context(const u32 *msr_id, const int num)
{
- int i = 0;
+ struct saved_msrs *saved_msrs = &saved_context.saved_msrs;
struct saved_msr *msr_array;
+ int total_num;
+ int i, j;
- if (saved_context.saved_msrs.array || saved_context.saved_msrs.num > 0) {
- pr_err("x86/pm: MSR quirk already applied, please check your DMI match table.\n");
- return -EINVAL;
- }
+ total_num = saved_msrs->num + num;
msr_array = kmalloc_array(total_num, sizeof(struct saved_msr), GFP_KERNEL);
if (!msr_array) {
@@ -413,19 +413,30 @@ static int msr_init_context(const u32 *msr_id, const int total_num)
return -ENOMEM;
}
- for (i = 0; i < total_num; i++) {
- msr_array[i].info.msr_no = msr_id[i];
+ if (saved_msrs->array) {
+ /*
+ * Multiple callbacks can invoke this function, so copy any
+ * MSR save requests from previous invocations.
+ */
+ memcpy(msr_array, saved_msrs->array,
+ sizeof(struct saved_msr) * saved_msrs->num);
+
+ kfree(saved_msrs->array);
+ }
+
+ for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
+ msr_array[i].info.msr_no = msr_id[j];
msr_array[i].valid = false;
msr_array[i].info.reg.q = 0;
}
- saved_context.saved_msrs.num = total_num;
- saved_context.saved_msrs.array = msr_array;
+ saved_msrs->num = total_num;
+ saved_msrs->array = msr_array;
return 0;
}
/*
- * The following section is a quirk framework for problematic BIOSen:
+ * The following sections are a quirk framework for problematic BIOSen:
* Sometimes MSRs are modified by the BIOSen after suspended to
* RAM, this might cause unexpected behavior after wakeup.
* Thus we save/restore these specified MSRs across suspend/resume
@@ -440,7 +451,7 @@ static int msr_initialize_bdw(const struct dmi_system_id *d)
u32 bdw_msr_id[] = { MSR_IA32_THERM_CONTROL };
pr_info("x86/pm: %s detected, MSR saving is needed during suspending.\n", d->ident);
- return msr_init_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
+ return msr_build_context(bdw_msr_id, ARRAY_SIZE(bdw_msr_id));
}
static const struct dmi_system_id msr_save_dmi_table[] = {
@@ -455,9 +466,58 @@ static const struct dmi_system_id msr_save_dmi_table[] = {
{}
};
+static int msr_save_cpuid_features(const struct x86_cpu_id *c)
+{
+ u32 cpuid_msr_id[] = {
+ MSR_AMD64_CPUID_FN_1,
+ };
+
+ pr_info("x86/pm: family %#hx cpu detected, MSR saving is needed during suspending.\n",
+ c->family);
+
+ return msr_build_context(cpuid_msr_id, ARRAY_SIZE(cpuid_msr_id));
+}
+
+static const struct x86_cpu_id msr_save_cpu_table[] = {
+ {
+ .vendor = X86_VENDOR_AMD,
+ .family = 0x15,
+ .model = X86_MODEL_ANY,
+ .feature = X86_FEATURE_ANY,
+ .driver_data = (kernel_ulong_t)msr_save_cpuid_features,
+ },
+ {
+ .vendor = X86_VENDOR_AMD,
+ .family = 0x16,
+ .model = X86_MODEL_ANY,
+ .feature = X86_FEATURE_ANY,
+ .driver_data = (kernel_ulong_t)msr_save_cpuid_features,
+ },
+ {}
+};
+
+typedef int (*pm_cpu_match_t)(const struct x86_cpu_id *);
+static int pm_cpu_check(const struct x86_cpu_id *c)
+{
+ const struct x86_cpu_id *m;
+ int ret = 0;
+
+ m = x86_match_cpu(msr_save_cpu_table);
+ if (m) {
+ pm_cpu_match_t fn;
+
+ fn = (pm_cpu_match_t)m->driver_data;
+ ret = fn(m);
+ }
+
+ return ret;
+}
+
static int pm_check_save_msr(void)
{
dmi_check_system(msr_save_dmi_table);
+ pm_cpu_check(msr_save_cpu_table);
+
return 0;
}
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 3cf302b26332..10fb42da0007 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -6,6 +6,9 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string
targets += $(purgatory-y)
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+$(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
$(call if_changed_rule,cc_o_c)
@@ -15,13 +18,39 @@ targets += purgatory.ro
KASAN_SANITIZE := n
KCOV_INSTRUMENT := n
+# These are adjustments to the compiler flags used for objects that
+# make up the standalone purgatory.ro
+
+PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel
+PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss
+
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
# in turn leaves some undefined symbols like __fentry__ in purgatory and not
-# sure how to relocate those. Like kexec-tools, use custom flags.
+# sure how to relocate those.
+ifdef CONFIG_FUNCTION_TRACER
+PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_FTRACE)
+endif
+
+ifdef CONFIG_STACKPROTECTOR
+PURGATORY_CFLAGS_REMOVE += -fstack-protector
+endif
+
+ifdef CONFIG_STACKPROTECTOR_STRONG
+PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong
+endif
+
+ifdef CONFIG_RETPOLINE
+PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS)
+endif
+
+CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_purgatory.o += $(PURGATORY_CFLAGS)
+
+CFLAGS_REMOVE_sha256.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -Os -mcmodel=large
-KBUILD_CFLAGS += -m$(BITS)
-KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
+CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
+CFLAGS_string.o += $(PURGATORY_CFLAGS)
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 6d8d5a34c377..b607bda786f6 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -68,3 +68,9 @@ void purgatory(void)
}
copy_backup_region();
}
+
+/*
+ * Defined in order to reuse memcpy() and memset() from
+ * arch/x86/boot/compressed/string.c
+ */
+void warn(const char *msg) {}
diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c
deleted file mode 100644
index 01ad43873ad9..000000000000
--- a/arch/x86/purgatory/string.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Simple string functions.
- *
- * Copyright (C) 2014 Red Hat Inc.
- *
- * Author:
- * Vivek Goyal <vgoyal@redhat.com>
- */
-
-#include <linux/types.h>
-
-#include "../boot/string.c"
-
-void *memcpy(void *dst, const void *src, size_t len)
-{
- return __builtin_memcpy(dst, src, len);
-}
-
-void *memset(void *dst, int c, size_t len)
-{
- return __builtin_memset(dst, c, len);
-}
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index 30b0d30d861a..6363761cc74c 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -19,7 +19,6 @@ GLOBAL(real_mode_header)
.long pa_ro_end
/* SMP trampoline */
.long pa_trampoline_start
- .long pa_trampoline_status
.long pa_trampoline_header
#ifdef CONFIG_X86_64
.long pa_trampoline_pgd;
diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S
index 2dd866c9e21e..1868b158480d 100644
--- a/arch/x86/realmode/rm/trampoline_32.S
+++ b/arch/x86/realmode/rm/trampoline_32.S
@@ -41,9 +41,6 @@ ENTRY(trampoline_start)
movl tr_start, %eax # where we need to go
- movl $0xA5A5A5A5, trampoline_status
- # write marker for master knows we're running
-
/*
* GDT tables in non default location kernel can be beyond 16MB and
* lgdt will not be able to load the address as in real mode default
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index 24bb7598774e..aee2b45d83b8 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -49,9 +49,6 @@ ENTRY(trampoline_start)
mov %ax, %es
mov %ax, %ss
- movl $0xA5A5A5A5, trampoline_status
- # write marker for master knows we're running
-
# Setup stack
movl $rm_stack_end, %esp
diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S
index 7c706772ab59..8d8208dcca24 100644
--- a/arch/x86/realmode/rm/trampoline_common.S
+++ b/arch/x86/realmode/rm/trampoline_common.S
@@ -2,7 +2,3 @@
.section ".rodata","a"
.balign 16
tr_idt: .fill 1, 6, 0
-
- .bss
- .balign 4
-GLOBAL(trampoline_status) .space 4