summaryrefslogtreecommitdiff
path: root/arch/x86/include/asm/processor.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm/processor.h')
-rw-r--r--arch/x86/include/asm/processor.h788
1 files changed, 284 insertions, 504 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 33051436c864..a24c7805acdb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -7,6 +7,7 @@
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
+struct io_bitmap;
struct vm86;
#include <asm/math_emu.h>
@@ -15,15 +16,18 @@ struct vm86;
#include <uapi/asm/sigcontext.h>
#include <asm/current.h>
#include <asm/cpufeatures.h>
+#include <asm/cpuid/api.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
-#include <asm/msr.h>
#include <asm/desc_defs.h>
#include <asm/nops.h>
#include <asm/special_insns.h>
#include <asm/fpu/types.h>
#include <asm/unwind_hints.h>
+#include <asm/vmxfeatures.h>
+#include <asm/vdso/processor.h>
+#include <asm/shstk.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -56,86 +60,135 @@ struct vm86;
# define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
-enum tlb_infos {
- ENTRIES,
- NR_INFO
-};
-
-extern u16 __read_mostly tlb_lli_4k[NR_INFO];
-extern u16 __read_mostly tlb_lli_2m[NR_INFO];
-extern u16 __read_mostly tlb_lli_4m[NR_INFO];
-extern u16 __read_mostly tlb_lld_4k[NR_INFO];
-extern u16 __read_mostly tlb_lld_2m[NR_INFO];
-extern u16 __read_mostly tlb_lld_4m[NR_INFO];
-extern u16 __read_mostly tlb_lld_1g[NR_INFO];
+extern u16 __read_mostly tlb_lli_4k;
+extern u16 __read_mostly tlb_lli_2m;
+extern u16 __read_mostly tlb_lli_4m;
+extern u16 __read_mostly tlb_lld_4k;
+extern u16 __read_mostly tlb_lld_2m;
+extern u16 __read_mostly tlb_lld_4m;
+extern u16 __read_mostly tlb_lld_1g;
/*
- * CPU type and hardware bug flags. Kept separately for each CPU.
- * Members of this structure are referenced in head_32.S, so think twice
- * before touching them. [mj]
+ * CPU type and hardware bug flags. Kept separately for each CPU.
*/
+struct cpuinfo_topology {
+ // Real APIC ID read from the local APIC
+ u32 apicid;
+ // The initial APIC ID provided by CPUID
+ u32 initial_apicid;
+
+ // Physical package ID
+ u32 pkg_id;
+
+ // Physical die ID on AMD, Relative on Intel
+ u32 die_id;
+
+ // Compute unit ID - AMD specific
+ u32 cu_id;
+
+ // Core ID relative to the package
+ u32 core_id;
+
+ // Logical ID mappings
+ u32 logical_pkg_id;
+ u32 logical_die_id;
+ u32 logical_core_id;
+
+ // AMD Node ID and Nodes per Package info
+ u32 amd_node_id;
+
+ // Cache level topology IDs
+ u32 llc_id;
+ u32 l2c_id;
+
+ // Hardware defined CPU-type
+ union {
+ u32 cpu_type;
+ struct {
+ // CPUID.1A.EAX[23-0]
+ u32 intel_native_model_id :24;
+ // CPUID.1A.EAX[31-24]
+ u32 intel_type :8;
+ };
+ struct {
+ // CPUID 0x80000026.EBX
+ u32 amd_num_processors :16,
+ amd_power_eff_ranking :8,
+ amd_native_model_id :4,
+ amd_type :4;
+ };
+ };
+};
+
struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
+ union {
+ /*
+ * The particular ordering (low-to-high) of (vendor,
+ * family, model) is done in case range of models, like
+ * it is usually done on AMD, need to be compared.
+ */
+ struct {
+ __u8 x86_model;
+ /* CPU family */
+ __u8 x86;
+ /* CPU vendor */
+ __u8 x86_vendor;
+ __u8 x86_reserved;
+ };
+ /* combined vendor, family, model */
+ __u32 x86_vfm;
+ };
__u8 x86_stepping;
#ifdef CONFIG_X86_64
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
int x86_tlbsize;
#endif
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+ __u32 vmx_capability[NVMXINTS];
+#endif
__u8 x86_virt_bits;
__u8 x86_phys_bits;
- /* CPUID returned core id bits: */
- __u8 x86_coreid_bits;
- __u8 cu_id;
/* Max extended CPUID function supported: */
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
int cpuid_level;
- __u32 x86_capability[NCAPINTS + NBUGINTS];
+ /*
+ * Align to size of unsigned long because the x86_capability array
+ * is passed to bitops which require the alignment. Use unnamed
+ * union to enforce the array is aligned to size of unsigned long.
+ */
+ union {
+ __u32 x86_capability[NCAPINTS + NBUGINTS];
+ unsigned long x86_capability_alignment;
+ };
char x86_vendor_id[16];
char x86_model_id[64];
+ struct cpuinfo_topology topo;
/* in KB - valid for CPUS which support this call: */
unsigned int x86_cache_size;
int x86_cache_alignment; /* In bytes */
- /* Cache QoS architectural values: */
+ /* Cache QoS architectural values, valid only on the BSP: */
int x86_cache_max_rmid; /* max index */
int x86_cache_occ_scale; /* scale to bytes */
+ int x86_cache_mbm_width_offset;
int x86_power;
unsigned long loops_per_jiffy;
- /* cpuid returned max cores value: */
- u16 x86_max_cores;
- u16 apicid;
- u16 initial_apicid;
+ /* protected processor identification number */
+ u64 ppin;
u16 x86_clflush_size;
/* number of cores as seen by the OS: */
u16 booted_cores;
- /* Physical processor id: */
- u16 phys_proc_id;
- /* Logical processor id: */
- u16 logical_proc_id;
- /* Core id: */
- u16 cpu_core_id;
/* Index into per_cpu list: */
u16 cpu_index;
+ /* Is SMT active on this core? */
+ bool smt_active;
u32 microcode;
/* Address space bits used by the cache internally */
u8 x86_cache_bits;
unsigned initialized : 1;
} __randomize_layout;
-struct cpuid_regs {
- u32 eax, ebx, ecx, edx;
-};
-
-enum cpuid_regs_idx {
- CPUID_EAX = 0,
- CPUID_EBX,
- CPUID_ECX,
- CPUID_EDX,
-};
-
#define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1
#define X86_VENDOR_AMD 2
@@ -144,7 +197,9 @@ enum cpuid_regs_idx {
#define X86_VENDOR_TRANSMETA 7
#define X86_VENDOR_NSC 8
#define X86_VENDOR_HYGON 9
-#define X86_VENDOR_NUM 10
+#define X86_VENDOR_ZHAOXIN 10
+#define X86_VENDOR_VORTEX 11
+#define X86_VENDOR_NUM 12
#define X86_VENDOR_UNKNOWN 0xff
@@ -154,17 +209,11 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
-extern struct x86_hw_tss doublefault_tss;
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
-#ifdef CONFIG_SMP
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
-#else
-#define cpu_info boot_cpu_data
-#define cpu_data(cpu) boot_cpu_data
-#endif
extern const struct seq_operations cpuinfo_op;
@@ -177,51 +226,13 @@ static inline unsigned long long l1tf_pfn_limit(void)
return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}
+void init_cpu_devs(void);
+void get_cpu_vendor(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
-extern void identify_boot_cpu(void);
-extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+extern void identify_secondary_cpu(unsigned int cpu);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
-#ifdef CONFIG_X86_32
-extern int have_cpuid_p(void);
-#else
-static inline int have_cpuid_p(void)
-{
- return 1;
-}
-#endif
-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx)
- : "memory");
-}
-
-#define native_cpuid_reg(reg) \
-static inline unsigned int native_cpuid_##reg(unsigned int op) \
-{ \
- unsigned int eax = op, ebx, ecx = 0, edx; \
- \
- native_cpuid(&eax, &ebx, &ecx, &edx); \
- \
- return reg; \
-}
-
-/*
- * Native CPUID functions returning a single datum.
- */
-native_cpuid_reg(eax)
-native_cpuid_reg(ebx)
-native_cpuid_reg(ecx)
-native_cpuid_reg(edx)
-
/*
* Friendlier CR3 helpers.
*/
@@ -297,11 +308,6 @@ struct x86_hw_tss {
struct x86_hw_tss {
u32 reserved1;
u64 sp0;
-
- /*
- * We store cpu_current_top_of_stack in sp1 so it's always accessible.
- * Linux does not use ring 1, so sp1 is not otherwise needed.
- */
u64 sp1;
/*
@@ -325,26 +331,56 @@ struct x86_hw_tss {
* IO-bitmap sizes:
*/
#define IO_BITMAP_BITS 65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
-#define INVALID_IO_BITMAP_OFFSET 0x8000
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long))
+
+#define IO_BITMAP_OFFSET_VALID_MAP \
+ (offsetof(struct tss_struct, io_bitmap.bitmap) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#define IO_BITMAP_OFFSET_VALID_ALL \
+ (offsetof(struct tss_struct, io_bitmap.mapall) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+/*
+ * sizeof(unsigned long) coming from an extra "long" at the end of the
+ * iobitmap. The limit is inclusive, i.e. the last valid byte.
+ */
+# define __KERNEL_TSS_LIMIT \
+ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
+ sizeof(unsigned long) - 1)
+#else
+# define __KERNEL_TSS_LIMIT \
+ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1)
+#endif
+
+/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
+#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
struct entry_stack {
- unsigned long words[64];
+ char stack[PAGE_SIZE];
};
struct entry_stack_page {
struct entry_stack stack;
} __aligned(PAGE_SIZE);
-struct tss_struct {
+/*
+ * All IO bitmap related data stored in the TSS:
+ */
+struct x86_io_bitmap {
+ /* The sequence number of the last active bitmap. */
+ u64 prev_sequence;
+
/*
- * The fixed hardware portion. This must not cross a page boundary
- * at risk of violating the SDM's advice and potentially triggering
- * errata.
+ * Store the dirty size of the last io bitmap offender. The next
+ * one will have to do the cleanup as the switch out to a non io
+ * bitmap user will just set x86_tss.io_bitmap_base to a value
+ * outside of the TSS limit. So for sane tasks there is no need to
+ * actually touch the io_bitmap at all.
*/
- struct x86_hw_tss x86_tss;
+ unsigned int prev_max;
/*
* The extra 1 is there because the CPU will access an
@@ -352,101 +388,63 @@ struct tss_struct {
* bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
- unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+ unsigned long bitmap[IO_BITMAP_LONGS + 1];
+
+ /*
+ * Special I/O bitmap to emulate IOPL(3). All bytes zero,
+ * except the additional byte at the end.
+ */
+ unsigned long mapall[IO_BITMAP_LONGS + 1];
+};
+
+struct tss_struct {
+ /*
+ * The fixed hardware portion. This must not cross a page boundary
+ * at risk of violating the SDM's advice and potentially triggering
+ * errata.
+ */
+ struct x86_hw_tss x86_tss;
+
+ struct x86_io_bitmap io_bitmap;
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
-/*
- * sizeof(unsigned long) coming from an extra "long" at the end
- * of the iobitmap.
- *
- * -1? seg base+limit should be pointing to the address of the
- * last valid byte
- */
-#define __KERNEL_TSS_LIMIT \
- (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
+/* Per CPU interrupt stacks */
+struct irq_stack {
+ char stack[IRQ_STACK_SIZE];
+} __aligned(IRQ_STACK_SIZE);
-#ifdef CONFIG_X86_32
-DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr);
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse);
#else
-/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
-#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
+DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr);
#endif
-/*
- * Save the original ist values for checking stack pointers during debugging
- */
-struct orig_ist {
- unsigned long ist[7];
-};
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack);
+/* const-qualified alias provided by the linker. */
+DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override,
+ const_cpu_current_top_of_stack);
#ifdef CONFIG_X86_64
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-
-union irq_stack_union {
- char irq_stack[IRQ_STACK_SIZE];
- /*
- * GCC hardcodes the stack canary as %gs:40. Since the
- * irq_stack is the object at %gs:0, we reserve the bottom
- * 48 bytes of the irq stack for the canary.
- */
- struct {
- char gs_base[40];
- unsigned long stack_canary;
- };
-};
-
-DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
-DECLARE_INIT_PER_CPU(irq_stack_union);
-
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+#ifdef CONFIG_SMP
+ return per_cpu_offset(cpu);
+#else
+ return 0;
+#endif
}
-DECLARE_PER_CPU(char *, irq_stack_ptr);
-DECLARE_PER_CPU(unsigned int, irq_count);
-extern asmlinkage void ignore_sysret(void);
+extern asmlinkage void entry_SYSCALL32_ignore(void);
-#if IS_ENABLED(CONFIG_KVM)
/* Save actual FS/GS selectors and bases to current->thread */
-void save_fsgs_for_kvm(void);
-#endif
-#else /* X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
-/*
- * Make sure stack canary segment base is cached-aligned:
- * "For Intel Atom processors, avoid non zero segment base address
- * that is not aligned to cache line boundary at all cost."
- * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
- */
-struct stack_canary {
- char __pad[20]; /* canary at %gs:20 */
- unsigned long canary;
-};
-DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
-#endif
-/*
- * per-CPU IRQ handling stacks
- */
-struct irq_stack {
- u32 stack[THREAD_SIZE/sizeof(u32)];
-} __aligned(THREAD_SIZE);
-
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
-DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
+void current_save_fsgs(void);
#endif /* X86_64 */
-extern unsigned int fpu_kernel_xstate_size;
-extern unsigned int fpu_user_xstate_size;
-
struct perf_event;
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
-
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -478,7 +476,7 @@ struct thread_struct {
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */
- unsigned long debugreg6;
+ unsigned long virtual_dr6;
/* Keep track of the exact dr7 value set by the user */
unsigned long ptrace_dr7;
/* Fault info: */
@@ -490,58 +488,46 @@ struct thread_struct {
struct vm86 *vm86;
#endif
/* IO permissions: */
- unsigned long *io_bitmap_ptr;
- unsigned long iopl;
- /* Max allowed port in the bitmap, in bytes: */
- unsigned io_bitmap_max;
+ struct io_bitmap *io_bitmap;
- mm_segment_t addr_limit;
+ /*
+ * IOPL. Privilege level dependent I/O permission which is
+ * emulated via the I/O bitmap to prevent user space from disabling
+ * interrupts.
+ */
+ unsigned long iopl_emul;
- unsigned int sig_on_uaccess_err:1;
- unsigned int uaccess_err:1; /* uaccess failed */
+ unsigned int iopl_warn:1;
- /* Floating point and extended processor state */
- struct fpu fpu;
/*
- * WARNING: 'fpu' is dynamically-sized. It *MUST* be at
- * the end.
+ * Protection Keys Register for Userspace. Loaded immediately on
+ * context switch. Store it in thread_struct to avoid a lookup in
+ * the tasks's FPU xstate buffer. This value is only valid when a
+ * task is scheduled out. For 'current' the authoritative source of
+ * PKRU is the hardware itself.
*/
+ u32 pkru;
+
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+ unsigned long features;
+ unsigned long features_locked;
+
+ struct thread_shstk shstk;
+#endif
};
-/* Whitelist the FPU state from the task_struct for hardened usercopy. */
-static inline void arch_thread_struct_whitelist(unsigned long *offset,
- unsigned long *size)
-{
- *offset = offsetof(struct thread_struct, fpu.state);
- *size = fpu_kernel_xstate_size;
-}
+#ifdef CONFIG_X86_DEBUG_FPU
+extern struct fpu *x86_task_fpu(struct task_struct *task);
+#else
+# define x86_task_fpu(task) ((struct fpu *)((void *)(task) + sizeof(*(task))))
+#endif
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void native_set_iopl_mask(unsigned mask)
+static inline void arch_thread_struct_whitelist(unsigned long *offset,
+ unsigned long *size)
{
-#ifdef CONFIG_X86_32
- unsigned int reg;
-
- asm volatile ("pushfl;"
- "popl %0;"
- "andl %1, %0;"
- "orl %2, %0;"
- "pushl %0;"
- "popfl"
- : "=&r" (reg)
- : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-#endif
+ fpu_thread_struct_whitelist(offset, size);
}
static inline void
@@ -550,24 +536,27 @@ native_load_sp0(unsigned long sp0)
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
-static inline void native_swapgs(void)
+static __always_inline void native_swapgs(void)
{
#ifdef CONFIG_X86_64
asm volatile("swapgs" ::: "memory");
#endif
}
-static inline unsigned long current_top_of_stack(void)
+static __always_inline unsigned long current_top_of_stack(void)
{
/*
* We can't read directly from tss.sp0: sp0 on x86_32 is special in
* and around vm86 mode and sp0 on x86_64 is special because of the
* entry trampoline.
*/
+ if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
+ return this_cpu_read_const(const_cpu_current_top_of_stack);
+
return this_cpu_read_stable(cpu_current_top_of_stack);
}
-static inline bool on_thread_stack(void)
+static __always_inline bool on_thread_stack(void)
{
return (unsigned long)(current_top_of_stack() -
current_stack_pointer) < THREAD_SIZE;
@@ -576,162 +565,17 @@ static inline bool on_thread_stack(void)
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
-#define __cpuid native_cpuid
static inline void load_sp0(unsigned long sp0)
{
native_load_sp0(sp0);
}
-#define set_iopl_mask native_set_iopl_mask
#endif /* CONFIG_PARAVIRT_XXL */
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
-unsigned long get_wchan(struct task_struct *p);
-
-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = 0;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = count;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return eax;
-}
-
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return ebx;
-}
-
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return ecx;
-}
-
-static inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return edx;
-}
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static __always_inline void rep_nop(void)
-{
- asm volatile("rep; nop" ::: "memory");
-}
-
-static __always_inline void cpu_relax(void)
-{
- rep_nop();
-}
-
-/*
- * This function forces the icache and prefetched instruction stream to
- * catch up with reality in two very specific cases:
- *
- * a) Text was modified using one virtual address and is about to be executed
- * from the same physical page at a different virtual address.
- *
- * b) Text was modified on a different CPU, may subsequently be
- * executed on this CPU, and you want to make sure the new version
- * gets executed. This generally means you're calling this in a IPI.
- *
- * If you're calling this for a different reason, you're probably doing
- * it wrong.
- */
-static inline void sync_core(void)
-{
- /*
- * There are quite a few ways to do this. IRET-to-self is nice
- * because it works on every CPU, at any CPL (so it's compatible
- * with paravirtualization), and it never exits to a hypervisor.
- * The only down sides are that it's a bit slow (it seems to be
- * a bit more than 2x slower than the fastest options) and that
- * it unmasks NMIs. The "push %cs" is needed because, in
- * paravirtual environments, __KERNEL_CS may not be a valid CS
- * value when we do IRET directly.
- *
- * In case NMI unmasking or performance ever becomes a problem,
- * the next best option appears to be MOV-to-CR2 and an
- * unconditional jump. That sequence also works on all CPUs,
- * but it will fault at CPL3 (i.e. Xen PV).
- *
- * CPUID is the conventional way, but it's nasty: it doesn't
- * exist on some 486-like CPUs, and it usually exits to a
- * hypervisor.
- *
- * Like all of Linux's memory ordering operations, this is a
- * compiler barrier as well.
- */
-#ifdef CONFIG_X86_32
- asm volatile (
- "pushfl\n\t"
- "pushl %%cs\n\t"
- "pushl $1f\n\t"
- "iret\n\t"
- "1:"
- : ASM_CALL_CONSTRAINT : : "memory");
-#else
- unsigned int tmp;
-
- asm volatile (
- UNWIND_HINT_SAVE
- "mov %%ss, %0\n\t"
- "pushq %q0\n\t"
- "pushq %%rsp\n\t"
- "addq $8, (%%rsp)\n\t"
- "pushfq\n\t"
- "mov %%cs, %0\n\t"
- "pushq %q0\n\t"
- "pushq $1f\n\t"
- "iretq\n\t"
- UNWIND_HINT_RESTORE
- "1:"
- : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
-#endif
-}
+unsigned long __get_wchan(struct task_struct *p);
-extern void select_idle_routine(const struct cpuinfo_x86 *c);
+extern void select_idle_routine(void);
extern void amd_e400_c1e_apic_setup(void);
extern unsigned long boot_option_idle_override;
@@ -740,40 +584,18 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
IDLE_POLL};
extern void enable_sep_cpu(void);
-extern int sysenter_setup(void);
-void early_trap_pf_init(void);
/* Defined in head.S */
extern struct desc_ptr early_gdt_descr;
-extern void switch_to_new_gdt(int);
+extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
-extern void load_percpu_segment(int);
extern void cpu_init(void);
-
-static inline unsigned long get_debugctlmsr(void)
-{
- unsigned long debugctlmsr = 0;
-
-#ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return 0;
-#endif
- rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-
- return debugctlmsr;
-}
-
-static inline void update_debugctlmsr(unsigned long debugctlmsr)
-{
-#ifndef CONFIG_X86_DEBUGCTLMSR
- if (boot_cpu_data.x86 < 6)
- return;
-#endif
- wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
-}
+extern void cpu_init_exception_handling(bool boot_cpu);
+extern void cpu_init_replace_early_idt(void);
+extern void cr4_init(void);
extern void set_task_blockstep(struct task_struct *task, bool on);
@@ -785,13 +607,12 @@ extern char ignore_fpu_irq;
#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
#ifdef CONFIG_X86_32
# define BASE_PREFETCH ""
# define ARCH_HAS_PREFETCH
#else
-# define BASE_PREFETCH "prefetcht0 %P1"
+# define BASE_PREFETCH "prefetcht0 %1"
#endif
/*
@@ -802,7 +623,7 @@ extern char ignore_fpu_irq;
*/
static inline void prefetch(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchnta %P1",
+ alternative_input(BASE_PREFETCH, "prefetchnta %1",
X86_FEATURE_XMM,
"m" (*(const char *)x));
}
@@ -812,18 +633,13 @@ static inline void prefetch(const void *x)
* Useful for spinlocks to avoid one state transition in the
* cache coherency protocol:
*/
-static inline void prefetchw(const void *x)
+static __always_inline void prefetchw(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchw %P1",
+ alternative_input(BASE_PREFETCH, "prefetchw %1",
X86_FEATURE_3DNOWPREFETCH,
"m" (*(const char *)x));
}
-static inline void spin_lock_prefetch(const void *x)
-{
- prefetchw(x);
-}
-
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
@@ -837,71 +653,18 @@ static inline void spin_lock_prefetch(const void *x)
})
#ifdef CONFIG_X86_32
-/*
- * User space process size: 3GB (default).
- */
-#define IA32_PAGE_OFFSET PAGE_OFFSET
-#define TASK_SIZE PAGE_OFFSET
-#define TASK_SIZE_LOW TASK_SIZE
-#define TASK_SIZE_MAX TASK_SIZE
-#define DEFAULT_MAP_WINDOW TASK_SIZE
-#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX STACK_TOP
-
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.sysenter_cs = __KERNEL_CS, \
- .io_bitmap_ptr = NULL, \
- .addr_limit = KERNEL_DS, \
}
-#define KSTK_ESP(task) (task_pt_regs(task)->sp)
-
#else
-/*
- * User space process size. This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything executable
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen. This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
-#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
- 0xc0000000 : 0xFFFFe000)
-
-#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
- IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
-#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
- IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
- IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-
-#define STACK_TOP TASK_SIZE_LOW
-#define STACK_TOP_MAX TASK_SIZE_MAX
+extern unsigned long __top_init_kernel_stack[];
-#define INIT_THREAD { \
- .addr_limit = KERNEL_DS, \
+#define INIT_THREAD { \
+ .sp = (unsigned long)&__top_init_kernel_stack, \
}
-extern unsigned long KSTK_ESP(struct task_struct *task);
-
#endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
@@ -915,6 +678,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
+#define KSTK_ESP(task) (task_pt_regs(task)->sp)
/* Get/set a process' ability to use the timestamp counter instruction */
#define GET_TSC_CTL(adr) get_tsc_mode((adr))
@@ -925,50 +689,36 @@ extern int set_tsc_mode(unsigned int val);
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
-/* Register/unregister a process' MPX related resource */
-#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
-#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
-
-#ifdef CONFIG_X86_INTEL_MPX
-extern int mpx_enable_management(void);
-extern int mpx_disable_management(void);
-#else
-static inline int mpx_enable_management(void)
+static inline u32 per_cpu_llc_id(unsigned int cpu)
{
- return -EINVAL;
+ return per_cpu(cpu_info.topo.llc_id, cpu);
}
-static inline int mpx_disable_management(void)
+
+static inline u32 per_cpu_l2c_id(unsigned int cpu)
{
- return -EINVAL;
+ return per_cpu(cpu_info.topo.l2c_id, cpu);
}
-#endif /* CONFIG_X86_INTEL_MPX */
#ifdef CONFIG_CPU_SUP_AMD
-extern u16 amd_get_nb_id(int cpu);
-extern u32 amd_get_nodes_per_socket(void);
-#else
-static inline u16 amd_get_nb_id(int cpu) { return 0; }
-static inline u32 amd_get_nodes_per_socket(void) { return 0; }
-#endif
-
-static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+static __always_inline void amd_clear_divider(void)
{
- uint32_t base, eax, signature[3];
-
- for (base = 0x40000000; base < 0x40010000; base += 0x100) {
- cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
-
- if (!memcmp(sig, signature, 12) &&
- (leaves == 0 || ((eax - base) >= leaves)))
- return base;
- }
-
- return 0;
+ asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+ :: "a" (0), "d" (0), "r" (1));
}
+extern void amd_check_microcode(void);
+#else
+static inline void amd_clear_divider(void) { }
+static inline void amd_check_microcode(void) { }
+#endif
+
extern unsigned long arch_align_stack(unsigned long sp);
void free_init_pages(const char *what, unsigned long begin, unsigned long end);
-extern void free_kernel_image_pages(void *begin, void *end);
+extern void free_kernel_image_pages(const char *what, void *begin, void *end);
void default_idle(void);
#ifdef CONFIG_XEN
@@ -977,12 +727,15 @@ bool xen_set_default_idle(void);
#define xen_set_default_idle 0
#endif
-void stop_this_cpu(void *dummy);
-void df_debug(struct pt_regs *regs, long error_code);
-void microcode_check(void);
+void __noreturn stop_this_cpu(void *dummy);
+void microcode_check(struct cpuinfo_x86 *prev_info);
+void store_cpu_caps(struct cpuinfo_x86 *info);
+
+DECLARE_PER_CPU(bool, cache_state_incoherent);
enum l1tf_mitigations {
L1TF_MITIGATION_OFF,
+ L1TF_MITIGATION_AUTO,
L1TF_MITIGATION_FLUSH_NOWARN,
L1TF_MITIGATION_FLUSH,
L1TF_MITIGATION_FLUSH_NOSMT,
@@ -992,4 +745,31 @@ enum l1tf_mitigations {
extern enum l1tf_mitigations l1tf_mitigation;
+enum mds_mitigations {
+ MDS_MITIGATION_OFF,
+ MDS_MITIGATION_AUTO,
+ MDS_MITIGATION_FULL,
+ MDS_MITIGATION_VMWERV,
+};
+
+extern bool gds_ucode_mitigated(void);
+
+/*
+ * Make previous memory operations globally visible before
+ * a WRMSR.
+ *
+ * MFENCE makes writes visible, but only affects load/store
+ * instructions. WRMSR is unfortunately not a load/store
+ * instruction and is unaffected by MFENCE. The LFENCE ensures
+ * that the WRMSR is not reordered.
+ *
+ * Most WRMSRs are full serializing instructions themselves and
+ * do not require this barrier. This is only required for the
+ * IA32_TSC_DEADLINE and X2APIC MSRs.
+ */
+static inline void weak_wrmsr_fence(void)
+{
+ alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE));
+}
+
#endif /* _ASM_X86_PROCESSOR_H */