diff options
Diffstat (limited to 'arch/x86/include/asm')
51 files changed, 795 insertions, 590 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8eb74cf386db..c8a7fc23f63c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -6,7 +6,7 @@ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> */ -#include <acpi/pdc_intel.h> +#include <acpi/proc_cap_intel.h> #include <asm/numa.h> #include <asm/fixmap.h> @@ -15,6 +15,7 @@ #include <asm/mpspec.h> #include <asm/x86_init.h> #include <asm/cpufeature.h> +#include <asm/irq_vectors.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -31,6 +32,7 @@ extern int acpi_skip_timer_override; extern int acpi_use_timer_override; extern int acpi_fix_pin2_polarity; extern int acpi_disable_cmcff; +extern bool acpi_int_src_ovr[NR_IRQS_LEGACY]; extern u8 acpi_sci_flags; extern u32 acpi_sci_override_gsi; @@ -100,23 +102,31 @@ static inline bool arch_has_acpi_pdc(void) c->x86_vendor == X86_VENDOR_CENTAUR); } -static inline void arch_acpi_set_pdc_bits(u32 *buf) +static inline void arch_acpi_set_proc_cap_bits(u32 *cap) { struct cpuinfo_x86 *c = &cpu_data(0); - buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + *cap |= ACPI_PROC_CAP_C_CAPABILITY_SMP; + + /* Enable coordination with firmware's _TSD info */ + *cap |= ACPI_PROC_CAP_SMP_T_SWCOORD; if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SWSMP; if (cpu_has(c, X86_FEATURE_ACPI)) - buf[2] |= ACPI_PDC_T_FFH; + *cap |= ACPI_PROC_CAP_T_FFH; + + if (cpu_has(c, X86_FEATURE_HWP)) + *cap |= ACPI_PROC_CAP_COLLAB_PROC_PERF; /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + * If mwait/monitor is unsupported, C_C1_FFH and + * C2/C3_FFH will be disabled. */ - if (!cpu_has(c, X86_FEATURE_MWAIT)) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); + if (!cpu_has(c, X86_FEATURE_MWAIT) || + boot_option_idle_override == IDLE_NOMWAIT) + *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH); } static inline bool acpi_has_cpu_in_madt(void) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 98c32aa5963a..5af4ec1a0f71 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -3,6 +3,7 @@ #define _ASM_X86_APIC_H #include <linux/cpumask.h> +#include <linux/static_call.h> #include <asm/alternative.h> #include <asm/cpufeature.h> @@ -40,11 +41,9 @@ #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -extern void generic_apic_probe(void); +extern void x86_32_probe_apic(void); #else -static inline void generic_apic_probe(void) -{ -} +static inline void x86_32_probe_apic(void) { } #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -52,7 +51,7 @@ static inline void generic_apic_probe(void) extern int apic_verbosity; extern int local_apic_timer_c2_ok; -extern int disable_apic; +extern bool apic_is_disabled; extern unsigned int lapic_timer_period; extern int cpuid_to_apicid[]; @@ -66,20 +65,6 @@ enum apic_intr_mode_id { APIC_SYMMETRIC_IO_NO_ROUTING }; -#ifdef CONFIG_SMP -extern void __inquire_remote_apic(int apicid); -#else /* CONFIG_SMP */ -static inline void __inquire_remote_apic(int apicid) -{ -} -#endif /* CONFIG_SMP */ - -static inline void default_inquire_remote_apic(int apicid) -{ - if (apic_verbosity >= APIC_DEBUG) - __inquire_remote_apic(apicid); -} - /* * With 82489DX we can't rely on apic feature bit * retrieved via cpuid but still have to deal with @@ -90,7 +75,7 @@ static inline void default_inquire_remote_apic(int apicid) */ static inline bool apic_from_smp_config(void) { - return smp_found_config && !disable_apic; + return smp_found_config && !apic_is_disabled; } /* @@ -114,8 +99,11 @@ static inline u32 native_apic_mem_read(u32 reg) return *((volatile u32 *)(APIC_BASE + reg)); } -extern void native_apic_wait_icr_idle(void); -extern u32 native_safe_apic_wait_icr_idle(void); +static inline void native_apic_mem_eoi(void) +{ + native_apic_mem_write(APIC_EOI, APIC_EOI_ACK); +} + extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); @@ -149,12 +137,12 @@ extern void setup_secondary_APIC_clock(void); extern void lapic_update_tsc_freq(void); #ifdef CONFIG_X86_64 -static inline int apic_force_enable(unsigned long addr) +static inline bool apic_force_enable(unsigned long addr) { - return -1; + return false; } #else -extern int apic_force_enable(unsigned long addr); +extern bool apic_force_enable(unsigned long addr); #endif extern void apic_ap_setup(void); @@ -207,7 +195,7 @@ static inline void native_apic_msr_write(u32 reg, u32 v) wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0); } -static inline void native_apic_msr_eoi_write(u32 reg, u32 v) +static inline void native_apic_msr_eoi(void) { __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); } @@ -223,18 +211,6 @@ static inline u32 native_apic_msr_read(u32 reg) return (u32)msr; } -static inline void native_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static inline u32 native_safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - static inline void native_x2apic_icr_write(u32 low, u32 id) { wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); @@ -261,7 +237,7 @@ static inline int x2apic_enabled(void) #else /* !CONFIG_X86_X2APIC */ static inline void x2apic_setup(void) { } static inline int x2apic_enabled(void) { return 0; } - +static inline u32 native_apic_msr_read(u32 reg) { BUG(); } #define x2apic_mode (0) #define x2apic_supported() (0) #endif /* !CONFIG_X86_X2APIC */ @@ -280,8 +256,8 @@ struct irq_data; */ struct apic { /* Hotpath functions first */ - void (*eoi_write)(u32 reg, u32 v); - void (*native_eoi_write)(u32 reg, u32 v); + void (*eoi)(void); + void (*native_eoi)(void); void (*write)(u32 reg, u32 v); u32 (*read)(u32 reg); @@ -296,10 +272,11 @@ struct apic { void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); - u32 disable_esr; - enum apic_delivery_modes delivery_mode; - bool dest_mode_logical; + + u32 disable_esr : 1, + dest_mode_logical : 1, + x2apic_set_max_apicid : 1; u32 (*calc_dest_apicid)(unsigned int cpu); @@ -307,19 +284,18 @@ struct apic { u64 (*icr_read)(void); void (*icr_write)(u32 low, u32 high); + /* The limit of the APIC ID space. */ + u32 max_apic_id; + /* Probe, setup and smpboot functions */ int (*probe)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - int (*apic_id_valid)(u32 apicid); - int (*apic_id_registered)(void); + bool (*apic_id_registered)(void); bool (*check_apicid_used)(physid_mask_t *map, int apicid); void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); - void (*setup_apic_routing)(void); int (*cpu_present_to_apicid)(int mps_cpu); - void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); - int (*check_phys_apicid_present)(int phys_apicid); int (*phys_pkg_id)(int cpuid_apic, int index_msb); u32 (*get_apic_id)(unsigned long x); @@ -330,24 +306,26 @@ struct apic { /* wakeup secondary CPU using 64-bit wakeup point */ int (*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip); - void (*inquire_remote_apic)(int apicid); - -#ifdef CONFIG_X86_32 - /* - * Called very early during boot from get_smp_config(). It should - * return the logical apicid. x86_[bios]_cpu_to_apicid is - * initialized before this function is called. - * - * If logical apicid can't be determined that early, the function - * may return BAD_APICID. Logical apicid will be configured after - * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity - * won't be applied properly during early boot in this case. - */ - int (*x86_32_early_logical_apicid)(int cpu); -#endif char *name; }; +struct apic_override { + void (*eoi)(void); + void (*native_eoi)(void); + void (*write)(u32 reg, u32 v); + u32 (*read)(u32 reg); + void (*send_IPI)(int cpu, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *msk, int vec); + void (*send_IPI_allbutself)(int vector); + void (*send_IPI_all)(int vector); + void (*send_IPI_self)(int vector); + u64 (*icr_read)(void); + void (*icr_write)(u32 low, u32 high); + int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu_64)(int apicid, unsigned long start_eip); +}; + /* * Pointer to the local APIC driver in use on this system (there's * always just one such driver in use - the kernel decides via an @@ -383,43 +361,111 @@ extern int lapic_can_unplug_cpu(void); #endif #ifdef CONFIG_X86_LOCAL_APIC +extern struct apic_override __x86_apic_override; -static inline u32 apic_read(u32 reg) +void __init apic_setup_apic_calls(void); +void __init apic_install_driver(struct apic *driver); + +#define apic_update_callback(_callback, _fn) { \ + __x86_apic_override._callback = _fn; \ + apic->_callback = _fn; \ + static_call_update(apic_call_##_callback, _fn); \ + pr_info("APIC: %s() replaced with %ps()\n", #_callback, _fn); \ +} + +#define DECLARE_APIC_CALL(__cb) \ + DECLARE_STATIC_CALL(apic_call_##__cb, *apic->__cb) + +DECLARE_APIC_CALL(eoi); +DECLARE_APIC_CALL(native_eoi); +DECLARE_APIC_CALL(icr_read); +DECLARE_APIC_CALL(icr_write); +DECLARE_APIC_CALL(read); +DECLARE_APIC_CALL(send_IPI); +DECLARE_APIC_CALL(send_IPI_mask); +DECLARE_APIC_CALL(send_IPI_mask_allbutself); +DECLARE_APIC_CALL(send_IPI_allbutself); +DECLARE_APIC_CALL(send_IPI_all); +DECLARE_APIC_CALL(send_IPI_self); +DECLARE_APIC_CALL(wait_icr_idle); +DECLARE_APIC_CALL(wakeup_secondary_cpu); +DECLARE_APIC_CALL(wakeup_secondary_cpu_64); +DECLARE_APIC_CALL(write); + +static __always_inline u32 apic_read(u32 reg) +{ + return static_call(apic_call_read)(reg); +} + +static __always_inline void apic_write(u32 reg, u32 val) +{ + static_call(apic_call_write)(reg, val); +} + +static __always_inline void apic_eoi(void) +{ + static_call(apic_call_eoi)(); +} + +static __always_inline void apic_native_eoi(void) { - return apic->read(reg); + static_call(apic_call_native_eoi)(); } -static inline void apic_write(u32 reg, u32 val) +static __always_inline u64 apic_icr_read(void) { - apic->write(reg, val); + return static_call(apic_call_icr_read)(); } -static inline void apic_eoi(void) +static __always_inline void apic_icr_write(u32 low, u32 high) { - apic->eoi_write(APIC_EOI, APIC_EOI_ACK); + static_call(apic_call_icr_write)(low, high); } -static inline u64 apic_icr_read(void) +static __always_inline void __apic_send_IPI(int cpu, int vector) { - return apic->icr_read(); + static_call(apic_call_send_IPI)(cpu, vector); } -static inline void apic_icr_write(u32 low, u32 high) +static __always_inline void __apic_send_IPI_mask(const struct cpumask *mask, int vector) { - apic->icr_write(low, high); + static_call_mod(apic_call_send_IPI_mask)(mask, vector); } -static inline void apic_wait_icr_idle(void) +static __always_inline void __apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - apic->wait_icr_idle(); + static_call(apic_call_send_IPI_mask_allbutself)(mask, vector); } -static inline u32 safe_apic_wait_icr_idle(void) +static __always_inline void __apic_send_IPI_allbutself(int vector) { - return apic->safe_wait_icr_idle(); + static_call(apic_call_send_IPI_allbutself)(vector); } -extern void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)); +static __always_inline void __apic_send_IPI_all(int vector) +{ + static_call(apic_call_send_IPI_all)(vector); +} + +static __always_inline void __apic_send_IPI_self(int vector) +{ + static_call_mod(apic_call_send_IPI_self)(vector); +} + +static __always_inline void apic_wait_icr_idle(void) +{ + static_call_cond(apic_call_wait_icr_idle)(); +} + +static __always_inline u32 safe_apic_wait_icr_idle(void) +{ + return apic->safe_wait_icr_idle ? apic->safe_wait_icr_idle() : 0; +} + +static __always_inline bool apic_id_valid(u32 apic_id) +{ + return apic_id <= apic->max_apic_id; +} #else /* CONFIG_X86_LOCAL_APIC */ @@ -430,22 +476,16 @@ static inline u64 apic_icr_read(void) { return 0; } static inline void apic_icr_write(u32 low, u32 high) { } static inline void apic_wait_icr_idle(void) { } static inline u32 safe_apic_wait_icr_idle(void) { return 0; } -static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} +static inline void apic_set_eoi_cb(void (*eoi)(void)) {} +static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); } +static inline void apic_setup_apic_calls(void) { } + +#define apic_update_callback(_callback, _fn) do { } while (0) #endif /* CONFIG_X86_LOCAL_APIC */ extern void apic_ack_irq(struct irq_data *data); -static inline void ack_APIC_irq(void) -{ - /* - * ack_APIC_irq() actually gets compiled as a single instruction - * ... yummie. - */ - apic_eoi(); -} - - static inline bool lapic_vector_set_in_irr(unsigned int vector) { u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); @@ -475,10 +515,6 @@ extern void generic_bigsmp_probe(void); #include <asm/smp.h> -#define APIC_DFR_VALUE (APIC_DFR_FLAT) - -DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); - extern struct apic apic_noop; static inline unsigned int read_apic_id(void) @@ -490,12 +526,14 @@ static inline unsigned int read_apic_id(void) #ifdef CONFIG_X86_64 typedef int (*wakeup_cpu_handler)(int apicid, unsigned long start_eip); -extern void acpi_wake_cpu_handler_update(wakeup_cpu_handler handler); +extern int default_acpi_madt_oem_check(char *, char *); +extern void x86_64_probe_apic(void); +#else +static inline int default_acpi_madt_oem_check(char *a, char *b) { return 0; } +static inline void x86_64_probe_apic(void) { } #endif extern int default_apic_id_valid(u32 apicid); -extern int default_acpi_madt_oem_check(char *, char *); -extern void default_setup_apic_routing(void); extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); @@ -503,9 +541,12 @@ extern u32 apic_flat_calc_apicid(unsigned int cpu); extern bool default_check_apicid_used(physid_mask_t *map, int apicid); extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern int default_cpu_present_to_apicid(int mps_cpu); -extern int default_check_phys_apicid_present(int phys_apicid); -#endif /* CONFIG_X86_LOCAL_APIC */ +#else /* CONFIG_X86_LOCAL_APIC */ + +static inline unsigned int read_apic_id(void) { return 0; } + +#endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_SMP void apic_smt_update(void); diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 9191280d9ea3..4ae14339cb8c 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -62,4 +62,12 @@ # define BOOT_STACK_SIZE 0x1000 #endif +#ifndef __ASSEMBLY__ +extern unsigned int output_len; +extern const unsigned long kernel_total_size; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)); +#endif + #endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b69b0d7756aa..2061ed1c398f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,6 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ @@ -308,6 +307,7 @@ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ #define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ #define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ @@ -384,6 +384,7 @@ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ #define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index fafe9be7a6f4..702d93fdd10e 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -105,6 +105,18 @@ # define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31)) #endif +#ifdef CONFIG_X86_USER_SHADOW_STACK +#define DISABLE_USER_SHSTK 0 +#else +#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31)) +#endif + +#ifdef CONFIG_X86_KERNEL_IBT +#define DISABLE_IBT 0 +#else +#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -120,7 +132,7 @@ #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ - DISABLE_CALL_DEPTH_TRACKING) + DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK) #define DISABLED_MASK12 (DISABLE_LAM) #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 @@ -128,7 +140,7 @@ #define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ DISABLE_ENQCMD) #define DISABLED_MASK17 0 -#define DISABLED_MASK18 0 +#define DISABLED_MASK18 (DISABLE_IBT) #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index b8f1dc0761e4..9931e4c7d73f 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -71,6 +71,12 @@ static inline u64 mul_u32_u32(u32 a, u32 b) } #define mul_u32_u32 mul_u32_u32 +/* + * __div64_32() is never called on x86, so prevent the + * generic definition from getting built. + */ +#define __div64_32 + #else # include <asm-generic/div64.h> diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8b4be7cecdb8..b0994ae3bc23 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,6 +90,8 @@ static inline void efi_fpu_end(void) } #ifdef CONFIG_X86_32 +#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) + #define arch_efi_call_virt_setup() \ ({ \ efi_fpu_begin(); \ @@ -103,8 +105,7 @@ static inline void efi_fpu_end(void) }) #else /* !CONFIG_X86_32 */ - -#define EFI_LOADER_SIGNATURE "EL64" +#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT extern asmlinkage u64 __efi_call(void *fp, ...); @@ -218,6 +219,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, #ifdef CONFIG_EFI_MIXED +#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX) + #define ARCH_HAS_EFISTUB_WRAPPERS static inline bool efi_is_64bit(void) diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 117903881fe4..ce8f50192ae3 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -92,6 +92,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { mds_user_clear_cpu_buffers(); + amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index b475d9a582b8..31089b851c4f 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -82,6 +82,15 @@ static inline void fpregs_unlock(void) preempt_enable(); } +/* + * FPU state gets lazily restored before returning to userspace. So when in the + * kernel, the valid FPU state may be kept in the buffer. This function will force + * restore all the fpu state to the registers early if needed, and lock them from + * being automatically saved/restored. Then FPU state can be modified safely in the + * registers, before unlocking with fpregs_unlock(). + */ +void fpregs_lock_and_load(void); + #ifdef CONFIG_X86_DEBUG_FPU extern void fpregs_assert_state_consistent(void); #else diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h index 4f928d6a367b..697b77e96025 100644 --- a/arch/x86/include/asm/fpu/regset.h +++ b/arch/x86/include/asm/fpu/regset.h @@ -7,11 +7,12 @@ #include <linux/regset.h> -extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; +extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active, + ssp_active; extern user_regset_get2_fn fpregs_get, xfpregs_get, fpregs_soft_get, - xstateregs_get; + xstateregs_get, ssp_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, - xstateregs_set; + xstateregs_set, ssp_set; /* * xstateregs_active == regset_fpregs_active. Please refer to the comment diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h index 78fcde7b1f07..ca6e5e5f16b2 100644 --- a/arch/x86/include/asm/fpu/sched.h +++ b/arch/x86/include/asm/fpu/sched.h @@ -11,7 +11,8 @@ extern void save_fpregs_to_fpstate(struct fpu *fpu); extern void fpu__drop(struct fpu *fpu); -extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal); +extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal, + unsigned long shstk_addr); extern void fpu_flush_thread(void); /* diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 7f6d858ff47a..eb810074f1e7 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -115,8 +115,8 @@ enum xfeature { XFEATURE_PT_UNIMPLEMENTED_SO_FAR, XFEATURE_PKRU, XFEATURE_PASID, - XFEATURE_RSRVD_COMP_11, - XFEATURE_RSRVD_COMP_12, + XFEATURE_CET_USER, + XFEATURE_CET_KERNEL_UNUSED, XFEATURE_RSRVD_COMP_13, XFEATURE_RSRVD_COMP_14, XFEATURE_LBR, @@ -138,6 +138,8 @@ enum xfeature { #define XFEATURE_MASK_PT (1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR) #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) #define XFEATURE_MASK_PASID (1 << XFEATURE_PASID) +#define XFEATURE_MASK_CET_USER (1 << XFEATURE_CET_USER) +#define XFEATURE_MASK_CET_KERNEL (1 << XFEATURE_CET_KERNEL_UNUSED) #define XFEATURE_MASK_LBR (1 << XFEATURE_LBR) #define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG) #define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA) @@ -253,6 +255,16 @@ struct pkru_state { } __packed; /* + * State component 11 is Control-flow Enforcement user states + */ +struct cet_user_state { + /* user control-flow settings */ + u64 user_cet; + /* user shadow stack pointer */ + u64 user_ssp; +}; + +/* * State component 15: Architectural LBR configuration state. * The size of Arch LBR state depends on the number of LBRs (lbr_depth). */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index cd3dd170e23a..d4427b88ee12 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -50,7 +50,8 @@ #define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA /* All currently supported supervisor features */ -#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID) +#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID | \ + XFEATURE_MASK_CET_USER) /* * A supervisor state component may not always contain valuable information, @@ -77,7 +78,8 @@ * Unsupported supervisor features. When a supervisor feature in this mask is * supported in the future, move it to the supported supervisor feature mask. */ -#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT) +#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT | \ + XFEATURE_MASK_CET_KERNEL) /* All supervisor states including supported and unsupported states. */ #define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index d465ece58151..551829884734 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -97,10 +97,10 @@ extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data); extern void lock_vector_lock(void); extern void unlock_vector_lock(void); #ifdef CONFIG_SMP -extern void send_cleanup_vector(struct irq_cfg *); +extern void vector_schedule_cleanup(struct irq_cfg *); extern void irq_complete_move(struct irq_cfg *cfg); #else -static inline void send_cleanup_vector(struct irq_cfg *c) { } +static inline void vector_schedule_cleanup(struct irq_cfg *c) { } static inline void irq_complete_move(struct irq_cfg *c) { } #endif diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index b241af4ce9b4..05fd175cec7d 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -614,7 +614,7 @@ DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); #endif /* #CP */ -#ifdef CONFIG_X86_KERNEL_IBT +#ifdef CONFIG_X86_CET DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection); #endif @@ -648,7 +648,6 @@ DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI_VECTOR, sysvec_x86_platform_ipi); #ifdef CONFIG_SMP DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi); -DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup); DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot); DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single); DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function); diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index b3af2d45bbbb..5fcd85fd64fd 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -98,8 +98,6 @@ #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ -#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ - #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ @@ -112,21 +110,24 @@ #define INTEL_FAM6_GRANITERAPIDS_X 0xAD #define INTEL_FAM6_GRANITERAPIDS_D 0xAE +/* "Hybrid" Processors (P-Core/E-Core) */ + +#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ + #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ -#define INTEL_FAM6_ALDERLAKE_N 0xBE -#define INTEL_FAM6_RAPTORLAKE 0xB7 +#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ #define INTEL_FAM6_RAPTORLAKE_P 0xBA #define INTEL_FAM6_RAPTORLAKE_S 0xBF #define INTEL_FAM6_METEORLAKE 0xAC #define INTEL_FAM6_METEORLAKE_L 0xAA -#define INTEL_FAM6_LUNARLAKE_M 0xBD - #define INTEL_FAM6_ARROWLAKE 0xC6 +#define INTEL_FAM6_LUNARLAKE_M 0xBD + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ @@ -154,9 +155,10 @@ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ -#define INTEL_FAM6_SIERRAFOREST_X 0xAF +#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ -#define INTEL_FAM6_GRANDRIDGE 0xB6 +#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ +#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index e9025640f634..76238842406a 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -35,9 +35,6 @@ * - Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ -#define ARCH_HAS_IOREMAP_WC -#define ARCH_HAS_IOREMAP_WT - #include <linux/string.h> #include <linux/compiler.h> #include <linux/cc_platform.h> @@ -212,8 +209,6 @@ void memset_io(volatile void __iomem *, int, size_t); #define memcpy_toio memcpy_toio #define memset_io memset_io -#include <asm-generic/iomap.h> - /* * ISA space is 'always mapped' on a typical x86 system, no need to * explicitly ioremap() it. The fact that the ISA IO space is mapped diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 437aa8d00e53..51c782600e02 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -109,8 +109,8 @@ extern int mp_irq_entries; /* MP IRQ source entries */ extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; -/* 1 if "noapic" boot option passed */ -extern int skip_ioapic_setup; +/* True if "noapic" boot option passed */ +extern bool ioapic_is_disabled; /* 1 if "noapic" boot option passed */ extern int noioapicquirk; @@ -129,7 +129,7 @@ extern unsigned long io_apic_irqs; * assignment of PCI IRQ's. */ #define io_apic_assign_pci_irqs \ - (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) + (mp_irq_entries && !ioapic_is_disabled && io_apic_irqs) struct irq_cfg; extern void ioapic_insert_resources(void); @@ -179,6 +179,7 @@ extern void print_IO_APICs(void); #define IO_APIC_IRQ(x) 0 #define io_apic_assign_pci_irqs 0 #define setup_ioapic_ids_from_mpc x86_init_noop +#define nr_ioapics (0) static inline void ioapic_insert_resources(void) { } static inline int arch_early_ioapic_init(void) { return 0; } static inline void print_IO_APICs(void) {} diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 29e083b92813..836c170d3087 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -42,7 +42,7 @@ extern void init_ISA_irqs(void); #ifdef CONFIG_X86_LOCAL_APIC void arch_trigger_cpumask_backtrace(const struct cpumask *mask, - bool exclude_self); + int exclude_cpu); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace #endif diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 43dcb9284208..3a19904c2db6 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -35,13 +35,6 @@ */ #define FIRST_EXTERNAL_VECTOR 0x20 -/* - * Reserve the lowest usable vector (and hence lowest priority) 0x20 for - * triggering cleanup after irq migration. 0x21-0x2f will still be used - * for device interrupts. - */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR - #define IA32_SYSCALL_VECTOR 0x80 /* diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 5b77bbc28f96..3be6a98751f0 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -209,6 +209,24 @@ typedef void crash_vmclear_fn(void); extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss; extern void kdump_nmi_shootdown_cpus(void); +#ifdef CONFIG_CRASH_HOTPLUG +void arch_crash_handle_hotplug_event(struct kimage *image); +#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event + +#ifdef CONFIG_HOTPLUG_CPU +int arch_crash_hotplug_cpu_support(void); +#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support +#endif + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_crash_hotplug_memory_support(void); +#define crash_hotplug_memory_support arch_crash_hotplug_memory_support +#endif + +unsigned int arch_crash_get_elfcorehdr_size(void); +#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 0953aa32a324..97a3de7892d3 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -21,7 +21,7 @@ #define FUNCTION_PADDING #endif -#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO) +#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) # define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING #else # define __FUNC_ALIGN __ALIGN diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 56d4ef604b91..635132a12778 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -127,8 +127,8 @@ static inline long local_cmpxchg(local_t *l, long old, long new) static inline bool local_try_cmpxchg(local_t *l, long *old, long new) { - typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old; - return try_cmpxchg_local(&l->a.counter, __old, new); + return try_cmpxchg_local(&l->a.counter, + (typeof(l->a.counter) *) old, new); } /* Always has a lock prefix */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 7f97a8a97e24..473b16d73b47 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -50,8 +50,8 @@ void __init sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); -void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, - bool enc); +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, + unsigned long size, bool enc); void __init mem_encrypt_free_decrypted_mem(void); @@ -85,7 +85,7 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } static inline void __init -early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {} +early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) {} static inline void mem_encrypt_free_decrypted_mem(void) { } diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 66dbba181bd9..bbbe9d744977 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -2,138 +2,77 @@ #ifndef _ASM_X86_MICROCODE_H #define _ASM_X86_MICROCODE_H -#include <asm/cpu.h> -#include <linux/earlycpio.h> -#include <linux/initrd.h> -#include <asm/microcode_amd.h> - -struct ucode_patch { - struct list_head plist; - void *data; /* Intel uses only this one */ - unsigned int size; - u32 patch_id; - u16 equiv_cpu; -}; - -extern struct list_head microcode_cache; - struct cpu_signature { unsigned int sig; unsigned int pf; unsigned int rev; }; -struct device; - -enum ucode_state { - UCODE_OK = 0, - UCODE_NEW, - UCODE_UPDATED, - UCODE_NFOUND, - UCODE_ERROR, -}; - -struct microcode_ops { - enum ucode_state (*request_microcode_fw) (int cpu, struct device *); - - void (*microcode_fini_cpu) (int cpu); - - /* - * The generic 'microcode_core' part guarantees that - * the callbacks below run on a target cpu when they - * are being called. - * See also the "Synchronization" section in microcode_core.c. - */ - enum ucode_state (*apply_microcode) (int cpu); - int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); -}; - struct ucode_cpu_info { struct cpu_signature cpu_sig; void *mc; }; -extern struct ucode_cpu_info ucode_cpu_info[]; -struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa); - -#ifdef CONFIG_MICROCODE_INTEL -extern struct microcode_ops * __init init_intel_microcode(void); -#else -static inline struct microcode_ops * __init init_intel_microcode(void) -{ - return NULL; -} -#endif /* CONFIG_MICROCODE_INTEL */ -#ifdef CONFIG_MICROCODE_AMD -extern struct microcode_ops * __init init_amd_microcode(void); -extern void __exit exit_amd_microcode(void); +#ifdef CONFIG_MICROCODE +void load_ucode_bsp(void); +void load_ucode_ap(void); +void microcode_bsp_resume(void); #else -static inline struct microcode_ops * __init init_amd_microcode(void) -{ - return NULL; -} -static inline void __exit exit_amd_microcode(void) {} +static inline void load_ucode_bsp(void) { } +static inline void load_ucode_ap(void) { } +static inline void microcode_bsp_resume(void) { } #endif -#define MAX_UCODE_COUNT 128 +#ifdef CONFIG_CPU_SUP_INTEL +/* Intel specific microcode defines. Public for IFS */ +struct microcode_header_intel { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int metasize; + unsigned int reserved[2]; +}; -#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) -#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') -#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') -#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') -#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') -#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') -#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') +struct microcode_intel { + struct microcode_header_intel hdr; + unsigned int bits[]; +}; -#define CPUID_IS(a, b, c, ebx, ecx, edx) \ - (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define MC_HEADER_TYPE_MICROCODE 1 +#define MC_HEADER_TYPE_IFS 2 -/* - * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. - * x86_cpuid_vendor() gets vendor id for BSP. - * - * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify - * coding, we still use x86_cpuid_vendor() to get vendor id for AP. - * - * x86_cpuid_vendor() gets vendor information directly from CPUID. - */ -static inline int x86_cpuid_vendor(void) +static inline int intel_microcode_get_datasize(struct microcode_header_intel *hdr) { - u32 eax = 0x00000000; - u32 ebx, ecx = 0, edx; - - native_cpuid(&eax, &ebx, &ecx, &edx); - - if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) - return X86_VENDOR_INTEL; - - if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) - return X86_VENDOR_AMD; - - return X86_VENDOR_UNKNOWN; + return hdr->datasize ? : DEFAULT_UCODE_DATASIZE; } -static inline unsigned int x86_cpuid_family(void) +static inline u32 intel_get_microcode_revision(void) { - u32 eax = 0x00000001; - u32 ebx, ecx = 0, edx; + u32 rev, dummy; + + native_wrmsrl(MSR_IA32_UCODE_REV, 0); - native_cpuid(&eax, &ebx, &ecx, &edx); + /* As documented in the SDM: Do a CPUID 1 here */ + native_cpuid_eax(1); - return x86_family(eax); + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); + + return rev; } -#ifdef CONFIG_MICROCODE -extern void __init load_ucode_bsp(void); -extern void load_ucode_ap(void); -void reload_early_microcode(unsigned int cpu); -extern bool initrd_gone; -void microcode_bsp_resume(void); -#else -static inline void __init load_ucode_bsp(void) { } -static inline void load_ucode_ap(void) { } -static inline void reload_early_microcode(unsigned int cpu) { } -static inline void microcode_bsp_resume(void) { } -#endif +void show_ucode_info_early(void); + +#else /* CONFIG_CPU_SUP_INTEL */ +static inline void show_ucode_info_early(void) { } +#endif /* !CONFIG_CPU_SUP_INTEL */ #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h deleted file mode 100644 index 9675c621c1ca..000000000000 --- a/arch/x86/include/asm/microcode_amd.h +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MICROCODE_AMD_H -#define _ASM_X86_MICROCODE_AMD_H - -#include <asm/microcode.h> - -#define UCODE_MAGIC 0x00414d44 -#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 -#define UCODE_UCODE_TYPE 0x00000001 - -#define SECTION_HDR_SIZE 8 -#define CONTAINER_HDR_SZ 12 - -struct equiv_cpu_entry { - u32 installed_cpu; - u32 fixed_errata_mask; - u32 fixed_errata_compare; - u16 equiv_cpu; - u16 res; -} __attribute__((packed)); - -struct microcode_header_amd { - u32 data_code; - u32 patch_id; - u16 mc_patch_data_id; - u8 mc_patch_data_len; - u8 init_flag; - u32 mc_patch_data_checksum; - u32 nb_dev_id; - u32 sb_dev_id; - u16 processor_rev_id; - u8 nb_rev_id; - u8 sb_rev_id; - u8 bios_api_rev; - u8 reserved1[3]; - u32 match_reg[8]; -} __attribute__((packed)); - -struct microcode_amd { - struct microcode_header_amd hdr; - unsigned int mpb[]; -}; - -#define PATCH_MAX_SIZE (3 * PAGE_SIZE) - -#ifdef CONFIG_MICROCODE_AMD -extern void __init load_ucode_amd_bsp(unsigned int family); -extern void load_ucode_amd_ap(unsigned int family); -extern int __init save_microcode_in_initrd_amd(unsigned int family); -void reload_ucode_amd(unsigned int cpu); -extern void amd_check_microcode(void); -#else -static inline void __init load_ucode_amd_bsp(unsigned int family) {} -static inline void load_ucode_amd_ap(unsigned int family) {} -static inline int __init -save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } -static inline void reload_ucode_amd(unsigned int cpu) {} -static inline void amd_check_microcode(void) {} -#endif -#endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h deleted file mode 100644 index f1fa979e05bf..000000000000 --- a/arch/x86/include/asm/microcode_intel.h +++ /dev/null @@ -1,88 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MICROCODE_INTEL_H -#define _ASM_X86_MICROCODE_INTEL_H - -#include <asm/microcode.h> - -struct microcode_header_intel { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int metasize; - unsigned int reserved[2]; -}; - -struct microcode_intel { - struct microcode_header_intel hdr; - unsigned int bits[]; -}; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - struct extended_signature sigs[]; -}; - -#define DEFAULT_UCODE_DATASIZE (2000) -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) -#define MC_HEADER_TYPE_MICROCODE 1 -#define MC_HEADER_TYPE_IFS 2 - -#define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.totalsize : \ - DEFAULT_UCODE_TOTALSIZE) - -#define get_datasize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -static inline u32 intel_get_microcode_revision(void) -{ - u32 rev, dummy; - - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - native_cpuid_eax(1); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); - - return rev; -} - -#ifdef CONFIG_MICROCODE_INTEL -extern void __init load_ucode_intel_bsp(void); -extern void load_ucode_intel_ap(void); -extern void show_ucode_info_early(void); -extern int __init save_microcode_in_initrd_intel(void); -void reload_ucode_intel(void); -#else -static inline __init void load_ucode_intel_bsp(void) {} -static inline void load_ucode_intel_ap(void) {} -static inline void show_ucode_info_early(void) {} -static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; } -static inline void reload_ucode_intel(void) {} -#endif - -#endif /* _ASM_X86_MICROCODE_INTEL_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 1d29dc791f5a..416901d406f8 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -186,6 +186,8 @@ do { \ #else #define deactivate_mm(tsk, mm) \ do { \ + if (!tsk->vfork_done) \ + shstk_free(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index e90ac7e9ae2c..f46df8349e86 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -23,8 +23,6 @@ extern int pic_mode; #define MAX_IRQ_SOURCES 256 -extern unsigned int def_to_bigsmp; - #else /* CONFIG_X86_64: */ #define MAX_MP_BUSSES 256 @@ -41,7 +39,6 @@ extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); extern unsigned int boot_cpu_physical_apicid; extern u8 boot_cpu_apic_version; -extern unsigned long mp_lapic_addr; #ifdef CONFIG_X86_LOCAL_APIC extern int smp_found_config; @@ -76,7 +73,7 @@ static inline void e820__memblock_alloc_reserved_mpc_new(void) { } #define default_get_smp_config x86_init_uint_noop #endif -int generic_processor_info(int apicid, int version); +int generic_processor_info(int apicid); #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) @@ -87,13 +84,7 @@ struct physid_mask { typedef struct physid_mask physid_mask_t; #define physid_set(physid, map) set_bit(physid, (map).mask) -#define physid_clear(physid, map) clear_bit(physid, (map).mask) #define physid_isset(physid, map) test_bit(physid, (map).mask) -#define physid_test_and_set(physid, map) \ - test_and_set_bit(physid, (map).mask) - -#define physids_and(dst, src1, src2) \ - bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) #define physids_or(dst, src1, src2) \ bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) @@ -101,29 +92,9 @@ typedef struct physid_mask physid_mask_t; #define physids_clear(map) \ bitmap_zero((map).mask, MAX_LOCAL_APIC) -#define physids_complement(dst, src) \ - bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC) - #define physids_empty(map) \ bitmap_empty((map).mask, MAX_LOCAL_APIC) -#define physids_equal(map1, map2) \ - bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC) - -#define physids_weight(map) \ - bitmap_weight((map).mask, MAX_LOCAL_APIC) - -#define physids_shift_right(d, s, n) \ - bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC) - -#define physids_shift_left(d, s, n) \ - bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC) - -static inline unsigned long physids_coerce(physid_mask_t *map) -{ - return map->mask[0]; -} - static inline void physids_promote(unsigned long physids, physid_mask_t *map) { physids_clear(*map); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 3faf044569a5..c55cc243592e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -272,9 +272,9 @@ .endm #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#define CALL_UNTRAIN_RET "call entry_untrain_ret" #else -#define CALL_ZEN_UNTRAIN_RET "" +#define CALL_UNTRAIN_RET "" #endif /* @@ -282,7 +282,7 @@ * return thunk isn't mapped into the userspace tables (then again, AMD * typically has NO_MELTDOWN). * - * While zen_untrain_ret() doesn't clobber anything but requires stack, + * While retbleed_untrain_ret() doesn't clobber anything but requires stack, * entry_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point @@ -293,14 +293,20 @@ defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) VALIDATE_UNRET_END ALTERNATIVE_3 "", \ - CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH #endif +.endm -#ifdef CONFIG_CPU_SRSO - ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \ - "call srso_untrain_ret_alias", X86_FEATURE_SRSO_ALIAS +.macro UNTRAIN_RET_VM +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ + defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) + VALIDATE_UNRET_END + ALTERNATIVE_3 "", \ + CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \ + __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH #endif .endm @@ -309,15 +315,10 @@ defined(CONFIG_CALL_DEPTH_TRACKING) VALIDATE_UNRET_END ALTERNATIVE_3 "", \ - CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH #endif - -#ifdef CONFIG_CPU_SRSO - ALTERNATIVE_2 "", "call srso_untrain_ret", X86_FEATURE_SRSO, \ - "call srso_untrain_ret_alias", X86_FEATURE_SRSO_ALIAS -#endif .endm @@ -341,17 +342,24 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; +#ifdef CONFIG_RETHUNK extern void __x86_return_thunk(void); -extern void zen_untrain_ret(void); +#else +static inline void __x86_return_thunk(void) {} +#endif + +extern void retbleed_return_thunk(void); +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); + +extern void retbleed_untrain_ret(void); extern void srso_untrain_ret(void); -extern void srso_untrain_ret_alias(void); +extern void srso_alias_untrain_ret(void); + +extern void entry_untrain_ret(void); extern void entry_ibpb(void); -#ifdef CONFIG_CALL_THUNKS extern void (*x86_return_thunk)(void); -#else -#define x86_return_thunk (&__x86_return_thunk) -#endif #ifdef CONFIG_CALL_DEPTH_TRACKING extern void __x86_return_skl(void); @@ -478,9 +486,6 @@ enum ssb_mitigation { SPEC_STORE_BYPASS_SECCOMP, }; -extern char __indirect_thunk_start[]; -extern char __indirect_thunk_end[]; - static __always_inline void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index b49778664d2b..6c8ff12140ae 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -739,6 +739,7 @@ static __always_inline unsigned long arch_local_irq_save(void) ".popsection") extern void default_banner(void); +void native_pv_lock_init(void) __init; #else /* __ASSEMBLY__ */ @@ -778,6 +779,12 @@ extern void default_banner(void); #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop + +#ifndef __ASSEMBLY__ +static inline void native_pv_lock_init(void) +{ +} +#endif #endif /* !CONFIG_PARAVIRT */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5700bb337987..d6ad98ca1288 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -125,9 +125,15 @@ extern pmdval_t early_pmd_flags; * The following only work if pte_present() is true. * Undefined behaviour if not.. */ -static inline int pte_dirty(pte_t pte) +static inline bool pte_dirty(pte_t pte) { - return pte_flags(pte) & _PAGE_DIRTY; + return pte_flags(pte) & _PAGE_DIRTY_BITS; +} + +static inline bool pte_shstk(pte_t pte) +{ + return cpu_feature_enabled(X86_FEATURE_SHSTK) && + (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY; } static inline int pte_young(pte_t pte) @@ -135,9 +141,16 @@ static inline int pte_young(pte_t pte) return pte_flags(pte) & _PAGE_ACCESSED; } -static inline int pmd_dirty(pmd_t pmd) +static inline bool pmd_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_DIRTY_BITS; +} + +static inline bool pmd_shstk(pmd_t pmd) { - return pmd_flags(pmd) & _PAGE_DIRTY; + return cpu_feature_enabled(X86_FEATURE_SHSTK) && + (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == + (_PAGE_DIRTY | _PAGE_PSE); } #define pmd_young pmd_young @@ -146,9 +159,9 @@ static inline int pmd_young(pmd_t pmd) return pmd_flags(pmd) & _PAGE_ACCESSED; } -static inline int pud_dirty(pud_t pud) +static inline bool pud_dirty(pud_t pud) { - return pud_flags(pud) & _PAGE_DIRTY; + return pud_flags(pud) & _PAGE_DIRTY_BITS; } static inline int pud_young(pud_t pud) @@ -158,7 +171,27 @@ static inline int pud_young(pud_t pud) static inline int pte_write(pte_t pte) { - return pte_flags(pte) & _PAGE_RW; + /* + * Shadow stack pages are logically writable, but do not have + * _PAGE_RW. Check for them separately from _PAGE_RW itself. + */ + return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte); +} + +#define pmd_write pmd_write +static inline int pmd_write(pmd_t pmd) +{ + /* + * Shadow stack pages are logically writable, but do not have + * _PAGE_RW. Check for them separately from _PAGE_RW itself. + */ + return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd); +} + +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pud_flags(pud) & _PAGE_RW; } static inline int pte_huge(pte_t pte) @@ -185,6 +218,8 @@ static inline int pte_special(pte_t pte) static inline u64 protnone_mask(u64 val); +#define PFN_PTE_SHIFT PAGE_SHIFT + static inline unsigned long pte_pfn(pte_t pte) { phys_addr_t pfn = pte_val(pte); @@ -290,9 +325,63 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) return native_make_pte(v & ~clear); } +/* + * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the + * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So + * when creating dirty, write-protected memory, a software bit is used: + * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the + * Dirty bit to SavedDirty, and vice-vesra. + * + * This shifting is only done if needed. In the case of shifting + * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of + * shifting SavedDirty->Dirty, the condition is Write=1. + */ +static inline pgprotval_t mksaveddirty_shift(pgprotval_t v) +{ + pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1; + + v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY; + v &= ~(cond << _PAGE_BIT_DIRTY); + + return v; +} + +static inline pgprotval_t clear_saveddirty_shift(pgprotval_t v) +{ + pgprotval_t cond = (v >> _PAGE_BIT_RW) & 1; + + v |= ((v >> _PAGE_BIT_SAVED_DIRTY) & cond) << _PAGE_BIT_DIRTY; + v &= ~(cond << _PAGE_BIT_SAVED_DIRTY); + + return v; +} + +static inline pte_t pte_mksaveddirty(pte_t pte) +{ + pteval_t v = native_pte_val(pte); + + v = mksaveddirty_shift(v); + return native_make_pte(v); +} + +static inline pte_t pte_clear_saveddirty(pte_t pte) +{ + pteval_t v = native_pte_val(pte); + + v = clear_saveddirty_shift(v); + return native_make_pte(v); +} + static inline pte_t pte_wrprotect(pte_t pte) { - return pte_clear_flags(pte, _PAGE_RW); + pte = pte_clear_flags(pte, _PAGE_RW); + + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PTE (Write=0,Dirty=1). Move the hardware + * dirty value to the software bit, if present. + */ + return pte_mksaveddirty(pte); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP @@ -330,7 +419,7 @@ static inline pte_t pte_clear_uffd_wp(pte_t pte) static inline pte_t pte_mkclean(pte_t pte) { - return pte_clear_flags(pte, _PAGE_DIRTY); + return pte_clear_flags(pte, _PAGE_DIRTY_BITS); } static inline pte_t pte_mkold(pte_t pte) @@ -345,7 +434,16 @@ static inline pte_t pte_mkexec(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + pte = pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + + return pte_mksaveddirty(pte); +} + +static inline pte_t pte_mkwrite_shstk(pte_t pte) +{ + pte = pte_clear_flags(pte, _PAGE_RW); + + return pte_set_flags(pte, _PAGE_DIRTY); } static inline pte_t pte_mkyoung(pte_t pte) @@ -353,11 +451,15 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte_set_flags(pte, _PAGE_ACCESSED); } -static inline pte_t pte_mkwrite(pte_t pte) +static inline pte_t pte_mkwrite_novma(pte_t pte) { return pte_set_flags(pte, _PAGE_RW); } +struct vm_area_struct; +pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma); +#define pte_mkwrite pte_mkwrite + static inline pte_t pte_mkhuge(pte_t pte) { return pte_set_flags(pte, _PAGE_PSE); @@ -402,9 +504,34 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) return native_make_pmd(v & ~clear); } +/* See comments above mksaveddirty_shift() */ +static inline pmd_t pmd_mksaveddirty(pmd_t pmd) +{ + pmdval_t v = native_pmd_val(pmd); + + v = mksaveddirty_shift(v); + return native_make_pmd(v); +} + +/* See comments above mksaveddirty_shift() */ +static inline pmd_t pmd_clear_saveddirty(pmd_t pmd) +{ + pmdval_t v = native_pmd_val(pmd); + + v = clear_saveddirty_shift(v); + return native_make_pmd(v); +} + static inline pmd_t pmd_wrprotect(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_RW); + pmd = pmd_clear_flags(pmd, _PAGE_RW); + + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PMD (RW=0, Dirty=1). Move the hardware + * dirty value to the software bit. + */ + return pmd_mksaveddirty(pmd); } #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP @@ -431,12 +558,21 @@ static inline pmd_t pmd_mkold(pmd_t pmd) static inline pmd_t pmd_mkclean(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_DIRTY); + return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + pmd = pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + + return pmd_mksaveddirty(pmd); +} + +static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd) +{ + pmd = pmd_clear_flags(pmd, _PAGE_RW); + + return pmd_set_flags(pmd, _PAGE_DIRTY); } static inline pmd_t pmd_mkdevmap(pmd_t pmd) @@ -454,11 +590,14 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_ACCESSED); } -static inline pmd_t pmd_mkwrite(pmd_t pmd) +static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_RW); } +pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); +#define pmd_mkwrite pmd_mkwrite + static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); @@ -473,6 +612,24 @@ static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) return native_make_pud(v & ~clear); } +/* See comments above mksaveddirty_shift() */ +static inline pud_t pud_mksaveddirty(pud_t pud) +{ + pudval_t v = native_pud_val(pud); + + v = mksaveddirty_shift(v); + return native_make_pud(v); +} + +/* See comments above mksaveddirty_shift() */ +static inline pud_t pud_clear_saveddirty(pud_t pud) +{ + pudval_t v = native_pud_val(pud); + + v = clear_saveddirty_shift(v); + return native_make_pud(v); +} + static inline pud_t pud_mkold(pud_t pud) { return pud_clear_flags(pud, _PAGE_ACCESSED); @@ -480,17 +637,26 @@ static inline pud_t pud_mkold(pud_t pud) static inline pud_t pud_mkclean(pud_t pud) { - return pud_clear_flags(pud, _PAGE_DIRTY); + return pud_clear_flags(pud, _PAGE_DIRTY_BITS); } static inline pud_t pud_wrprotect(pud_t pud) { - return pud_clear_flags(pud, _PAGE_RW); + pud = pud_clear_flags(pud, _PAGE_RW); + + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PUD (RW=0, Dirty=1). Move the hardware + * dirty value to the software bit. + */ + return pud_mksaveddirty(pud); } static inline pud_t pud_mkdirty(pud_t pud) { - return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + pud = pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + + return pud_mksaveddirty(pud); } static inline pud_t pud_mkdevmap(pud_t pud) @@ -510,7 +676,9 @@ static inline pud_t pud_mkyoung(pud_t pud) static inline pud_t pud_mkwrite(pud_t pud) { - return pud_set_flags(pud, _PAGE_RW); + pud = pud_set_flags(pud, _PAGE_RW); + + return pud_clear_saveddirty(pud); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY @@ -627,6 +795,7 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask); static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pteval_t val = pte_val(pte), oldval = val; + pte_t pte_result; /* * Chop off the NX bit (if present), and add the NX portion of @@ -635,17 +804,54 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) val &= _PAGE_CHG_MASK; val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PTE_PFN_MASK); - return __pte(val); + + pte_result = __pte(val); + + /* + * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid: + * 1. Marking Write=0 PTEs Dirty=1 + * 2. Marking Dirty=1 PTEs Write=0 + * + * The first case cannot happen because the _PAGE_CHG_MASK will filter + * out any Dirty bit passed in newprot. Handle the second case by + * going through the mksaveddirty exercise. Only do this if the old + * value was Write=1 to avoid doing this on Shadow Stack PTEs. + */ + if (oldval & _PAGE_RW) + pte_result = pte_mksaveddirty(pte_result); + else + pte_result = pte_clear_saveddirty(pte_result); + + return pte_result; } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { pmdval_t val = pmd_val(pmd), oldval = val; + pmd_t pmd_result; - val &= _HPAGE_CHG_MASK; + val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY); val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK; val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK); - return __pmd(val); + + pmd_result = __pmd(val); + + /* + * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid: + * 1. Marking Write=0 PMDs Dirty=1 + * 2. Marking Dirty=1 PMDs Write=0 + * + * The first case cannot happen because the _PAGE_CHG_MASK will filter + * out any Dirty bit passed in newprot. Handle the second case by + * going through the mksaveddirty exercise. Only do this if the old + * value was Write=1 to avoid doing this on Shadow Stack PTEs. + */ + if (oldval & _PAGE_RW) + pmd_result = pmd_mksaveddirty(pmd_result); + else + pmd_result = pmd_clear_saveddirty(pmd_result); + + return pmd_result; } /* @@ -829,7 +1035,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) * (Currently stuck as a macro because of indirect forward reference * to linux/mm.h:page_to_nid()) */ -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) +#define mk_pte(page, pgprot) \ +({ \ + pgprot_t __pgprot = pgprot; \ + \ + WARN_ON_ONCE((pgprot_val(__pgprot) & (_PAGE_DIRTY | _PAGE_RW)) == \ + _PAGE_DIRTY); \ + pfn_pte(page_to_pfn(page), __pgprot); \ +}) static inline int pmd_bad(pmd_t pmd) { @@ -1020,24 +1233,17 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) return res; } -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - page_table_check_pte_set(mm, addr, ptep, pte); - set_pte(ptep, pte); -} - static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - page_table_check_pmd_set(mm, addr, pmdp, pmd); + page_table_check_pmd_set(mm, pmdp, pmd); set_pmd(pmdp, pmd); } static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { - page_table_check_pud_set(mm, addr, pudp, pud); + page_table_check_pud_set(mm, pudp, pud); native_set_pud(pudp, pud); } @@ -1068,7 +1274,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = native_ptep_get_and_clear(ptep); - page_table_check_pte_clear(mm, addr, pte); + page_table_check_pte_clear(mm, pte); return pte; } @@ -1084,7 +1290,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, * care about updates and native needs no locking */ pte = native_local_ptep_get_and_clear(ptep); - page_table_check_pte_clear(mm, addr, pte); + page_table_check_pte_clear(mm, pte); } else { pte = ptep_get_and_clear(mm, addr, ptep); } @@ -1095,7 +1301,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte); + /* + * Avoid accidentally creating shadow stack PTEs + * (Write=0,Dirty=1). Use cmpxchg() to prevent races with + * the hardware setting Dirty=1. + */ + pte_t old_pte, new_pte; + + old_pte = READ_ONCE(*ptep); + do { + new_pte = pte_wrprotect(old_pte); + } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); } #define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) @@ -1121,19 +1337,13 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define pmd_write pmd_write -static inline int pmd_write(pmd_t pmd) -{ - return pmd_flags(pmd) & _PAGE_RW; -} - #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { pmd_t pmd = native_pmdp_get_and_clear(pmdp); - page_table_check_pmd_clear(mm, addr, pmd); + page_table_check_pmd_clear(mm, pmd); return pmd; } @@ -1144,7 +1354,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, { pud_t pud = native_pudp_get_and_clear(pudp); - page_table_check_pud_clear(mm, addr, pud); + page_table_check_pud_clear(mm, pud); return pud; } @@ -1153,13 +1363,17 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); -} + /* + * Avoid accidentally creating shadow stack PTEs + * (Write=0,Dirty=1). Use cmpxchg() to prevent races with + * the hardware setting Dirty=1. + */ + pmd_t old_pmd, new_pmd; -#define pud_write pud_write -static inline int pud_write(pud_t pud) -{ - return pud_flags(pud) & _PAGE_RW; + old_pmd = READ_ONCE(*pmdp); + do { + new_pmd = pmd_wrprotect(old_pmd); + } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); } #ifndef pmdp_establish @@ -1167,7 +1381,7 @@ static inline int pud_write(pud_t pud) static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { - page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd); + page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { return xchg(pmdp, pmd); } else { @@ -1292,6 +1506,11 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { } +static inline void update_mmu_cache_range(struct vm_fault *vmf, + struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep, unsigned int nr) +{ +} static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { @@ -1412,6 +1631,11 @@ static inline bool __pte_access_permitted(unsigned long pteval, bool write) { unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER; + /* + * Write=0,Dirty=1 PTEs are shadow stack, which the kernel + * shouldn't generally allow access to, but since they + * are already Write=0, the below logic covers both cases. + */ if (write) need_pte_bits |= _PAGE_RW; @@ -1453,6 +1677,12 @@ static inline bool arch_has_hw_pte_young(void) return true; } +#define arch_check_zapped_pte arch_check_zapped_pte +void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte); + +#define arch_check_zapped_pmd arch_check_zapped_pmd +void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd); + #ifdef CONFIG_XEN_PV #define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young static inline bool arch_has_hw_nonleaf_pmd_young(void) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index ba3e2554799a..0b748ee16b3d 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -21,7 +21,8 @@ #define _PAGE_BIT_SOFTW2 10 /* " */ #define _PAGE_BIT_SOFTW3 11 /* " */ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ -#define _PAGE_BIT_SOFTW4 58 /* available for programmer */ +#define _PAGE_BIT_SOFTW4 57 /* available for programmer */ +#define _PAGE_BIT_SOFTW5 58 /* available for programmer */ #define _PAGE_BIT_PKEY_BIT0 59 /* Protection Keys, bit 1/4 */ #define _PAGE_BIT_PKEY_BIT1 60 /* Protection Keys, bit 2/4 */ #define _PAGE_BIT_PKEY_BIT2 61 /* Protection Keys, bit 3/4 */ @@ -34,6 +35,13 @@ #define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */ #define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4 +#ifdef CONFIG_X86_64 +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */ +#else +/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */ +#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */ +#endif + /* If _PAGE_BIT_PRESENT is clear, we use these: */ /* - if the user mapped it with PROT_NONE; pte_present gives true */ #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL @@ -117,6 +125,18 @@ #define _PAGE_SOFTW4 (_AT(pteval_t, 0)) #endif +/* + * The hardware requires shadow stack to be Write=0,Dirty=1. However, + * there are valid cases where the kernel might create read-only PTEs that + * are dirty (e.g., fork(), mprotect(), uffd-wp(), soft-dirty tracking). In + * this case, the _PAGE_SAVED_DIRTY bit is used instead of the HW-dirty bit, + * to avoid creating a wrong "shadow stack" PTEs. Such PTEs have + * (Write=0,SavedDirty=1,Dirty=0) set. + */ +#define _PAGE_SAVED_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_SAVED_DIRTY) + +#define _PAGE_DIRTY_BITS (_PAGE_DIRTY | _PAGE_SAVED_DIRTY) + #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) /* @@ -125,11 +145,12 @@ * instance, and is *not* included in this mask since * pte_modify() does modify it. */ -#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ - _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ - _PAGE_UFFD_WP) -#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) +#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ + _PAGE_SPECIAL | _PAGE_ACCESSED | \ + _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \ + _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP) +#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) +#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) /* * The cache modes defined here are used to translate between pure SW usage @@ -188,14 +209,22 @@ enum page_cache_mode { #define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) + +/* + * Page tables needs to have Write=1 in order for any lower PTEs to be + * writable. This includes shadow stack memory (Write=0, Dirty=1) + */ #define _KERNPG_TABLE_NOENC (__PP|__RW| 0|___A| 0|___D| 0| 0) #define _KERNPG_TABLE (__PP|__RW| 0|___A| 0|___D| 0| 0| _ENC) #define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) #define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) -#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) -#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0|___D| 0|___G) + +#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX| 0| 0|___G) +#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0| 0| 0|___G) +#define __PAGE_KERNEL (__PP|__RW| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_EXEC (__PP|__RW| 0|___A| 0|___D| 0|___G) #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) -#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX| 0| 0|___G) #define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) #define __PAGE_KERNEL_LARGE_EXEC (__PP|__RW| 0|___A| 0|___D|_PSE|___G) #define __PAGE_KERNEL_WP (__PP|__RW| 0|___A|__NX|___D| 0|___G| __WP) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4ae2773b873d..0086920cda06 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -28,6 +28,7 @@ struct vm86; #include <asm/unwind_hints.h> #include <asm/vmxfeatures.h> #include <asm/vdso/processor.h> +#include <asm/shstk.h> #include <linux/personality.h> #include <linux/cache.h> @@ -190,7 +191,6 @@ static inline unsigned long long l1tf_pfn_limit(void) } extern void early_cpu_init(void); -extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); @@ -475,6 +475,13 @@ struct thread_struct { */ u32 pkru; +#ifdef CONFIG_X86_USER_SHADOW_STACK + unsigned long features; + unsigned long features_locked; + + struct thread_shstk shstk; +#endif + /* Floating point and extended processor state */ struct fpu fpu; /* @@ -586,7 +593,6 @@ extern char ignore_fpu_irq; #define HAVE_ARCH_PICK_MMAP_LAYOUT 1 #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH #ifdef CONFIG_X86_32 # define BASE_PREFETCH "" @@ -620,11 +626,6 @@ static __always_inline void prefetchw(const void *x) "m" (*(const char *)x)); } -static inline void spin_lock_prefetch(const void *x) -{ - prefetchw(x); -} - #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) @@ -684,11 +685,13 @@ extern u32 amd_get_nodes_per_socket(void); extern u32 amd_get_highest_perf(void); extern bool cpu_has_ibpb_brtype_microcode(void); extern void amd_clear_divider(void); +extern void amd_check_microcode(void); #else static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline u32 amd_get_highest_perf(void) { return 0; } static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; } static inline void amd_clear_divider(void) { } +static inline void amd_check_microcode(void) { } #endif extern unsigned long arch_align_stack(unsigned long sp); diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index d87451df480b..cde8357bb226 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -74,8 +74,6 @@ static inline bool vcpu_is_preempted(long cpu) */ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); -void native_pv_lock_init(void) __init; - /* * Shortcut for the queued_spin_lock_slowpath() function that allows * virt to hijack it. @@ -103,10 +101,7 @@ static inline bool virt_spin_lock(struct qspinlock *lock) return true; } -#else -static inline void native_pv_lock_init(void) -{ -} + #endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 42b17cf10b10..85b6e3609cb9 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -4,6 +4,8 @@ #include <asm/ibt.h> +void __lockfunc __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked); + /* * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit * registers. For i386, however, only 1 32-bit register needs to be saved diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 7fa611216417..4b081e0d3306 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,12 +2,7 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -/* This counts to 12. Any more, it will return 13th argument. */ -#define __RMWcc_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n -#define RMWcc_ARGS(X...) __RMWcc_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) - -#define __RMWcc_CONCAT(a, b) a ## b -#define RMWcc_CONCAT(a, b) __RMWcc_CONCAT(a, b) +#include <linux/args.h> #define __CLOBBERS_MEM(clb...) "memory", ## clb @@ -48,7 +43,7 @@ cc_label: c = true; \ #define GEN_UNARY_RMWcc_3(op, var, cc) \ GEN_UNARY_RMWcc_4(op, var, cc, "%[var]") -#define GEN_UNARY_RMWcc(X...) RMWcc_CONCAT(GEN_UNARY_RMWcc_, RMWcc_ARGS(X))(X) +#define GEN_UNARY_RMWcc(X...) CONCATENATE(GEN_UNARY_RMWcc_, COUNT_ARGS(X))(X) #define GEN_BINARY_RMWcc_6(op, var, cc, vcon, _val, arg0) \ __GEN_RMWcc(op " %[val], " arg0, var, cc, \ @@ -57,7 +52,7 @@ cc_label: c = true; \ #define GEN_BINARY_RMWcc_5(op, var, cc, vcon, val) \ GEN_BINARY_RMWcc_6(op, var, cc, vcon, val, "%[var]") -#define GEN_BINARY_RMWcc(X...) RMWcc_CONCAT(GEN_BINARY_RMWcc_, RMWcc_ARGS(X))(X) +#define GEN_BINARY_RMWcc(X...) CONCATENATE(GEN_BINARY_RMWcc_, COUNT_ARGS(X))(X) #define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, cc, clobbers...) \ __GEN_RMWcc(op " %[var]\n\t" suffix, var, cc, \ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index a6e8373a5170..3fa87e5e11ab 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -2,8 +2,6 @@ #ifndef _ASM_X86_SECTIONS_H #define _ASM_X86_SECTIONS_H -#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed - #include <asm-generic/sections.h> #include <asm/extable.h> @@ -18,20 +16,4 @@ extern char __end_of_kernel_reserve[]; extern unsigned long _brk_start, _brk_end; -static inline bool arch_is_kernel_initmem_freed(unsigned long addr) -{ - /* - * If _brk_start has not been cleared, brk allocation is incomplete, - * and we can not make assumptions about its use. - */ - if (_brk_start) - return 0; - - /* - * After brk allocation is complete, space between _brk_end and _end - * is available for allocation. - */ - return addr >= _brk_end && addr < (unsigned long)&_end; -} - #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 794f69625780..9d6411c65920 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -56,7 +56,7 @@ #define GDT_ENTRY_INVALID_SEG 0 -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64) /* * The layout of the per-CPU GDT under Linux: * diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 66c806784c52..5b4a1ce3d368 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -164,6 +164,7 @@ static __always_inline void sev_es_nmi_complete(void) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +extern void sev_enable(struct boot_params *bp); static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { @@ -210,12 +211,15 @@ bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); void snp_accept_memory(phys_addr_t start, phys_addr_t end); +u64 snp_get_unsupported_features(u64 status); +u64 sev_get_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +static inline void sev_enable(struct boot_params *bp) { } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } @@ -235,6 +239,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } +static inline u64 snp_get_unsupported_features(u64 status) { return 0; } +static inline u64 sev_get_status(void) { return 0; } #endif #endif diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h new file mode 100644 index 000000000000..42fee8959df7 --- /dev/null +++ b/arch/x86/include/asm/shstk.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SHSTK_H +#define _ASM_X86_SHSTK_H + +#ifndef __ASSEMBLY__ +#include <linux/types.h> + +struct task_struct; +struct ksignal; + +#ifdef CONFIG_X86_USER_SHADOW_STACK +struct thread_shstk { + u64 base; + u64 size; +}; + +long shstk_prctl(struct task_struct *task, int option, unsigned long arg2); +void reset_thread_features(void); +unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clone_flags, + unsigned long stack_size); +void shstk_free(struct task_struct *p); +int setup_signal_shadow_stack(struct ksignal *ksig); +int restore_signal_shadow_stack(void); +#else +static inline long shstk_prctl(struct task_struct *task, int option, + unsigned long arg2) { return -EINVAL; } +static inline void reset_thread_features(void) {} +static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p, + unsigned long clone_flags, + unsigned long stack_size) { return 0; } +static inline void shstk_free(struct task_struct *p) {} +static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; } +static inline int restore_signal_shadow_stack(void) { return 0; } +#endif /* CONFIG_X86_USER_SHADOW_STACK */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SHSTK_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 600cf25dbfc6..ad98dd1d9cfb 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -22,10 +22,6 @@ DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); -DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) -DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); -#endif struct task_struct; @@ -132,11 +128,8 @@ void smp_kick_mwait_play_dead(void); void native_smp_send_reschedule(int cpu); void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); -void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); bool smp_park_other_cpus_in_init(void); - -void smp_store_boot_cpu_info(void); void smp_store_cpu_info(int id); asmlinkage __visible void smp_reboot_interrupt(void); @@ -186,13 +179,6 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) extern unsigned disabled_cpus; -#ifdef CONFIG_X86_LOCAL_APIC -extern int hard_smp_processor_id(void); - -#else /* CONFIG_X86_LOCAL_APIC */ -#define hard_smp_processor_id() 0 -#endif /* CONFIG_X86_LOCAL_APIC */ - #ifdef CONFIG_DEBUG_NMI_SELFTEST extern void nmi_selftest(void); #else diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index de48d1389936..d6cd9344f6c7 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -202,6 +202,19 @@ static inline void clwb(volatile void *__p) : [pax] "a" (p)); } +#ifdef CONFIG_X86_USER_SHADOW_STACK +static inline int write_user_shstk_64(u64 __user *addr, u64 val) +{ + asm_volatile_goto("1: wrussq %[val], (%[addr])\n" + _ASM_EXTABLE(1b, %l[fail]) + :: [addr] "r" (addr), [val] "r" (val) + :: fail); + return 0; +fail: + return -EFAULT; +} +#endif /* CONFIG_X86_USER_SHADOW_STACK */ + #define nop() asm volatile ("nop") static inline void serialize(void) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 80450e1d5385..25726893c6f4 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -3,6 +3,7 @@ #define _ASM_X86_TLBFLUSH_H #include <linux/mm_types.h> +#include <linux/mmu_notifier.h> #include <linux/sched.h> #include <asm/processor.h> @@ -253,6 +254,18 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a) flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false); } +static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm) +{ + bool should_defer = false; + + /* If remote CPUs need to be flushed then defer batch the flush */ + if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids) + should_defer = true; + put_cpu(); + + return should_defer; +} + static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) { /* @@ -264,11 +277,18 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm) return atomic64_inc_return(&mm->context.tlb_gen); } -static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch, - struct mm_struct *mm) +static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, + struct mm_struct *mm, + unsigned long uaddr) { inc_mm_tlb_gen(mm); cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm)); + mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL); +} + +static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm) +{ + flush_tlb_mm(mm); } extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); @@ -286,7 +306,8 @@ static inline bool pte_flags_need_flush(unsigned long oldflags, const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED; const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 | - _PAGE_SOFTW3 | _PAGE_SOFTW4; + _PAGE_SOFTW3 | _PAGE_SOFTW4 | + _PAGE_SAVED_DIRTY; const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT | _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 | diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index caf41c4869a0..3235ba1e5b06 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -136,10 +136,11 @@ static inline int topology_max_smt_threads(void) return __max_smt_threads; } +#include <linux/cpu_smt.h> + int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); -bool topology_smt_supported(void); extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) @@ -162,7 +163,6 @@ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } -static inline bool topology_smt_supported(void) { return false; } #endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h index 10b1de500ab1..afa524325e55 100644 --- a/arch/x86/include/asm/trap_pf.h +++ b/arch/x86/include/asm/trap_pf.h @@ -11,6 +11,7 @@ * bit 3 == 1: use of reserved bit detected * bit 4 == 1: fault was an instruction fetch * bit 5 == 1: protection keys block access + * bit 6 == 1: shadow stack access fault * bit 15 == 1: SGX MMU page-fault */ enum x86_pf_error_code { @@ -20,6 +21,7 @@ enum x86_pf_error_code { X86_PF_RSVD = 1 << 3, X86_PF_INSTR = 1 << 4, X86_PF_PK = 1 << 5, + X86_PF_SHSTK = 1 << 6, X86_PF_SGX = 1 << 15, }; diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 47ecfff2c83d..b1c9cea6ba88 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -18,7 +18,8 @@ void __init trap_init(void); asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); #endif -extern bool ibt_selftest(void); +extern int ibt_selftest(void); +extern int ibt_selftest_noendbr(void); #ifdef CONFIG_X86_F00F_BUG /* For handling the FOOF bug */ @@ -47,4 +48,16 @@ void __noreturn handle_stack_overflow(struct pt_regs *regs, struct stack_info *info); #endif +static inline void cond_local_irq_enable(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); +} + +static inline void cond_local_irq_disable(struct pt_regs *regs) +{ + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +} + #endif /* _ASM_X86_TRAPS_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 81b826d3b753..f2c02e4469cc 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -116,7 +116,7 @@ copy_user_generic(void *to, const void *from, unsigned long len) "2:\n" _ASM_EXTABLE_UA(1b, 2b) :"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT - : : "memory", "rax", "r8", "r9", "r10", "r11"); + : : "memory", "rax"); clac(); return len; } diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 1b6455f881f9..6989b824fd32 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -10,6 +10,7 @@ * Copyright (c) Russ Anderson <rja@sgi.com> */ +#include <linux/efi.h> #include <linux/rtc.h> /* @@ -115,7 +116,8 @@ struct uv_arch_type_entry { struct uv_systab { char signature[4]; /* must be UV_SYSTAB_SIG */ u32 revision; /* distinguish different firmware revs */ - u64 function; /* BIOS runtime callback function ptr */ + u64 (__efiapi *function)(enum uv_bios_cmd, ...); + /* BIOS runtime callback function ptr */ u32 size; /* systab size (starting with _VERSION_UV4) */ struct { u32 type:8; /* type of entry */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index fa9ec20783fa..85e63d58c074 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -295,7 +295,10 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +static inline unsigned long virt_to_pfn(const void *v) +{ + return PFN_DOWN(__pa(v)); +} #define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h index 77a2d19cc990..abde0f44df57 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -2,12 +2,6 @@ #ifndef _ASM_X86_SWIOTLB_XEN_H #define _ASM_X86_SWIOTLB_XEN_H -#ifdef CONFIG_SWIOTLB_XEN -extern int pci_xen_swiotlb_init_late(void); -#else -static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } -#endif - int xen_swiotlb_fixup(void *buf, unsigned long nslabs); int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, |